# Question: "What is the average popularity score of Spotify's Alternative/Emo songs, and what portion of these songs have a popularity score below 40"

In [1]:
import requests
import random
import numpy as np
import math

# -------------------------------------------------------
# AUTHENTICATION
# -------------------------------------------------------
SPOTIFY_CLIENT_ID = "c4dfd96c5e75436fabb5c6c7e4d71d50"
SPOTIFY_CLIENT_SECRET = "de8a9f54f4e640faa44f4d60da15549f"

# Get access token
auth_response = requests.post(
    "https://accounts.spotify.com/api/token",
    data={
        "grant_type": "client_credentials",
        "client_id": SPOTIFY_CLIENT_ID,
        "client_secret": SPOTIFY_CLIENT_SECRET,
    }
)

auth_data = auth_response.json()
access_token = auth_data["access_token"]

headers = {
    "Authorization": f"Bearer {access_token}"
}

# -------------------------------------------------------
# FUNCTION: GET TRACKS FROM ALTERNATIVE/EMO GENRE
# -------------------------------------------------------

def get_genre_tracks(genre, limit=50, pages=4):
    """Pulls multiple pages of track search results for a genre."""
    track_ids = []

    for i in range(pages):
        offset = i * limit
        url = (
            f"https://api.spotify.com/v1/search?q=genre:{genre}&type=track"
            f"&limit={limit}&offset={offset}"
        )
        response = requests.get(url, headers=headers)
        data = response.json()

        if "tracks" in data and "items" in data["tracks"]:
            for track in data["tracks"]["items"]:
                track_ids.append(track["id"])

    return track_ids


# -------------------------------------------------------
# GET LARGE POOL OF EMO/ALTERNATIVE SONGS
# -------------------------------------------------------
genres_to_use = ["alternative", "emo"]

track_pool = []
for g in genres_to_use:
    track_pool.extend(get_genre_tracks(g))

track_pool = list(set(track_pool))  # remove duplicates

print(f"Total unique tracks pulled from Spotify: {len(track_pool)}")

# -------------------------------------------------------
# RANDOM SAMPLING
# -------------------------------------------------------
SAMPLE_SIZE = 100

if len(track_pool) < SAMPLE_SIZE:
    print("Not enough tracks found. Reducing sample size.")
    SAMPLE_SIZE = len(track_pool)

sample_ids = random.sample(track_pool, SAMPLE_SIZE)

# -------------------------------------------------------
# COLLECT POPULARITY SCORES
# -------------------------------------------------------
popularities = []

for tid in sample_ids:
    url = f"https://api.spotify.com/v1/tracks/{tid}"
    r = requests.get(url, headers=headers)
    data = r.json()

    if "popularity" in data:
        popularities.append(data["popularity"])

popularities = np.array(popularities)
n = len(popularities)
print(f"\nSample size actually collected: {n}")

# -------------------------------------------------------
# POINT ESTIMATORS
# -------------------------------------------------------
mean_popularity = np.mean(popularities)
std_popularity = np.std(popularities, ddof=1)

prop_below_40 = np.sum(popularities < 40) / n

print("\n--- POINT ESTIMATES ---")
print(f"Mean popularity: {mean_popularity:.2f}")
print(f"Standard deviation: {std_popularity:.2f}")
print(f"Proportion below 40: {prop_below_40:.3f}")

# -------------------------------------------------------
# 95% CONFIDENCE INTERVALS
# -------------------------------------------------------
z = 1.96

# Mean CI
SE_mean = std_popularity / math.sqrt(n)
ME_mean = z * SE_mean
CI_mean = (mean_popularity - ME_mean, mean_popularity + ME_mean)

# Proportion CI
SE_prop = math.sqrt(prop_below_40 * (1 - prop_below_40) / n)
ME_prop = z * SE_prop
CI_prop = (prop_below_40 - ME_prop, prop_below_40 + ME_prop)

print("\n--- 95% CONFIDENCE INTERVALS ---")
print(f"Mean popularity CI: ({CI_mean[0]:.2f}, {CI_mean[1]:.2f})")
print(f"Proportion <40 CI: ({CI_prop[0]:.3f}, {CI_prop[1]:.3f})")

# -------------------------------------------------------
# FINAL ANSWER TO THE QUESTION
# -------------------------------------------------------
print("\n=====================================================")
print("FINAL ANSWER TO THE QUESTION:")
print("“What is the average popularity score of Spotify’s Alternative/Emo songs,")
print(" and what proportion of these songs have a popularity score below 40?”\n")

print(f"Based on a random sample of {n} Alternative/Emo songs from Spotify:")
print(f" • The average popularity score is **{mean_popularity:.1f}**.")
print(f" • Approximately **{prop_below_40*100:.1f}%** of sampled songs")
print("   have a popularity score below 40.\n")

print("These results are supported by the calculated sampling statistics and")
print("the 95% confidence intervals for both the mean popularity and the proportion.")
print("=====================================================")


Total unique tracks pulled from Spotify: 387

Sample size actually collected: 100

--- POINT ESTIMATES ---
Mean popularity: 0.00
Standard deviation: 0.00
Proportion below 40: 1.000

--- 95% CONFIDENCE INTERVALS ---
Mean popularity CI: (0.00, 0.00)
Proportion <40 CI: (1.000, 1.000)

FINAL ANSWER TO THE QUESTION:
“What is the average popularity score of Spotify’s Alternative/Emo songs,
 and what proportion of these songs have a popularity score below 40?”

Based on a random sample of 100 Alternative/Emo songs from Spotify:
 • The average popularity score is **0.0**.
 • Approximately **100.0%** of sampled songs
   have a popularity score below 40.

These results are supported by the calculated sampling statistics and
the 95% confidence intervals for both the mean popularity and the proportion.
