<a href="https://colab.research.google.com/github/SHocombe/Game-Theory/blob/main/gt_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import pymc as pm
import numpy as np
import arviz as az
from sklearn.preprocessing import StandardScaler

In [None]:
def aggregate_song_data(song_df):
    """Collapse a per-song table into one row of statistics per artist.

    Missing values are first replaced with 0. Any song that then has a 0 in
    Tempo, Artist, Track, Views, Likes or Stream is discarded, and the
    remaining songs are grouped by ``Artist``.

    Args:
        song_df: Per-song DataFrame containing the columns Artist, Track,
            Stream, Views, Likes, Comments, Danceability, Liveness, Valence,
            Acousticness, Instrumentalness, Loudness, Speechiness, Tempo,
            Energy, official_video and Licensed.

    Returns:
        DataFrame with one row per artist holding the ``avg_*`` means,
        ``total_songs``, ``official_video_rate``, ``license_rate`` and
        ``percent_liked`` (average likes as a percentage of average views).
        Any NaN left after aggregation is replaced with 0.
    """
    required_nonzero = ['Tempo', 'Artist', 'Track', 'Views', 'Likes', 'Stream']

    # Output column -> source column for every plain per-artist mean.
    mean_sources = {
        'avg_streams': 'Stream',
        'avg_youtube_views': 'Views',
        'avg_youtube_likes': 'Likes',
        'avg_youtube_comments': 'Comments',
        'avg_danceability': 'Danceability',
        'avg_liveliness': 'Liveness',
        'avg_valence': 'Valence',
        'avg_acousticness': 'Acousticness',
        'avg_instrumentalness': 'Instrumentalness',
        'avg_loudness': 'Loudness',
        'avg_speechiness': 'Speechiness',
        'avg_tempo': 'Tempo',
        'avg_energy': 'Energy',
    }

    def flag_rate(flags):
        # Fraction of an artist's songs for which the flag is set.
        return flags.astype(int).mean()

    songs = song_df.fillna(0)
    # Keep a song only if none of the required fields is 0.
    all_present = (songs[required_nonzero] != 0).all(axis=1)
    usable_songs = songs.loc[all_present]

    aggregations = {name: (source, 'mean') for name, source in mean_sources.items()}
    aggregations['total_songs'] = ('Track', 'count')
    aggregations['official_video_rate'] = ('official_video', flag_rate)
    aggregations['license_rate'] = ('Licensed', flag_rate)

    per_artist = usable_songs.groupby("Artist").agg(**aggregations).reset_index()

    like_ratio = per_artist['avg_youtube_likes'] / per_artist['avg_youtube_views']
    per_artist['percent_liked'] = like_ratio * 100

    return per_artist.fillna(0)


# Load the per-song dataset and collapse it to one row per artist.
song_df = pd.read_csv("GT_song_set.csv")
artist_df = aggregate_song_data(song_df)
# Snapshot of the aggregated, unscaled per-artist values; used later as the
# reference when standardizing a new band's raw features.
artist_df_raw = artist_df.copy()
#artist_df

In [None]:
def process_artist_data(artist_df):
    """Standardize per-artist features and attach a binary ``success`` label.

    The caller's DataFrame is left untouched; all work happens on a copy.

    Steps:
        1. Coerce the numeric columns to numbers (unparseable values -> NaN).
        2. Drop every row that contains a missing value.
        3. Add ``<col>_squared`` columns for danceability, valence, energy
           and liveliness, computed from the values before scaling.
        4. Z-score the numeric columns with ``StandardScaler``.
        5. Set ``success`` to 1 where ``avg_streams`` is at or above its
           median, else 0.

    Args:
        artist_df: Per-artist DataFrame containing every column listed in
            ``numeric_cols``.

    Returns:
        A new DataFrame with the scaled columns, the squared terms and the
        ``success`` column.
    """
    numeric_cols = ['avg_streams', 'avg_youtube_views', 'avg_youtube_likes', 'avg_youtube_comments',
                    'avg_danceability', 'avg_liveliness', 'avg_valence', 'avg_acousticness',
                    'avg_instrumentalness', 'avg_loudness', 'avg_speechiness', 'avg_tempo',
                    'avg_energy', 'total_songs', 'official_video_rate', 'license_rate', 'percent_liked']
    squared_cols = ["avg_danceability", "avg_valence", "avg_energy", "avg_liveliness"]

    # Work on a private copy so the caller's frame is not silently coerced,
    # filtered and rescaled behind its back.
    processed = artist_df.copy()

    processed[numeric_cols] = processed[numeric_cols].apply(pd.to_numeric, errors='coerce')
    processed.dropna(inplace=True)

    # Squared terms are taken from the unscaled values and are not scaled.
    for col in squared_cols:
        processed[f"{col}_squared"] = processed[col] ** 2

    processed[numeric_cols] = StandardScaler().fit_transform(processed[numeric_cols])

    # 1 = Successful, 0 = Not Successful (median split on average streams).
    success_threshold = processed['avg_streams'].quantile(0.5)
    processed['success'] = (processed['avg_streams'] >= success_threshold).astype(int)

    return processed

# Build the standardized modelling table; the bare expression on the last
# line makes the notebook render the resulting frame.
processed_df = process_artist_data(artist_df)
processed_df


In [None]:
def bayesian_regression(artist_df):
    """Fit a Bayesian logistic regression of ``success`` on artist features.

    The model is ``success ~ Bernoulli(sigmoid(alpha + X @ beta))`` with
    independent Normal(0, 1) priors on ``alpha`` and every ``beta``. The
    predictors are z-scored before fitting. Rows with missing values are
    ignored; the caller's DataFrame is not modified.

    Args:
        artist_df: DataFrame containing the predictor columns listed below
            and a binary ``success`` column.

    Returns:
        The trace returned by ``pm.sample`` (requested as InferenceData),
        with variables ``alpha``, ``beta`` and the per-row probability ``p``.
    """
    # The order here fixes which predictor each beta[i] belongs to.
    predictors = ['avg_danceability', 'avg_valence', 'avg_energy',
                  'license_rate', 'percent_liked', 'avg_liveliness']

    # dropna() returns a new frame, so the caller's data is left as it was.
    complete = artist_df.dropna()

    X = complete[predictors].to_numpy(dtype=float)
    y = complete['success'].to_numpy()

    # Z-score each predictor. A constant column has zero spread; give it a
    # scale of 1 so it becomes all zeros instead of 0/0 = NaN.
    scale = np.std(X, axis=0)
    scale[scale == 0] = 1.0
    X = (X - np.mean(X, axis=0)) / scale

    with pm.Model() as model:
        # Priors for the coefficients and the intercept.
        beta = pm.Normal("beta", mu=0, sigma=1, shape=len(predictors))
        alpha = pm.Normal("alpha", mu=0, sigma=1)

        # Logistic regression: linear predictor passed through a sigmoid.
        mu = alpha + pm.math.dot(X, beta)
        p = pm.Deterministic("p", pm.math.sigmoid(mu))

        pm.Bernoulli("y_obs", p=p, observed=y)

        trace = pm.sample(2000, tune=1000, cores=2, target_accept=0.9, return_inferencedata=True)

    # Plots every variable in the trace, including the per-row "p".
    az.plot_trace(trace)

    return trace

# Fit the model on the processed artist table.
trace = bayesian_regression(processed_df)


# Tabulate the posterior of the intercept and the coefficients only.
summary = az.summary(trace, var_names=["alpha", "beta"])
print(summary)

# Trace plots for the whole trace, then posterior densities for alpha/beta.
az.plot_trace(trace)
az.plot_posterior(trace, var_names=["alpha", "beta"])



In [None]:
import json
import os

import requests

# Spotify API credentials are read from the environment so they never have to
# be stored in the notebook. Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET
# before running this cell; both fall back to an empty string when unset.
CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "")

def get_access_token(timeout=10):
    """Request a Spotify access token with the client-credentials grant.

    Uses the module-level CLIENT_ID and CLIENT_SECRET.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The access token string, or None when the request fails, the
        endpoint answers with a non-200 status, or the response carries no
        ``access_token``. Failures are reported with ``print``.
    """
    url = "https://accounts.spotify.com/api/token"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {"grant_type": "client_credentials", "client_id": CLIENT_ID, "client_secret": CLIENT_SECRET}

    # Without a timeout requests can wait forever on an unresponsive server.
    try:
        response = requests.post(url, headers=headers, data=data, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error getting access token: {exc}")
        return None

    # An error response is not guaranteed to have a JSON body, so a parse
    # failure must not mask the status-code handling below.
    try:
        json_response = response.json()
    except ValueError:
        json_response = {}
    if not isinstance(json_response, dict):
        json_response = {}

    if response.status_code != 200:
        print(f"Error getting access token: {json_response.get('error', 'Unknown error')}")
        print(f"Error description: {json_response.get('error_description', 'No description')}")
        return None

    return json_response.get("access_token")

# Authenticate once; the Spotify calls further down reuse this token.
ACCESS_TOKEN = get_access_token()
if not ACCESS_TOKEN:
    print("Failed to get access token. Exiting.")
    exit()

# Confirm success without echoing the bearer token: anything printed here is
# saved with the notebook output and would expose the credential.
print("Access token acquired.")

def get_artist_id(artist_name, token, timeout=10):
    """Search Spotify for an artist and return the ID of the first match.

    Args:
        artist_name: Name to search for; it is percent-encoded into the query.
        token: Bearer access token for the Spotify Web API.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The ID of the first artist in the search results, or None when the
        request fails, the API answers with a non-200 status, or nothing
        matches. Failures are reported with ``print``.
    """
    encoded_name = requests.utils.quote(artist_name)
    url = f"https://api.spotify.com/v1/search?q={encoded_name}&type=artist"
    headers = {"Authorization": f"Bearer {token}"}

    # Without a timeout requests can wait forever on an unresponsive server.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error searching for artist: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error searching for artist: {response.status_code}")
        print(response.text)
        return None

    # Tolerate a response without the expected keys instead of raising.
    items = response.json().get("artists", {}).get("items", [])
    return items[0]["id"] if items else None

# Resolve the Spotify artist ID for the band being analysed
# (artist_id is None when the search fails or finds nothing).
artist_name = "mary in the junkyard"
artist_id = get_artist_id(artist_name, ACCESS_TOKEN)
print(f"Artist ID for {artist_name}: {artist_id}")

def get_artist_top_tracks(artist_id, token, market="GB", timeout=10):
    """Fetch an artist's top tracks from the Spotify Web API.

    Args:
        artist_id: Spotify ID of the artist.
        token: Bearer access token for the Spotify Web API.
        market: Market code sent as the ``market`` query parameter.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The list under ``tracks`` in the response, or an empty list when the
        request fails, the API answers with a non-200 status, or the key is
        absent. Failures are reported with ``print``.
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks?market={market}"
    headers = {"Authorization": f"Bearer {token}"}

    # Without a timeout requests can wait forever on an unresponsive server.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error getting top tracks: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error getting top tracks: {response.status_code}")
        print(response.text)
        return []

    return response.json().get("tracks", [])

# List the artist's top tracks; top_tracks stays empty when no artist ID
# was resolved.
top_tracks = []
if not artist_id:
    print("Artist not found.")
else:
    top_tracks = get_artist_top_tracks(artist_id, ACCESS_TOKEN)
    for track in top_tracks:
        print(f"{track['name']} - Popularity: {track['popularity']} - ID: {track['id']}")

def get_audio_features(track_id, token, timeout=10):
    """Fetch the audio-features record of one track from the Spotify Web API.

    Args:
        track_id: Spotify ID of the track.
        token: Bearer access token for the Spotify Web API.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The parsed JSON response, or None when the request fails or the API
        answers with a non-200 status. Failures are reported with ``print``.
    """
    url = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {"Authorization": f"Bearer {token}"}

    # Without a timeout requests can wait forever on an unresponsive server.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error getting audio features for track {track_id}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error getting audio features for track {track_id}: {response.status_code}")
        print(response.text)
        return None

    return response.json()

# Report valence, energy, danceability and liveness for every top track.
# Nothing is printed when there are no tracks.
if top_tracks:
    print("\nAudio Features:")
for track in top_tracks:
    track_id = track["id"]
    features = get_audio_features(track_id, ACCESS_TOKEN)
    if not features:
        print(f"No audio features found for {track['name']}")
        continue
    print(f"{track['name']} -> Valence: {features['valence']}, Energy: {features['energy']}, Danceability: {features['danceability']}, Liveness: {features['liveness']} ")

In [None]:
def predict_success_probability(trace, new_band_features):
    """Return posterior samples of the success probability for one new band.

    For every posterior draw the probability is
    ``sigmoid(alpha + beta . new_band_features)``.

    Args:
        trace: Inference result whose ``posterior`` mapping holds ``alpha``
            and ``beta``; the last axis of ``beta`` indexes the predictors.
        new_band_features: 1-D sequence of standardized predictor values, in
            the same order as the model's predictors.

    Returns:
        1-D array with one probability per posterior draw, all chains pooled.

    Raises:
        ValueError: If the number of features differs from the length of the
            last axis of ``beta``.
    """
    features = np.asarray(new_band_features, dtype=float).ravel()
    n_features = features.size

    beta = np.asarray(trace.posterior["beta"])
    alpha_samples = np.asarray(trace.posterior["alpha"]).reshape(-1)

    if beta.ndim == 0 or beta.shape[-1] != n_features:
        raise ValueError(
            f"Expected {beta.shape[-1] if beta.ndim else 0} feature values, got {n_features}"
        )

    # Pool chains and draws into rows while keeping each draw's coefficient
    # vector intact: (samples, features). Reshaping to (features, samples)
    # would not transpose the data; it would interleave coefficients that
    # belong to different draws and predictors.
    beta_samples = beta.reshape(-1, n_features)

    mu_samples = alpha_samples + beta_samples @ features

    return 1 / (1 + np.exp(-mu_samples))

def standardize_new_band(new_band_raw, dataset):
    """Z-score a new band's raw features against a reference dataset.

    Args:
        new_band_raw: Six raw values ordered as avg_danceability, avg_valence,
            avg_energy, license_rate, percent_liked, avg_liveliness.
        dataset: DataFrame of unscaled per-artist values containing those
            six columns; supplies the means and standard deviations.

    Returns:
        NumPy array of the six standardized values.

    Raises:
        ValueError: If ``new_band_raw`` does not hold exactly one value per
            feature column.
    """
    feature_columns = ['avg_danceability', 'avg_valence',
                       'avg_energy', 'license_rate', 'percent_liked', 'avg_liveliness']

    raw = np.asarray(new_band_raw, dtype=float)
    if raw.shape != (len(feature_columns),):
        raise ValueError(
            f"Expected {len(feature_columns)} feature values, got shape {raw.shape}"
        )

    means = dataset[feature_columns].mean().to_numpy()
    # Population standard deviation (ddof=0), the convention used when the
    # training features are scaled (StandardScaler / np.std). pandas defaults
    # to the sample standard deviation (ddof=1), which would put the new band
    # on a slightly different scale than the model was fitted on.
    stds = dataset[feature_columns].std(ddof=0).to_numpy()
    # A constant feature has zero spread; use a scale of 1 to avoid dividing
    # by zero.
    stds = np.where(stds == 0, 1.0, stds)

    return (raw - means) / stds


# Raw features of the band to score, in the order standardize_new_band
# expects: danceability, valence, energy, license rate, percent liked,
# liveness.
# NOTE(review): percent_liked in the artist table is likes / views * 100, so
# 0.011 here means 0.011 % - confirm this is the intended scale.
new_band_raw = [0.22, 0.34, 0.36, 0, 0.011, 0.25]  #dance, valence , energy, licence, like, live
new_band_standardized = standardize_new_band(new_band_raw, artist_df_raw)
print("New Band Features (Standardized):", new_band_standardized)

p_success_samples = predict_success_probability(trace, new_band_standardized)

mean_p_success = np.mean(p_success_samples)
# Equal-tailed interval between the 3rd and 97th percentiles, i.e. 94 % of the
# posterior samples (a percentile interval, not a highest-density interval,
# despite the variable names).
hdi_3, hdi_97 = np.percentile(p_success_samples, [3, 97])

print(f"Predicted probability of success: {mean_p_success:.3f}")
print(f"94% Credible Interval: [{hdi_3:.3f}, {hdi_97:.3f}]")