# Song Popularity Predictor for Spotify

Following function uses neural network to predict song popularity. This is only available for the songs that are uploaded to spotify. 

In [28]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from IPython.display import display
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
import keras
from keras.models import Sequential
from keras import regularizers
from keras.layers import Dense, Dropout, Activation, Flatten
from get_song_info import get_song_info

def predict_song_popularity(Track_URI):
    # Using Client Credentials for authorization
    client_credentials_manager = SpotifyClientCredentials(client_id="f2d11bfce2cf4bae8e096454ca4299ed",client_secret="0192b251aff04567a49d33d83287a254")
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    # get access token for spotify
    token = client_credentials_manager.get_access_token()
    track_info = sp.tracks([Track_URI])
    track_info1 = track_info['tracks'][0]
    artist_name = (track_info1['artists'][0]['name'])
    song_name = (track_info1['name'])
    # get saved spotify dataset
    data = pd.read_csv("spotifydatafile.csv")
    data1 = data[data.columns[4:-1]]
    index = list(data1.dtypes.index)
    # log transformation for skewed features
    transformation_constants = {}
    data_for_transformation = data1[data1.columns[0:len(index)]]
    index_for_transformation = ['artist_followers','confidence_kurtosis', \
    'loudness_std','loudness_kurtosis','tempo_kurtosis','tempo_confidence','key_kurtosis', \
    'key_confidence_kurtosis','mode_kurtosis','mode_confidence_kurtosis','time_signature_mean',\
    'time_signature_kurtosis','time_signature_confidence_mean','time_signature_confidence_kurtosis',\
    'C_dominance_std','C_dominance_kurtosis','C#_dominance_std','C#_dominance_kurtosis',\
    'D_dominance_std','D_dominance_kurtosis','D#_dominance_std','D#_dominance_kurtosis',\
    'E_dominance_std','E_dominance_kurtosis','F_dominance_std','F_dominance_kurtosis',\
    'F#_dominance_std','F#_dominance_kurtosis','G_dominance_std','G_dominance_kurtosis',\
    'G#_dominance_std','G#_dominance_kurtosis','A_dominance_std','A_dominance_kurtosis',\
    'A#_dominance_std','A#_dominance_kurtosis','B_dominance_std','B_dominance_kurtosis',\
    'pitch_entropy','timbre_1_std','timbre_1_kurtosis','timbre_2_std','timbre_2_kurtosis',\
    'timbre_3_std','timbre_3_kurtosis','timbre_4_std','timbre_4_kurtosis','timbre_5_std',\
    'timbre_5_kurtosis','timbre_6_std','timbre_6_kurtosis','timbre_7_std','timbre_7_kurtosis',\
    'timbre_8_std','timbre_8_kurtosis','timbre_9_std','timbre_9_kurtosis','timbre_10_std',\
    'timbre_10_kurtosis','timbre_11_std','timbre_11_kurtosis','timbre_12_std','timbre_12_kurtosis']

    for i,item in enumerate(data_for_transformation[data_for_transformation.columns[:]]):
        if item in index_for_transformation:
            if np.min(data_for_transformation[item]) < 0:
                transformation_constants[item] = np.min(data_for_transformation[item])
            else:
                transformation_constants[item]=0
                data_for_transformation[item] = data_for_transformation[item].apply(lambda x: np.log(x+1))
    # MinMax scaling            
    scaler = MinMaxScaler() 
    data_log_minmax_transform = pd.DataFrame(data = data_for_transformation)
    data_log_minmax_transform[index] = scaler.fit_transform(data_for_transformation[index])           

    # Split the data into training and testing sets
    X_train1, X_test1, y_train1, y_test1 = train_test_split(data_log_minmax_transform[data_log_minmax_transform.columns[1:len(index)]], 
                                                        data1['track_popularity'], 
                                                        test_size = 0.2, 
                                                        random_state = 0)            
    # Build neural network model
    model = Sequential()
    model.add(Dense(200,activation='sigmoid',input_shape=(129,),kernel_regularizer=regularizers.l2(0.001),
                    activity_regularizer=regularizers.l1(0.001)))
    model.add(Dense(100,activation = 'softmax'))
    model.add(Dense(50,activation = 'relu'))
    model.add(Dense(32,activation='relu'))
    model.add(Dense(10,activation="relu"))
    model.add(Dense(1,activation='relu'))

    model.compile(loss=['mean_absolute_error'],optimizer='Adam',metrics=['mse','mae'])
    model.fit(X_train1.values, y_train1.values, epochs=100,batch_size = 5, verbose=0)
    score = model.evaluate(X_test1.values, y_test1.values, verbose=0)
    
    # Get data for the song, which song popularity will be predicted
    info = get_song_info(Track_URI)
    info0 = {k:v for k,v in info[0].items()}
    info_for_prediction = pd.DataFrame(((v) for k,v in info[0].items()),index = info0.keys()).T
    info_for_prediction = info_for_prediction[info_for_prediction.columns[5:-1]]
    # Log transformation on the data for popularity prediction
    for k,v in info_for_prediction.items():
        if k in transformation_constants.keys():
            info_for_prediction[k]=info_for_prediction[k].apply(lambda x: np.log(x+abs(transformation_constants[k])+1))
    index_for_prediction = index[1:]
    dataset_for_prediction=data_for_transformation[index_for_prediction]
    dataset_for_prediction=dataset_for_prediction.append(info_for_prediction)
    # MinMax Scaling on the data for popularity prediction
    info_log_minmax_transform = pd.DataFrame(data = dataset_for_prediction)
    info_log_minmax_transform[index_for_prediction] = scaler.fit_transform(dataset_for_prediction[index_for_prediction])
    # Predict song popularity
    predicted_track_popularity = model.predict(info_log_minmax_transform.tail(1))
    # Print final output
    print("--------------------------------------------------------------------------------")
    print("")
    print("Following Song is "+song_name)
    print("by "+artist_name)
    print("")
    
    print("Predicted Track Popularity is : " + str(predicted_track_popularity[0][0]) +"/100")
    print("")
    print("* Following model has RMSE(Root Mean Square Error) : " , np.sqrt(score[1]))
    print("  and MAE(Mean Absolute Error) : ", score[2])
    print("--------------------------------------------------------------------------------")

Please copy spotify URI from the spotify and put it in to function predict_song_popularity.
Model will provide predicted track popularity.

In [29]:
predict_song_popularity("49C2uDJ3QuCPop9o030V2O")

retrying ...1secs
retrying ...2secs
retrying ...3secs
retrying ...4secs
retrying ...5secs
--------------------------------------------------------------------------------

Following Song is No Sleep
by Bossfight

Predicted Track Popularity is : 37.084034/100

* Following model has RMSE(Root Mean Square Error) :  16.3609444620479
  and MAE(Mean Absolute Error) :  11.911048587773239
--------------------------------------------------------------------------------
