In [83]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from os import chdir
import numpy as np
import pickle

In [48]:
def clipAndNormalize(features, scaler=None, scaler_path='scaler2.pkl'):
    """Clip audio features to fixed bounds, then apply a previously fitted scaler.

    Parameters
    ----------
    features : pandas.DataFrame
        Must contain every column listed in ``clip_bounds`` below (a missing
        column raises ``KeyError``). An optional ``id`` column is excluded from
        scaling. The frame is not modified; clipping is done on a copy.
    scaler : object, optional
        Already fitted transformer exposing ``transform(X)``. When omitted, the
        scaler is unpickled from ``scaler_path``.
    scaler_path : str, optional
        Pickle file to load the scaler from when ``scaler`` is not given.
        Defaults to ``'scaler2.pkl'``, resolved against the working directory.

    Returns
    -------
    tuple
        ``(preprocessedFeaturesDF, preprocessedFeatures)``: the scaled values as
        a DataFrame with a fresh default index and the non-``id`` column names,
        and the raw return value of ``scaler.transform``.
    """
    # (lower, upper) bounds per column; the original comment describes them as
    # the range of the training data.
    clip_bounds = {
        'danceability': (0.22718080000000002, 0.906),
        'energy': (0.03545904, 0.978),
        'loudness': (-26.4981552, -1.6015904000000007),
        'speechiness': (0.0257, 0.46640959999999926),
        'acousticness': (8.353136000000001e-05, 0.9884095999999992),
        'instrumentalness': (0.0, 0.956),
        'liveness': (0.0494, 0.697),
        'valence': (0.0382, 0.923),
        'tempo': (63.7631808, 188.00344319999996),
        'duration_ms': (88264.8768, 372339.1727999991),
        'time_signature': (3.0, 5.0),
    }

    # Work on a copy so the caller's DataFrame is not clipped as a side effect.
    clipped = features.copy()
    for column, (lowerBound, upperBound) in clip_bounds.items():
        clipped[column] = clipped[column].clip(lower=lowerBound, upper=upperBound)

    if scaler is None:
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(scaler_path, 'rb') as scalerFile:
            scaler = pickle.load(scalerFile)

    # The track id is an identifier, not a feature: keep it out of the scaler.
    if 'id' in clipped.columns:
        rawfeatures = clipped.drop(['id'], axis=1)
    else:
        rawfeatures = clipped

    # Transform the id-free frame so the output width matches rawfeatures.columns.
    preprocessedFeatures = scaler.transform(rawfeatures)
    preprocessedFeaturesDF = pd.DataFrame(preprocessedFeatures, columns=rawfeatures.columns)
    return preprocessedFeaturesDF, preprocessedFeatures

In [3]:
# NOTE(review): hard-coded absolute path to one user's machine, so this cell
# only works there. The relative paths used elsewhere in this notebook (the CSV
# read in the next cell and 'scaler2.pkl' in clipAndNormalize) are resolved
# against this directory.
chdir('C:/Users/mlar5/OneDrive/Desktop/Code Folder/Python Projects/IRL projects/Aspire - Affective Computing Project/Playlists Data/Audio Features/emotion joint data')

In [4]:
# Load the labelled songs; the path is relative to the current working directory.
emotionsDF = pd.read_csv('factorizedEmotionsDF1_readyForTransform.csv')

In [6]:
# Class distribution before balancing: mood_code 7 dominates (3783 rows) while
# mood_code 1 has only 279.
emotionsDF['mood_code'].value_counts()

7    3783
6    1218
0    1020
5     773
3     728
2     631
4     405
1     279
Name: mood_code, dtype: int64

In [55]:
# Balance the data by capping every mood_code at MAX_SONGS_PER_MOOD rows.
# Classes with fewer rows are kept whole (their rows are only shuffled).
MAX_SONGS_PER_MOOD = 500

# Every group is sampled with the same fixed seed, so the row order (and hence
# the positional train/test split further down) is identical on every run.
# groupby(sort=False) visits the classes in first-appearance order, and a single
# concat of real frames avoids growing an empty frame inside a loop.
balancedDF = pd.concat(
    [
        moodRows.sample(n=min(len(moodRows), MAX_SONGS_PER_MOOD), random_state=42)
        for _, moodRows in emotionsDF.groupby('mood_code', sort=False)
    ]
)

balancedDF['mood_code'].value_counts()

6    500
5    500
7    500
2    500
3    500
0    500
4    405
1    279
Name: mood_code, dtype: int64

In [56]:
# Spot-check the balanced frame; the original row labels from emotionsDF are kept.
balancedDF.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri,duration_ms,time_signature,song,mood,genre,mood_code
232,0.457,0.932,5,-3.984,1,0.0849,0.0577,0.00014,0.587,0.659,189.88,spotify:track:1IIxXYokMte6KEQHxJynu9,197053,4,A Girl Is a Gun,angry,EDM,1
126,0.733,0.93,11,-5.772,1,0.139,0.175,0.0133,0.0662,0.691,139.927,spotify:track:77VHb191LLkpsYJTS2cgEc,126062,4,Team,angry,metal,1
118,0.445,0.907,11,-3.894,0,0.0773,0.00161,0.0,0.403,0.71,165.069,spotify:track:55Fpeuuc2sbQiy74eA1gTt,187880,4,Wolf in Sheep's Clothing,angry,metal,1
201,0.656,0.689,6,-8.698,1,0.441,0.174,0.00367,0.0874,0.282,180.201,spotify:track:2atHJbCPSlFMeb4mlnK1L5,145333,4,I Wash My Hands In The Blood,angry,EDM,1
140,0.88,0.777,7,-6.875,0,0.125,0.164,0.0171,0.0957,0.974,154.955,spotify:track:46M2hXnaQpueG7vSvgVtVH,93894,4,GTG,angry,metal,1


In [57]:
# Keep only the numeric audio features: drop the identifier, the text columns
# and the target (mood / mood_code).
rawfeatures = balancedDF.drop(['uri', 'song','mood','genre','mood_code'], axis=1)

In [58]:
# NOTE(review): `rawfeatures` is rebound here. Before this line it is the
# unscaled DataFrame; afterwards it is the raw output of scaler.transform
# (second return value), while rawfeaturesDF is the same data as a DataFrame.
# Re-running this cell alone would therefore feed the scaled output back in.
rawfeaturesDF, rawfeatures = clipAndNormalize(rawfeatures)

In [59]:
# Spot-check the scaled features; clipAndNormalize rebuilds the frame, so the
# index restarts at 0 instead of carrying the balancedDF row labels.
rawfeaturesDF.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,-1.045141,1.505777,-0.064537,0.930482,0.805606,-0.106761,-0.888148,-0.556564,3.408128,1.208816,2.237071,0.043283,0.187342
1,0.820226,1.497945,1.619457,0.596268,0.805606,0.451844,-0.549409,-0.519087,-0.834554,1.35054,0.632665,-1.235463,0.187342
2,-1.126244,1.407886,1.619457,0.947305,-1.241301,-0.185234,-1.050124,-0.556962,1.909177,1.434689,1.471703,-0.121948,0.187342
3,0.299815,0.554282,0.216129,0.049339,0.805606,3.57012,-0.552297,-0.546511,-0.661849,-0.46088,1.976688,-0.888339,0.187342
4,1.813736,0.898856,0.496795,0.390095,-1.241301,0.307288,-0.581175,-0.508265,-0.594234,2.378045,1.134179,-1.814898,0.187342


In [60]:
# Target labels, taken from the same balancedDF the features were derived from.
y = balancedDF['mood_code']

In [61]:
# Cast the labels to pandas' categorical dtype.
y = y.astype('category')

In [62]:
# Stratified 80/20 split with a fixed seed. `rawfeatures` at this point is the
# scaled output returned by clipAndNormalize, not the original DataFrame, so
# features and labels are matched by row position.
X_train_standard, X_test_standard, y_train_standard, y_test_standard = train_test_split(rawfeatures, y, test_size=0.2, random_state=42, stratify=y)


In [123]:

# Initialize the MLP classifier: two hidden layers (256 and 128 units), fixed seed.
# Per the scikit-learn docs, early_stopping=True sets aside part of the training
# data as a validation set and stops once the validation score stops improving.
mlp = MLPClassifier(hidden_layer_sizes=(256,128),random_state=42,early_stopping=True)

In [127]:
# Train the model on the training split. Classes were capped at 500 rows each
# upstream; no oversampling is applied in this notebook.
mlp.fit(X_train_standard, y_train_standard)

# Make predictions on the test set
y_pred = mlp.predict(X_test_standard)

# Report per-class precision / recall / F1 plus accuracy, macro and weighted averages

print(classification_report(y_test_standard, y_pred))


              precision    recall  f1-score   support

           0       0.33      0.41      0.37       100
           1       0.57      0.23      0.33        56
           2       0.54      0.77      0.64       100
           3       0.39      0.57      0.47       100
           4       0.56      0.31      0.40        81
           5       0.37      0.30      0.33       100
           6       0.56      0.66      0.61       100
           7       0.33      0.20      0.25       100

    accuracy                           0.45       737
   macro avg       0.46      0.43      0.42       737
weighted avg       0.45      0.45      0.43       737



In [128]:
# Re-score the MLP predictions, counting a prediction one mood_code away from
# the truth as correct.
# NOTE(review): offByOne is defined in a later cell of this notebook (In [126]),
# so this cell fails on a fresh Restart & Run All unless that cell has run first.
offByOne(y_test_standard, y_pred)

              precision    recall  f1-score   support

           0       0.46      0.49      0.48       100
           1       0.84      0.66      0.74        56
           2       0.70      0.89      0.78       100
           3       0.54      0.68      0.60       100
           4       0.78      0.58      0.67        81
           5       0.56      0.47      0.51       100
           6       0.85      0.87      0.86       100
           7       0.72      0.63      0.67       100

    accuracy                           0.66       737
   macro avg       0.68      0.66      0.66       737
weighted avg       0.67      0.66      0.66       737



In [139]:
# Alternative tried earlier (linear kernel), kept for reference:
#svm = SVC(kernel='linear', class_weight='balanced', random_state=42)
# Degree-3 polynomial-kernel SVM. Per the scikit-learn docs, class_weight='balanced'
# weights classes inversely to their frequency (mood codes 1 and 4 have fewer rows).
svm =SVC(kernel='poly', degree=3,class_weight='balanced', random_state=42)


In [140]:
# Train the SVM on the same training split used for the MLP
svm.fit(X_train_standard, y_train_standard)

# Make predictions on the test set
y_pred_SVM = svm.predict(X_test_standard)

# Report per-class precision / recall / F1 plus accuracy, macro and weighted averages
print(classification_report(y_test_standard, y_pred_SVM))

              precision    recall  f1-score   support

           0       0.31      0.51      0.38       100
           1       0.43      0.38      0.40        56
           2       0.56      0.54      0.55       100
           3       0.47      0.55      0.51       100
           4       0.42      0.47      0.44        81
           5       0.40      0.30      0.34       100
           6       0.61      0.58      0.59       100
           7       0.26      0.12      0.16       100

    accuracy                           0.43       737
   macro avg       0.43      0.43      0.42       737
weighted avg       0.43      0.43      0.42       737



In [126]:
def offByOne(y_test_standard, y_pred):
    """Print a classification report in which near misses count as hits.

    A prediction exactly one mood code away from the true label is replaced by
    the true label before scoring. The codes wrap around: for a true label of 0
    the accepted neighbours are 1 and 7, and for a true label of 7 they are 0
    and 6. The inputs are copied into lists and left unmodified; nothing is
    returned.
    """
    trueLabels = list(y_test_standard)
    relaxedPreds = list(y_pred)
    for position, truth in enumerate(trueLabels):
        # Pick the two labels that count as "adjacent" to this true label.
        if truth != 0 and truth != 7:
            neighbours = (truth - 1, truth + 1)
        elif truth == 0:
            neighbours = (1, 7)
        elif truth == 7:
            neighbours = (0, 6)
        else:
            continue
        if relaxedPreds[position] in neighbours:
            relaxedPreds[position] = truth
    print(classification_report(trueLabels, relaxedPreds))
    return

In [122]:
# Re-score the SVM predictions with the same off-by-one relaxation used for the MLP.
offByOne(y_test_standard, y_pred_SVM)

              precision    recall  f1-score   support

           0       0.42      0.60      0.50       100
           1       0.70      0.66      0.68        56
           2       0.76      0.74      0.75       100
           3       0.65      0.71      0.68       100
           4       0.61      0.67      0.64        81
           5       0.60      0.49      0.54       100
           6       0.90      0.79      0.84       100
           7       0.78      0.59      0.67       100

    accuracy                           0.66       737
   macro avg       0.68      0.66      0.66       737
weighted avg       0.68      0.66      0.66       737

