In [1]:
import numpy as np
import pandas as pd
import joblib
from numpy import array
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split,cross_val_score,cross_validate,GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, mean_absolute_error, mean_squared_error, multilabel_confusion_matrix, classification_report

In [2]:
df = pd.read_csv('SpotifyFeatures_new.csv',encoding='latin-1')
print(df)

# Drop columns
dropped_columns = ['genre', 'artist_name', 'track_name','track_id', 'mode', 'duration_ms', 'tempo', 'energy', 'acousticness']
df.drop(columns=dropped_columns, inplace=True)

# Normalize data
X, y = df.drop(columns='popularity'), df['popularity']
X = (X-X.min())/(X.max()-X.min())

y = ((y-1)//20)+1

print(y)
print(X)

          genre           artist_name  \
0        Reggae               Sanchez   
1          Jazz                  KLIM   
2         Blues              Montrose   
3         Movie  Idoles De La Musique   
4         Blues              Montrose   
...         ...                   ...   
169679      Rap           Post Malone   
169680  Hip-Hop          Daddy Yankee   
169681    Dance         Ariana Grande   
169682      Rap           Post Malone   
169683    Dance         Ariana Grande   

                                           track_name                track_id  \
0                                 Bring Back the Love  1gxvcZ9ZiYwVCtA5X6VSMP   
1                                     Baby I Love You  2DW2hWEmTBUc01EbXf0Pwk   
2                                       Crazy for You  1XXwzOFyOs0geeE7nOVIdq   
3                                     Un jour parfait  4nZ24G3vw52cqcfaHXypKa   
4                           What Are You Waitin' For?  3WSvl0YH2Budv3H5HnFFkD   
...                  

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state=5)

In [4]:
# load model from disk
nn_model = joblib.load('finalized_model.joblib')

In [5]:
# Make prediction
y_pred = nn_model.predict(X_train)

# Accuracy
print("\nAccuracy: ")
print(accuracy_score(y_train, y_pred))

# MSE
print('\nMSE: ')
print(mean_squared_error(y_train,y_pred))

# The map of label and their precision and recall
print("\nPrecision and recall:" )
print(classification_report(y_train, y_pred))


print("Confusion matrix: ")
matrix = confusion_matrix(y_train, y_pred)
multi_matrix = multilabel_confusion_matrix(y_train, y_pred)
print(multi_matrix)


Accuracy: 
0.5173742329480577

MSE: 
0.6406992419721983

Precision and recall:
              precision    recall  f1-score   support

           1       0.65      0.27      0.38     20567
           2       0.51      0.63      0.57     55273
           3       0.50      0.61      0.55     49241
           4       0.00      0.00      0.00     10270
           5       0.00      0.00      0.00       396

    accuracy                           0.52    135747
   macro avg       0.33      0.30      0.30    135747
weighted avg       0.49      0.52      0.49    135747

Confusion matrix: 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[[[112191   2989]
  [ 15011   5556]]

 [[ 47561  32913]
  [ 20482  34791]]

 [[ 56893  29613]
  [ 19356  29885]]

 [[125477      0]
  [ 10270      0]]

 [[135351      0]
  [   396      0]]]


In [6]:
from sklearn.metrics import classification_report

# Use the highest valued index as the output
def classifyOutput(output):
    return output


# Generate Confusion Matrix
def generateConfusionMatrix(y, y_true):
    truePos = [0,0,0,0,0,0]
    falsePos = [0,0,0,0,0,0]
    trueNeg = [0,0,0,0,0,0]
    falseNeg = [0,0,0,0,0,0]
    
    for i in range(len(y)):
        testVal = y[i]
        trueVal = y_true[i]
        if trueVal == testVal:
            truePos[trueVal] += 1
            
            for j in range(1,6):
                if j != trueVal:
                    trueNeg[j] += 1
        else:
            falsePos[testVal] += 1
            falseNeg[trueVal] += 1
            for j in range(1,6):
                if j != trueVal and j != testVal:
                    trueNeg[j] += 1
    
    return [trueNeg, falsePos, falseNeg, truePos]

# Print key for readability
key = dict()
for i in range(1,5):
    key[i] = f"{(i-1)*20} - {i*20-1}"
key[5] = f"{80} - {100}"
print("Key:")
print(key)
print("")


# Generate Confusion Matrix
y_predict = classifyOutput(nn_model.predict(X_test))
confusionMat = generateConfusionMatrix(y_predict, np.array(y_test))

# # Sanity check for checking precision match with classifyOutput
# for i in range(7):
#     tp = confusionMat[3][i]
#     fn = confusionMat[2][i]
#     fp = confusionMat[1][i]
#     tn = confusionMat[0][i]
#     if tp == 0:
#         print(0)
#         continue
#     print(tp/(tp+fp))

# Print confusion matrix
print("Confusion Matrix Structure:")
print(np.array([["True Negative","False Positive"],["False Negative","True Positive"]]), end = "\n\n")
    
for i in range(1,6):
    tp = confusionMat[3][i]
    fn = confusionMat[2][i]
    fp = confusionMat[1][i]
    tn = confusionMat[0][i]
    print("Confusion Matrix for " + key[i])
    print(np.array([[tn,fp],[fn,tp]]), end = "\n\n")

    
# Print classification report
test_output = np.asarray(y_predict)


print("Test Report")
print(classification_report(y_test, test_output, zero_division = 0))

Key:
{1: '0 - 19', 2: '20 - 39', 3: '40 - 59', 4: '60 - 79', 5: '80 - 100'}

Confusion Matrix Structure:
[['True Negative' 'False Positive']
 ['False Negative' 'True Positive']]

Confusion Matrix for 0 - 19
[[27997   791]
 [ 3762  1387]]

Confusion Matrix for 20 - 39
[[11839  8343]
 [ 5110  8645]]

Confusion Matrix for 40 - 59
[[14281  7334]
 [ 4885  7437]]

Confusion Matrix for 60 - 79
[[31323     0]
 [ 2614     0]]

Confusion Matrix for 80 - 100
[[33840     0]
 [   97     0]]

Test Report
              precision    recall  f1-score   support

           1       0.64      0.27      0.38      5149
           2       0.51      0.63      0.56     13755
           3       0.50      0.60      0.55     12322
           4       0.00      0.00      0.00      2614
           5       0.00      0.00      0.00        97

    accuracy                           0.51     33937
   macro avg       0.33      0.30      0.30     33937
weighted avg       0.49      0.51      0.48     33937

