In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn import metrics
plt.rc("font", size=14)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sn
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             f1_score, classification_report, confusion_matrix,
                             roc_auc_score, roc_curve, matthews_corrcoef)
from sklearn.utils import resample

# Load the two labelled track tables from CSV.
test_set = pd.read_csv("not100.csv")
train_set = pd.read_csv("usMerged.csv", encoding="unicode_escape")

# Audio-feature columns used as model inputs.
col_names = [
    "danceability",
    "energy",
    "loudness",
    "acousticness",
    "instrumentalness",
]

# NOTE(review): despite their names, `test_set` and `train_set` are pooled
# into one frame here and re-split at random below; neither file is held out
# as a whole.
frames1 = [test_set, train_set]
# NOTE(review): dropna() with default arguments discards a row when ANY of
# its columns is missing, not only the model columns or `class` -- confirm
# that this is intended.
data = pd.concat(frames1).dropna()

X = data[col_names]
y = data["class"]

# 75 % / 25 % split; the fixed random_state keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Fit a logistic-regression classifier on the training split and predict
# class labels for the held-out rows.
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)
y_pred = logistic_regression.predict(X_test)

# Cross-tabulate actual vs. predicted labels and draw the counts as a heatmap
# (rows: actual class, columns: predicted class).
con_matrix = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])
# fmt="g" writes each cell count as a plain integer. seaborn's default
# annotation format ('.2g') keeps only two significant digits, so a count
# such as 209 would be drawn as "2.1e+02".
sn.heatmap(con_matrix, annot=True, fmt="g")

# Headline metrics on the held-out rows; they are printed before plt.show()
# so the text output precedes the rendered figure.
print('Accuracy: ', metrics.accuracy_score(y_test, y_pred))
print('Recall: ', metrics.recall_score(y_test, y_pred))
print('Precision: ', metrics.precision_score(y_test, y_pred))
plt.show()


# ROC curve for the fitted classifier on the held-out rows.
# Column 1 of predict_proba is the score for the second entry of
# `logistic_regression.classes_`.
y_pred_prob = logistic_regression.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Dashed black diagonal first (reference line for a random classifier),
# then the model's curve on top of it.
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr)

plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()



Accuracy:  0.7667638483965015
Recall:  0.9457013574660633
Precision:  0.7545126353790613


<Figure size 640x480 with 2 Axes>

<Figure size 640x480 with 1 Axes>

In [2]:
# Fuller evaluation of the held-out predictions: accuracy, AUROC, Matthews
# correlation coefficient, per-class report and the raw confusion matrix.

# Score for the second entry of `logistic_regression.classes_`; AUROC needs
# scores rather than hard labels.
y_pred_prob = logistic_regression.predict_proba(X_test)[:, 1]
cm = confusion_matrix(y_test, y_pred)

print('Accuracy Score:', accuracy_score(y_test, y_pred))
print('AUROC Score:', roc_auc_score(y_test, y_pred_prob))
print('MCC:', matthews_corrcoef(y_test, y_pred))
# NOTE(review): the label below is spelled 'Clasification'; it is kept
# verbatim here so the printed output does not change.
print('\n Clasification Report:\n', classification_report(y_test, y_pred))
print('\n Confusion Matrix:\n', cm)

Accuracy Score: 0.7667638483965015
AUROC Score: 0.840201023662933
MCC: 0.4715836909161573

 Clasification Report:
               precision    recall  f1-score   support

           0       0.82      0.44      0.57       122
           1       0.75      0.95      0.84       221

   micro avg       0.77      0.77      0.77       343
   macro avg       0.79      0.69      0.71       343
weighted avg       0.78      0.77      0.75       343


 Confusion Matrix:
 [[ 54  68]
 [ 12 209]]


In [3]:
# Apply the fitted classifier to the tracks listed in mylist_fin.csv.
df2 = pd.read_csv("mylist_fin.csv")

# Same feature columns, in the same order, as the model was fitted on.
col_names = [
    'danceability',
    'energy',
    'loudness',
    'acousticness',
    'instrumentalness',
]
# NOTE(review): assumes these columns contain no missing values in this file;
# no dropna() is applied here -- TODO confirm.
mydf = df2[col_names]

y2_pred = logistic_regression.predict(mydf)

# NOTE(review): this prints the whole frame and then the label array
# separately, so predictions must be matched to tracks by row position.
print(df2)
print(y2_pred)

                                           track_name               artist  \
0   Running Up That Hill - Recorded at Spotify Stu...        First Aid Kit   
1                                      Mountain Sound  Of Monsters and Men   
2                                Breathing Underwater               Metric   
3                  Unsteady - Erich Lee Gravity Remix        X Ambassadors   
4                               Make You Feel My Love     Sleeping At Last   
5   Holding Out for a Hero - From the Trailer for ...  Nothing But Thieves   
6                                            Moondust         Jaymes Young   
7                                          All I Want             Kodaline   
8                                               Woman       Mumford & Sons   
9                                    Hem of Her Dress        First Aid Kit   
10                                         Half Light              BANNERS   
11                                       Turning Page     Sleepi

In [4]:
# Recap of the held-out metrics for the logistic-regression model; these are
# the same three calls on the same `y_test` / `y_pred` as in the first cell.
# The trailing plt.show() was removed: nothing is plotted after the previous
# plt.show(), so there was no figure for it to render.
print('Accuracy: ', metrics.accuracy_score(y_test, y_pred))
print('Recall: ', metrics.recall_score(y_test, y_pred))
print('Precision: ', metrics.precision_score(y_test, y_pred))

Accuracy:  0.7667638483965015
Recall:  0.9457013574660633
Precision:  0.7545126353790613
