In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('cleaned_data.csv')
df.head()

Unnamed: 0,track_ids,track_names,artists,first_artists,danceability,energy,loudness,mode,acousticness,instrumentalness,valence,tempo,mood
0,1k1Bqnv2R0uJXQN4u6LKYt,Ain't No Sunshine,['Bill Withers'],Bill Withers,0.527,0.415,-11.451,0,0.457,1.7e-05,0.515,78.169,happy
1,3zBhihYUHBmGd2bcQIobrF,(Sittin' On) the Dock of the Bay,['Otis Redding'],Otis Redding,0.768,0.367,-11.226,1,0.683,1.8e-05,0.532,103.621,happy
2,3SdTKo2uVsxFblQjpScoHy,Stand By Me,['Ben E. King'],Ben E. King,0.65,0.306,-9.443,1,0.57,7e-06,0.605,118.068,happy
3,3NfxSdJnVdon1axzloJgba,I Say a Little Prayer,['Aretha Franklin'],Aretha Franklin,0.592,0.355,-14.051,1,0.478,0.0,0.499,133.032,happy
4,4kP69y3GKHi9tXckfgp4bK,For Once In My Life,['Stevie Wonder'],Stevie Wonder,0.524,0.519,-11.903,1,0.195,0.0,0.847,110.121,happy


In [3]:
labels = df['mood'].copy()

columns_to_drop = ['track_ids', 'track_names', 'artists', 'first_artists', 'mood', 'instrumentalness']
cleaned_df = df.drop(columns=columns_to_drop)

cleaned_df.head()

Unnamed: 0,danceability,energy,loudness,mode,acousticness,valence,tempo
0,0.527,0.415,-11.451,0,0.457,0.515,78.169
1,0.768,0.367,-11.226,1,0.683,0.532,103.621
2,0.65,0.306,-9.443,1,0.57,0.605,118.068
3,0.592,0.355,-14.051,1,0.478,0.499,133.032
4,0.524,0.519,-11.903,1,0.195,0.847,110.121


In [16]:
correlation_matrix = cleaned_df.corr()
print(correlation_matrix)

              danceability    energy  loudness      mode  acousticness  \
danceability      1.000000  0.166568  0.140094 -0.064893     -0.209244   
energy            0.166568  1.000000  0.744870 -0.133596     -0.746826   
loudness          0.140094  0.744870  1.000000 -0.079051     -0.590361   
mode             -0.064893 -0.133596 -0.079051  1.000000      0.094436   
acousticness     -0.209244 -0.746826 -0.590361  0.094436      1.000000   
valence           0.462757  0.437580  0.221012  0.004307     -0.336262   
tempo            -0.168354  0.206758  0.119144 -0.038581     -0.144711   

               valence     tempo  
danceability  0.462757 -0.168354  
energy        0.437580  0.206758  
loudness      0.221012  0.119144  
mode          0.004307 -0.038581  
acousticness -0.336262 -0.144711  
valence       1.000000  0.046120  
tempo         0.046120  1.000000  


In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

clf = DecisionTreeClassifier()
params = {"max_depth": [4, 6, 8],
          "max_features": [4, 6, 8]}
grid_search = GridSearchCV(clf, params, cv = 5, scoring = 'accuracy', n_jobs = -1)
y_pred = cross_val_predict(grid_search, cleaned_df, labels, cv = 5)

accuracy = accuracy_score(labels, y_pred)
print("Accuracy: ", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))

conf_matrix = confusion_matrix(labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy:  0.7676712328767124
Classification Report:
              precision    recall  f1-score   support

       happy       0.78      0.76      0.77       929
         sad       0.75      0.78      0.77       896

    accuracy                           0.77      1825
   macro avg       0.77      0.77      0.77      1825
weighted avg       0.77      0.77      0.77      1825

Confusion Matrix:
[[702 227]
 [197 699]]


In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

scaler = StandardScaler()
pca = PCA()
knn = KNeighborsClassifier(n_neighbors = 7)
pipeline = Pipeline([
    ('scaler', scaler),
    ('pca', pca),
    ('knn', knn)
])

param_grid = {
    'pca__n_components': list(range(2, 8)),
    'knn__n_neighbors': list(range(2, 5))
}

grid_search = GridSearchCV(pipeline, param_grid, cv = 5, n_jobs = -1)
y_pred = cross_val_predict(grid_search, cleaned_df, labels, cv = 5)

accuracy = accuracy_score(labels, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))

conf_matrix = confusion_matrix(labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.7309589041095891
Classification Report:
              precision    recall  f1-score   support

       happy       0.74      0.72      0.73       929
         sad       0.72      0.75      0.73       896

    accuracy                           0.73      1825
   macro avg       0.73      0.73      0.73      1825
weighted avg       0.73      0.73      0.73      1825

Confusion Matrix:
[[666 263]
 [228 668]]


In [17]:
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import warnings

# Define the pipeline
pipeline_mlp = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(max_iter = 20, solver = 'adam', random_state = 8))
])

# Define the parameter grid for grid search
param_grid_mlp = {
    'mlp__hidden_layer_sizes': [5, 10, 15, 20],
    'mlp__activation': ['logistic', 'tanh', 'relu']
}

# Perform grid search
grid_search_mlp = GridSearchCV(pipeline_mlp, param_grid_mlp, cv = 5, n_jobs = -1)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    y_pred = cross_val_predict(grid_search_mlp, cleaned_df, labels, cv = 5)

accuracy = accuracy_score(labels, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))

conf_matrix = confusion_matrix(labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.776986301369863
Classification Report:
              precision    recall  f1-score   support

       happy       0.79      0.77      0.78       929
         sad       0.76      0.79      0.78       896

    accuracy                           0.78      1825
   macro avg       0.78      0.78      0.78      1825
weighted avg       0.78      0.78      0.78      1825

Confusion Matrix:
[[712 217]
 [190 706]]


In [18]:
from sklearn.ensemble import RandomForestClassifier

pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', RandomForestClassifier())
])

param_grid_rf = {
    'rf__max_depth': list(range(2, 8)),
    'rf__max_features': ["sqrt", "log2"]
}

grid_search_rf = GridSearchCV(pipeline_rf, param_grid_rf, cv=5, n_jobs=-1)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    y_pred_rf = cross_val_predict(grid_search_rf, cleaned_df, labels, cv=5)

accuracy_rf = accuracy_score(labels, y_pred_rf)
print("Accuracy:", accuracy_rf)
print("Classification Report:")
print(classification_report(labels, y_pred_rf))

print("Confusion Matrix:")
print(confusion_matrix(labels, y_pred_rf))


Accuracy: 0.7627397260273973
Classification Report:
              precision    recall  f1-score   support

       happy       0.78      0.74      0.76       929
         sad       0.74      0.79      0.77       896

    accuracy                           0.76      1825
   macro avg       0.76      0.76      0.76      1825
weighted avg       0.76      0.76      0.76      1825

Confusion Matrix:
[[686 243]
 [190 706]]


In [15]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_predict
import numpy as np

clf = AdaBoostClassifier()
params = {
    'n_estimators': [50, 100, 150, 200]
}
grid_search = GridSearchCV(clf, param_grid, cv = 5, scoring = 'accuracy')
y_pred = cross_val_predict(grid_search, cleaned_df, labels, cv = 5)

accuracy = accuracy_score(labels, y_pred)
print("Accuracy: ", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))

conf_matrix = confusion_matrix(labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy:  0.7556164383561644
Classification Report:
              precision    recall  f1-score   support

       happy       0.77      0.74      0.75       929
         sad       0.74      0.77      0.76       896

    accuracy                           0.76      1825
   macro avg       0.76      0.76      0.76      1825
weighted avg       0.76      0.76      0.76      1825

Confusion Matrix:
[[686 243]
 [203 693]]
