In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read cleaned_data.csv (path is relative to the notebook's working directory)
# into a DataFrame, then preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='cleaned_data.csv')
df.head(n=5)

In [None]:
# Columns removed from the feature matrix. 'mood' is the prediction target and
# is kept separately in `labels`.
# NOTE(review): the reason for also dropping 'instrumentalness' is not recorded
# in this notebook -- confirm it is intentional.
columns_to_drop = [
    'track_ids',
    'track_names',
    'artists',
    'first_artists',
    'mood',
    'instrumentalness',
]

# Independent copy of the target column, so it does not share data with `df`.
labels = df['mood'].copy()

# Feature matrix: everything in `df` except the columns listed above.
# drop() returns a new frame; `df` itself is left untouched.
cleaned_df = df.drop(columns_to_drop, axis=1)

cleaned_df.head()

In [None]:
# Pairwise correlation between the feature columns. 'pearson' is the method
# DataFrame.corr uses by default; it is spelled out here for the reader.
correlation_matrix = cleaned_df.corr(method='pearson')
print(correlation_matrix)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Decision-tree baseline evaluated with nested cross-validation:
#   * inner loop  - GridSearchCV (5 folds) picks max_depth / max_features on
#                   each training split, scored by accuracy;
#   * outer loop  - cross_val_predict (5 folds) yields one out-of-fold
#                   prediction per sample, which is what the metrics below use.
#
# random_state is fixed because the tree permutes candidate features at random
# at every split; unseeded, the reported scores change on every run of the
# notebook. The seed value 8 matches the one already used by the MLP cells.
clf = DecisionTreeClassifier(random_state=8)

# NOTE(review): max_features=8 assumes cleaned_df has at least 8 feature
# columns -- TODO confirm.
params = {"max_depth": [4, 6, 8],
          "max_features": [4, 6, 8]}
grid_search = GridSearchCV(clf, params, cv = 5, scoring = 'accuracy', n_jobs = -1)
y_pred = cross_val_predict(grid_search, cleaned_df, labels, cv = 5)

# Metrics are computed on the out-of-fold predictions against the true labels.
accuracy = accuracy_score(labels, y_pred)
print("Accuracy: ", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))

conf_matrix = confusion_matrix(labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

# Standardise -> PCA -> k-nearest-neighbours, tuned and evaluated with nested
# cross-validation (inner GridSearchCV, outer cross_val_predict, 5 folds each).
scaler = StandardScaler()
pca = PCA()
# The n_neighbors given here is only a starting value: the grid below sets
# knn__n_neighbors for every candidate, so 7 itself is never evaluated.
knn = KNeighborsClassifier(n_neighbors=7)
pipeline = Pipeline(steps=[('scaler', scaler), ('pca', pca), ('knn', knn)])

# Candidates: 2..7 principal components crossed with 2..4 neighbours.
param_grid = {
    'pca__n_components': [2, 3, 4, 5, 6, 7],
    'knn__n_neighbors': [2, 3, 4],
}

grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, n_jobs=-1)
y_pred = cross_val_predict(grid_search, cleaned_df, labels, cv=5)

# Score the out-of-fold predictions against the true labels.
accuracy = accuracy_score(labels, y_pred)
conf_matrix = confusion_matrix(labels, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import warnings

# Standardise -> MLP pipeline. The network is capped at 20 training iterations
# (max_iter=20) and seeded with random_state=8.
pipeline_mlp = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(solver='adam', max_iter=20, random_state=8)),
])

# Search space: hidden-layer sizes 5/10/15/20 crossed with three activations.
param_grid_mlp = dict(
    mlp__hidden_layer_sizes=[5, 10, 15, 20],
    mlp__activation=['logistic', 'tanh', 'relu'],
)

# Nested cross-validation: the grid search is the estimator handed to
# cross_val_predict, so tuning is repeated inside each of the 5 outer folds.
grid_search_mlp = GridSearchCV(
    estimator=pipeline_mlp,
    param_grid=param_grid_mlp,
    cv=5,
    n_jobs=-1,
)

# NOTE(review): every warning category is silenced while fitting; with
# max_iter=20 this presumably targets convergence warnings -- confirm nothing
# else is being hidden.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    y_pred = cross_val_predict(grid_search_mlp, cleaned_df, labels, cv=5)

# Score the out-of-fold predictions against the true labels.
accuracy = accuracy_score(labels, y_pred)
conf_matrix = confusion_matrix(labels, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Standardise -> random forest, tuned and evaluated with nested
# cross-validation (inner GridSearchCV, outer cross_val_predict, 5 folds each).
pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),
    # Seeded so that the bootstrap samples and the per-split feature draws are
    # the same on every run; unseeded, the scores printed below change between
    # runs. The seed value 8 matches the one used by the MLP cells.
    ('rf', RandomForestClassifier(random_state=8))
])

# Candidates: tree depth 2..7 crossed with two max_features heuristics.
param_grid_rf = {
    'rf__max_depth': list(range(2, 8)),
    'rf__max_features': ["sqrt", "log2"]
}

grid_search_rf = GridSearchCV(pipeline_rf, param_grid_rf, cv=5, n_jobs=-1)
# NOTE(review): every warning category is silenced while fitting -- confirm
# this is still needed for the random forest.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    y_pred_rf = cross_val_predict(grid_search_rf, cleaned_df, labels, cv=5)

# Score the out-of-fold predictions against the true labels.
accuracy_rf = accuracy_score(labels, y_pred_rf)
print("Accuracy:", accuracy_rf)
print("Classification Report:")
print(classification_report(labels, y_pred_rf))

print("Confusion Matrix:")
print(confusion_matrix(labels, y_pred_rf))


In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_predict

# AdaBoost evaluated with nested cross-validation (inner GridSearchCV, outer
# cross_val_predict, 5 folds each).
#
# random_state is fixed so the seeds handed to the boosted base estimators are
# the same on every run, making the printed scores reproducible. The seed
# value 8 matches the one used by the MLP cells.
clf = AdaBoostClassifier(random_state=8)

# Only the number of boosting rounds is tuned.
params = {
    'n_estimators': [50, 100, 150, 200]
}
# n_jobs=-1 runs the candidate fits in parallel, as the other grid searches in
# this notebook do; it does not change which candidate is selected.
grid_search = GridSearchCV(clf, params, cv = 5, scoring = 'accuracy', n_jobs = -1)
y_pred = cross_val_predict(grid_search, cleaned_df, labels, cv = 5)

# Score the out-of-fold predictions against the true labels.
accuracy = accuracy_score(labels, y_pred)
print("Accuracy: ", accuracy)
print("Classification Report:")
print(classification_report(labels, y_pred))

conf_matrix = confusion_matrix(labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
import pickle

# Train the final MLP on the full dataset and persist the fitted grid search.
scaler = StandardScaler()
# Pre-fit the scaler on the DataFrame (i.e. with feature names).
# NOTE(review): GridSearchCV clones the pipeline before fitting, and the fit
# below is given `cleaned_df.values` (a plain array), so this pre-fit state is
# not carried into the saved model -- confirm whether this line is still needed.
scaler.fit(cleaned_df)
pipeline_mlp = Pipeline([
    ('scaler', scaler),
    ('mlp', MLPClassifier(max_iter=20, solver='adam', random_state=8))
])

# Same search space as the MLP evaluation cell.
param_grid_mlp = {
    'mlp__hidden_layer_sizes': [5, 10, 15, 20],
    'mlp__activation': ['logistic', 'tanh', 'relu']
}

grid_search_mlp = GridSearchCV(pipeline_mlp, param_grid_mlp, cv=5, n_jobs=-1)

# NOTE(review): every warning category is silenced while fitting; with
# max_iter=20 this presumably targets convergence warnings.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # fit() returns the GridSearchCV object itself, so `mlp_model` is the
    # fitted search (refit on all data with the best parameters).
    mlp_model = grid_search_mlp.fit(cleaned_df.values, labels)

# Persist the fitted search. The context manager guarantees the file is
# flushed and closed even if pickling fails; the previous bare open() call
# left the handle open until garbage collection.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(mlp_model, model_file)


In [None]:
import os
import sys
# Print the 'Scripts' sub-directory of the running interpreter's prefix.
# NOTE(review): looks like a leftover environment check (Windows-style layout)
# -- confirm whether this cell is still needed.
scripts_dir = os.path.join(sys.prefix, 'Scripts')
print(scripts_dir)