In [147]:
import os
from pathlib import Path

import pandas as pd
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

In [148]:
# Directory holding the cleaned track data. The default is the original
# author's local folder; set HARMONIC_HORIZONS_DATA_DIR to point somewhere
# else so the notebook can run on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    'HARMONIC_HORIZONS_DATA_DIR',
    '/Users/bryanzhang/Desktop/career/projects/harmonic_horizons/data',
))

# Load the cleaned tracks table used by every modelling cell.
df = pd.read_csv(DATA_DIR / 'cleaned_tracks2.csv')

In [149]:
# Multinomial logistic regression with a ridge (L2) penalty.
# Target is the 'mood' column; every other column except 'name' is a feature.
# NOTE(review): the features are fed in unscaled here, which may be why such a
# large max_iter is used -- confirm before relying on this baseline.
y = df['mood']
X = df.drop(columns=['mood', 'name'])

# 80/20 hold-out split with a fixed seed so the results are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# class_weight='balanced' re-weights classes inversely to their frequency.
model = LogisticRegression(
    penalty='l2',
    solver='lbfgs',
    max_iter=100000,
    class_weight='balanced',
).fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 56.34%


In [150]:
# PCA
# Standardise the features, project them onto 14 principal components, then
# fit an unpenalised logistic regression on the projected data.

# The scaler learns its mean/variance from the training rows only and is then
# applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Likewise, the components are estimated on the training rows only.
pca = PCA(n_components=14)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

model = LogisticRegression(
    penalty=None,
    solver='lbfgs',
    class_weight='balanced',
).fit(X_train_pca, y_train)

y_pred = model.predict(X_test_pca)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 56.46%


In [151]:
# LDA
# Linear discriminant analysis fitted on the unscaled training split.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)
y_pred = lda.predict(X_test)

# Rows of the confusion matrix are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')
conf_matrix

Accuracy: 56.61%


array([[557,  12, 186,  62,  53],
       [ 40, 945,  61,  22, 145],
       [253,  45, 294,  94,  58],
       [ 92,  65, 103, 152,  59],
       [131, 174,  75,  42, 364]])

In [152]:
# QDA
# Quadratic discriminant analysis fitted on the unscaled training split.
qda = QuadraticDiscriminantAnalysis().fit(X_train, y_train)
y_pred = qda.predict(X_test)

# Rows of the confusion matrix are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')
conf_matrix

Accuracy: 55.34%


array([[629,  21, 104,  57,  59],
       [ 53, 910,  73,  33, 144],
       [307,  47, 228, 114,  48],
       [145,  51,  83, 153,  39],
       [174, 147,  65,  60, 340]])

In [158]:
# KNN
# Distance-weighted k-nearest-neighbours classifier on standardised features.

# Rebuild the feature matrix, target and 80/20 split (fixed seed) inside this
# cell so it does not rely on a split left over from an earlier cell.
X = df.drop(columns=['mood', 'name'])
y = df['mood']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 25 neighbours; weights='distance' makes closer neighbours count for more.
knn = KNeighborsClassifier(n_neighbors=25, weights='distance')
knn.fit(X_train_scaled, y_train)

y_pred = knn.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)  # computed but not displayed by this cell
report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')
# classification_report returns a plain multi-line string. Print it so the
# table renders with real line breaks; a bare `report` expression would show
# the escaped repr ('...\n...') on a single line instead.
print(report)

Accuracy: 58.30%


'              precision    recall  f1-score   support\n\n       angry       0.54      0.64      0.59       870\n        calm       0.76      0.81      0.79      1213\n   energetic       0.43      0.41      0.42       744\n       happy       0.43      0.34      0.38       471\n         sad       0.54      0.48      0.51       786\n\n    accuracy                           0.58      4084\n   macro avg       0.54      0.54      0.54      4084\nweighted avg       0.58      0.58      0.58      4084\n'

In [154]:
# Naive Bayes
# Gaussian naive Bayes fitted on the unscaled training split.
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# The confusion matrix is stored but not displayed by this cell.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 53.38%


In [155]:
# Decision Trees
# Single decision tree with default hyper-parameters; random_state is pinned
# so repeated runs build the same tree.
clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
y_pred = clf.predict(X_test)

# The confusion matrix is stored but not displayed by this cell.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 50.61%


False