In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support
import numpy as np
from sklearn import svm
import random

In [None]:
# Load the music feature table from a CSV path relative to the working directory.
df = pd.read_csv(filepath_or_buffer='core/music_features.csv')

In [None]:
# Count the missing values in every column and print the per-column totals.
missing_per_column = df.isnull().sum()
print(missing_per_column)

In [None]:
# Encode the genre names in df['label'] as integer class ids stored in df['label_encoded'].
label_encoder = LabelEncoder()
df['label_encoded'] = label_encoder.fit_transform(df['label'])

# Map each integer id back to its genre name. LabelEncoder assigns ids by position in
# `classes_`, so enumerate() yields the same (id, name) pairs as transform(classes_)
# without a second pass through the encoder, and keeps the keys as plain Python ints
# (NumPy >= 2 would otherwise print them as `np.int64(...)`).
label_mapping = dict(enumerate(label_encoder.classes_))
print("Label mapping:", label_mapping)

In [None]:
# data_for_corr = df.drop(columns=['filename', 'label'])

# corr_matrix = data_for_corr.corr()

# plt.figure(figsize=(15, 10))
# sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', linewidths=0.5)
# plt.title('Correlation Matrix of Music Features')
# plt.show()

In [None]:
# plt.figure(figsize=(8, 6))
# sns.scatterplot(data=df, x='spectral_centroid', y='spectral_bandwidth', hue='label')
# plt.title('Spectral Centroid vs Spectral Bandwidth')
# plt.show()

In [None]:
# plt.figure(figsize=(10, 6))
# sns.boxplot(data=df, x='label', y='mfcc1')
# plt.title('Distribution of MFCC1 by Genre')
# plt.xticks(rotation=90)
# plt.show()

In [None]:
# selected_features = ['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'label']
# sns.pairplot(df[selected_features], hue='label', palette='Set2')
# plt.show()

In [None]:
# Columns to rescale: seven named descriptors plus the columns mfcc1 .. mfcc20.
# NOTE(review): 'zero_crossing_rate' is standardized here but is not among the model
# input columns selected later in this notebook — confirm that omission is intended.
features_to_standardize = [
    'tempo', 'chroma_stft', 'rmse', 'spectral_centroid',
    'spectral_bandwidth', 'rolloff', 'zero_crossing_rate',
    *[f'mfcc{i}' for i in range(1, 21)],
]

# Z-score every listed column in place: subtract the column mean, then divide by the
# column's standard deviation as computed by pandas' default `std()`.
for feature in features_to_standardize:
    column = df[feature]
    centered = column - column.mean()
    df[feature] = centered / column.std()

In [None]:
# Fit a class-balanced logistic regression on the selected feature columns and print
# per-genre precision / recall / F-score / support.
# NOTE(review): predictions are made on the same rows the model was fitted on, so the
# reported scores describe training fit, not held-out performance.
X = df[['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff'] +
       [f'mfcc{i}' for i in range(1, 21)]]
y = df['label_encoded']

# `multi_class` is deprecated since scikit-learn 1.5 and slated for removal; with the
# default lbfgs solver a multiclass target is already fitted with the multinomial loss,
# so the argument is omitted to avoid the FutureWarning / a future TypeError.
lm = LogisticRegression(class_weight='balanced', max_iter=1000)
lm.fit(X, y)

# print("Intercept:", lm.intercept_)
# print("Coefficients:", lm.coef_)

y_pred = lm.predict(X)

# print("Predicted Labels:", y_pred)

# print('Predicted classes:', np.unique(y_pred))

print("Logistic Results")
# labels=np.unique(y) orders the metric arrays by ascending encoded id, so each array
# can be indexed directly with the integer keys of label_mapping.
p, r, f, s = precision_recall_fscore_support(y, y_pred, labels=np.unique(y))
for label, genre in label_mapping.items():
    print(f"Genre: {genre}")
    print(f"  Precision: {p[label]:.4f}")
    print(f"  Recall: {r[label]:.4f}")
    print(f"  F-score: {f[label]:.4f}")
    print(f"  Support: {s[label]}")

In [None]:
# colors = ['r', 'b', 'g', 'c', 'm', 'y', 'k', 'orange', 'purple', 'pink']
# color_map = [colors[label] for label in y]

# plt.figure(figsize=(10, 6))
# plt.scatter(df['spectral_centroid'], df['spectral_bandwidth'], c=color_map, s=3)
# plt.xlabel('Spectral Centroid')
# plt.ylabel('Spectral Bandwidth')
# plt.title('Scatter Plot of Spectral Centroid vs Spectral Bandwidth, Colored by Genre')
# plt.show()

In [None]:
# Linear-kernel SVM with balanced class weights, fitted on the feature columns below
# and then scored against the same rows it was fitted on.
X = df[
    ['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff']
    + [f'mfcc{i}' for i in range(1, 21)]
]
y = df['label_encoded']

# fit() returns the estimator itself, so construction and training are chained.
clf = svm.SVC(class_weight='balanced', kernel='linear').fit(X, y)
y_pred = clf.predict(X)

print("SVM - Linear Results")
# Metric arrays are ordered by the sorted unique encoded ids found in y.
p, r, f, s = precision_recall_fscore_support(y, y_pred, labels=np.unique(y))
for label, genre in label_mapping.items():
    # One print call with a newline separator emits the same five lines per genre.
    print(
        f"Genre: {genre}",
        f"  Precision: {p[label]:.4f}",
        f"  Recall: {r[label]:.4f}",
        f"  F-score: {f[label]:.4f}",
        f"  Support: {s[label]}",
        sep="\n",
    )

In [None]:

# RBF-kernel SVM with balanced class weights, fitted on the feature columns below and
# scored on the same rows; prints per-genre precision / recall / F-score / support.
X = df[['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff'] +
       [f'mfcc{i}' for i in range(1, 21)]]
y = df['label_encoded']

clf = svm.SVC(kernel='rbf', class_weight='balanced')
# Fit on the DataFrame itself (not X.values) so fit and predict receive the same kind of
# input; fitting on a bare array and predicting on a DataFrame makes scikit-learn warn
# that X has feature names the estimator was fitted without.
clf.fit(X, y)

y_pred = clf.predict(X)
# labels=np.unique(y) orders the metric arrays by ascending encoded id, matching the
# integer keys of label_mapping used for indexing below.
p, r, f, s = precision_recall_fscore_support(y, y_pred, labels=np.unique(y))
print("SVM - RBF Results Balanced Weights:")
for label, genre in label_mapping.items():
    print(f"Genre: {genre}")
    print(f"  Precision: {p[label]:.4f}")
    print(f"  Recall: {r[label]:.4f}")
    print(f"  F-score: {f[label]:.4f}")
    print(f"  Support: {s[label]}")


In [None]:
# RBF-kernel SVM with hand-set per-class weights, fitted on the feature columns below
# and scored on the same rows; prints per-genre precision / recall / F-score / support.
X = df[['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff'] +
       [f'mfcc{i}' for i in range(1, 21)]]
y = df['label_encoded']

# Label mapping: {0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9: 'rock'}
# Classes 8 and 9 (reggae and rock per the mapping above) get twice the weight of the rest.
custom_weights = {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 2, 9: 2}

clf = svm.SVC(kernel='rbf', class_weight=custom_weights)
# Fit on the DataFrame itself (not X.values) so fit and predict receive the same kind of
# input; fitting on a bare array and predicting on a DataFrame makes scikit-learn warn
# that X has feature names the estimator was fitted without.
clf.fit(X, y)

y_pred = clf.predict(X)
# labels=np.unique(y) orders the metric arrays by ascending encoded id, matching the
# integer keys of label_mapping used for indexing below.
p, r, f, s = precision_recall_fscore_support(y, y_pred, labels=np.unique(y))
print("SVM - RBF Results with custom weights:")
for label, genre in label_mapping.items():
    print(f"Genre: {genre}")
    print(f"  Precision: {p[label]:.4f}")
    print(f"  Recall: {r[label]:.4f}")
    print(f"  F-score: {f[label]:.4f}")
    print(f"  Support: {s[label]}")