In [104]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
import random

In [105]:
# Load the per-track audio feature table that every later cell works from.
FEATURES_CSV = 'music_features.csv'
df = pd.read_csv(FEATURES_CSV)

In [None]:
# Count missing values per column as a quick data-quality check.
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Encode the string genre labels as integers and keep an
# id -> genre-name lookup for reporting in later cells.
label_encoder = LabelEncoder()
df['label_encoded'] = label_encoder.fit_transform(df['label'])

genre_names = label_encoder.classes_
genre_ids = label_encoder.transform(genre_names)
label_mapping = {genre_id: genre for genre_id, genre in zip(genre_ids, genre_names)}
print("Label mapping:", label_mapping)

In [None]:
# Pairwise correlation of every column except the two identifier/label
# string columns. Any other column present in df (e.g. an encoded label
# added by an earlier cell) is included in the matrix.
data_for_corr = df.drop(['filename', 'label'], axis='columns')
corr_matrix = data_for_corr.corr()

fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Correlation Matrix of Music Features')
plt.show()

In [None]:
# Scatter of two spectral features, coloured by genre label.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='spectral_centroid', y='spectral_bandwidth', hue='label', ax=ax)
ax.set_title('Spectral Centroid vs Spectral Bandwidth')
plt.show()

In [None]:
# Per-genre distribution of the first MFCC coefficient.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='label', y='mfcc1', ax=ax)
ax.set_title('Distribution of MFCC1 by Genre')
# Rotate the genre names so they do not overlap on the x axis.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [112]:
# Z-score the numeric audio features in place.
#
# The scaling statistics (mean / std) are fitted on the TRAINING rows only and
# then applied to every row. Computing them over the whole table would leak
# test-set information into the features the models are trained on.
features_to_standardize = [
    'tempo', 'chroma_stft', 'rmse', 'spectral_centroid',
    'spectral_bandwidth', 'rolloff', 'zero_crossing_rate'
] + [f'mfcc{i}' for i in range(1, 21)]

# Reproduce the split used by the model cells. The partition depends only on
# the number of rows, the stratification labels, test_size and random_state,
# so these arguments MUST stay in sync with the train_test_split calls in the
# modelling cells (test_size=0.2, random_state=42, stratify on label_encoded).
train_positions, _ = train_test_split(
    np.arange(len(df)),
    test_size=0.2,
    random_state=42,
    stratify=df['label_encoded'],
)

train_features = df.iloc[train_positions][features_to_standardize]
feature_means = train_features.mean()
# A constant feature has std 0; divide by 1 instead so it becomes all zeros
# rather than NaN/inf.
feature_stds = train_features.std().replace(0, 1)

df[features_to_standardize] = (df[features_to_standardize] - feature_means) / feature_stds

In [None]:
# Multinomial logistic regression baseline on the standardized features.
#
# NOTE(review): 'zero_crossing_rate' is standardized earlier in the notebook
# but is not part of this feature list -- confirm the omission is intentional.
feature_columns = (
    ['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff']
    + [f'mfcc{i}' for i in range(1, 21)]
)
X = df[feature_columns]
y = df['label_encoded']

# Stratified 80/20 split with a fixed seed so every model cell evaluates on
# the same held-out rows.
X_train, X_test, y_train, y_test_logistic = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# `multi_class='multinomial'` is intentionally not passed: the argument is
# deprecated in recent scikit-learn releases, and the default lbfgs solver
# already fits a multinomial model when there are more than two classes.
lm = LogisticRegression(class_weight='balanced', max_iter=1000)
lm.fit(X_train, y_train)

y_pred_logistic = lm.predict(X_test)

print("Logistic Regression Results on Test Set")
# labels=np.unique(y) returns the metrics in sorted class-id order, so the
# encoded label can be used directly as an index into each metric array.
p, r, f, s = precision_recall_fscore_support(y_test_logistic, y_pred_logistic, labels=np.unique(y))
for label, genre in label_mapping.items():
    print(f"Genre: {genre}")
    print(f"  Precision: {p[label]:.4f}")
    print(f"  Recall: {r[label]:.4f}")
    print(f"  F-score: {f[label]:.4f}")
    print(f"  Support: {s[label]}")

In [None]:
# Linear-kernel SVM with balanced class weights, evaluated on the same
# stratified 80/20 split parameters as the other model cells.
spectral_columns = ['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff']
mfcc_columns = [f'mfcc{i}' for i in range(1, 21)]
X = df[spectral_columns + mfcc_columns]
y = df['label_encoded']

X_train, X_test, y_train, y_test_svm_linear = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

clf = svm.SVC(kernel='linear', class_weight='balanced')
clf.fit(X_train, y_train)
y_pred_svm_linear = clf.predict(X_test)

print("SVM - Linear Results on Test Set")
# Metrics come back in sorted class-id order, so the encoded label doubles as
# the index into each metric array.
p, r, f, s = precision_recall_fscore_support(
    y_test_svm_linear, y_pred_svm_linear, labels=np.unique(y)
)
for label, genre in label_mapping.items():
    report_lines = [
        f"Genre: {genre}",
        f"  Precision: {p[label]:.4f}",
        f"  Recall: {r[label]:.4f}",
        f"  F-score: {f[label]:.4f}",
        f"  Support: {s[label]}",
    ]
    print("\n".join(report_lines))

In [None]:
# RBF-kernel SVM with balanced class weights, evaluated on the same
# stratified 80/20 split parameters as the other model cells.
spectral_columns = ['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff']
mfcc_columns = [f'mfcc{i}' for i in range(1, 21)]
X = df[spectral_columns + mfcc_columns]
y = df['label_encoded']

X_train, X_test, y_train, y_test_svm_balanced = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

clf = svm.SVC(kernel='rbf', class_weight='balanced')
clf.fit(X_train, y_train)
y_pred_svm_balanced = clf.predict(X_test)

print("SVM - RBF Results on Test Set with Balanced Weights:")
# Metrics come back in sorted class-id order, so the encoded label doubles as
# the index into each metric array.
p, r, f, s = precision_recall_fscore_support(
    y_test_svm_balanced, y_pred_svm_balanced, labels=np.unique(y)
)
for label, genre in label_mapping.items():
    report_lines = [
        f"Genre: {genre}",
        f"  Precision: {p[label]:.4f}",
        f"  Recall: {r[label]:.4f}",
        f"  F-score: {f[label]:.4f}",
        f"  Support: {s[label]}",
    ]
    print("\n".join(report_lines))


In [None]:
# RBF-kernel SVM with hand-tuned class weights: 'reggae' and 'rock' are
# weighted 2x, every other genre 1x.
X = df[['tempo', 'chroma_stft', 'rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff'] +
       [f'mfcc{i}' for i in range(1, 21)]]
y = df['label_encoded']

# Resolve the up-weighted genres to their encoded ids through the fitted
# encoder instead of hard-coding the integers, so the weights stay attached to
# the intended genres. transform() raises if a name is not a known class,
# which surfaces a label mismatch instead of silently weighting the wrong one.
upweighted_ids = {int(class_id) for class_id in label_encoder.transform(['reggae', 'rock'])}
custom_weights = {
    int(class_id): 2 if int(class_id) in upweighted_ids else 1
    for class_id in np.unique(y)
}

X_train, X_test, y_train, y_test_svm_rbf = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

clf = svm.SVC(kernel='rbf', class_weight=custom_weights)
clf.fit(X_train, y_train)

y_pred_svm_rbf = clf.predict(X_test)

print("SVM - RBF Results on Test Set with Custom Weights:")
p, r, f, s = precision_recall_fscore_support(y_test_svm_rbf, y_pred_svm_rbf, labels=np.unique(y))
# Reuse the encoder-derived `label_mapping` from the label-encoding cell; it is
# deliberately not redefined here so the notebook has one source of truth for
# id -> genre names.
for label, genre in label_mapping.items():
    print(f"Genre: {genre}")
    print(f"  Precision: {p[label]:.4f}")
    print(f"  Recall: {r[label]:.4f}")
    print(f"  F-score: {f[label]:.4f}")
    print(f"  Support: {s[label]}")

In [None]:
# Confusion-matrix heatmaps for the four classifiers trained above.
target_names = ['Blues', 'Classical', 'Country', 'Disco', 'Hiphop', 'Jazz',
                'Metal', 'Pop', 'Reggae', 'Rock']

# Pin the class order explicitly so every matrix is len(target_names) square
# and lines up with the tick labels even if a class is absent from y_test or
# from the predictions. Assumes the encoded labels are 0..len(target_names)-1
# in the same order as target_names.
class_ids = np.arange(len(target_names))

mat_logistic = confusion_matrix(y_test_logistic, y_pred_logistic, labels=class_ids)
mat_svm_linear = confusion_matrix(y_test_svm_linear, y_pred_svm_linear, labels=class_ids)
mat_svm_rbf_balanced = confusion_matrix(y_test_svm_balanced, y_pred_svm_balanced, labels=class_ids)
mat_svm_rbf = confusion_matrix(y_test_svm_rbf, y_pred_svm_rbf, labels=class_ids)

titled_matrices = [
    ('Confusion Matrix - Logistic Regression', mat_logistic),
    ('Confusion Matrix - SVM (Linear Kernel)', mat_svm_linear),
    ('Confusion Matrix - SVM (RBF Kernel) Balanced', mat_svm_rbf_balanced),
    ('Confusion Matrix - SVM (RBF Kernel) Weighted', mat_svm_rbf),
]

for title, matrix in titled_matrices:
    fig, ax = plt.subplots(figsize=(8, 6))
    # confusion_matrix puts true labels on rows; the transpose moves them to
    # the x axis to match the axis labels below.
    sns.heatmap(matrix.T, square=True, annot=True, fmt='d', cbar=False,
                xticklabels=target_names, yticklabels=target_names, ax=ax)
    ax.set_xlabel('True Label')
    ax.set_ylabel('Predicted Label')
    ax.set_title(title)
    plt.show()