# Supervised Learning Music Genre Classification

## Import Statements

Start by importing necessary libraries.

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import soundfile as sf
import joblib

## Import Data from prepared CSV

In [2]:
data = pd.read_csv('all_genres_audio_features.csv')
data = data.drop(columns= ['filename','start','end']) 

genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

data.shape

(9981, 58)

Check for duplicates. 

In [3]:
def check_duplicates_or_nulls(data):
    duplicates = data.duplicated().sum() > 0
    nulls = data.isnull().sum().sum() > 0
    return duplicates or nulls

if check_duplicates_or_nulls(data):
    print('Duplicates or null values found in data')
else:
    print('No duplicates or null values found in data')

Duplicates or null values found in data


Deal with strings in features

In [4]:
# Clean the 'tempo' column
data['tempo'] = data['tempo'].str.strip('[]').astype(float)

# Verify the changes
data.describe(include='all')

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,genre
count,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,...,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981
unique,,,,,,,,,,,...,,,,,,,,,,10
top,,,,,,,,,,,...,,,,,,,,,,blues
freq,,,,,,,,,,,...,,,,,,,,,,1000
mean,0.379964,0.084882,0.130039,0.002672433,2201.910957,415925.5,2244.56246,118312.1,4571.568401,1623468.0,...,49.879672,-4.193187,51.838396,0.724376,52.343694,-2.497094,54.811697,-0.929246,57.142098,
std,0.090624,0.009675,0.068168,0.003561532,750.540439,433967.5,541.420376,100250.1,1639.481644,1482634.0,...,34.35813,5.668784,36.301829,5.175797,38.06779,5.107192,41.505894,5.247202,46.342797,
min,0.108073,0.015217,0.000947,4.055916e-08,479.905803,2161.498,499.577102,1295.35,673.906438,1130.834,...,1.343237,-27.932222,1.531855,-20.749748,3.445752,-27.359076,3.147765,-35.614895,0.253587,
25%,0.316037,0.07982,0.083223,0.000628582,1634.097152,122833.6,1890.204723,49414.1,3389.905912,556238.5,...,29.52179,-7.948162,29.82122,-2.524088,29.405123,-5.734853,30.38486,-4.01272,29.925747,
50%,0.385163,0.085137,0.120488,0.001500287,2211.777107,264393.1,2233.071916,90371.22,4634.773513,1155826.0,...,41.505714,-4.444725,42.23508,0.730935,41.686157,-2.700388,43.264107,-1.045194,44.173588,
75%,0.442906,0.091154,0.175334,0.00311376,2713.457812,561220.0,2590.295338,157892.9,5597.307692,2251497.0,...,59.06206,-0.731065,61.46762,3.871771,61.854343,0.521315,65.16512,2.192562,68.00071,


## Begin Preprocessing

Apply the low pass filter through use of a moving average with a window size of 3.

In [5]:
def apply_moving_average_filter(df, window_size=3):
    feature_columns = df.columns.difference(['genre'])
    df[feature_columns] = df[feature_columns].rolling(window=window_size, min_periods=1).mean()
    return df

data = apply_moving_average_filter(data)

## Shuffle and Split data

Split data into training and testing sets (70/30).

In [6]:
# Split the data into features and target variable
X = np.array(data.iloc[:, :-1], dtype=float)
y = data.iloc[:, -1]

# Split the dataset with 70% for training set and 30% for test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

## Normalize the data set and encode the labels

In [7]:
# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Encode the target labels
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

## Model initialization function

Define function for model train and evaluation

In [8]:
# Define the model training and evaluation function with cross-validation
def train_and_evaluate_model(model, param_grid):
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # Cross-validation
    cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='accuracy')
    print(f'Cross-validation scores: {cv_scores}')
    print(f'Mean cross-validation score: {np.mean(cv_scores)}')

    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=encoder.classes_)
    return best_model, accuracy, report


Random Forest Model.

In [9]:
rf_param_grid = {
    'n_estimators': [100, 250, 1000],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
rf_model = RandomForestClassifier()
rf_best_model, rf_accuracy, rf_report = train_and_evaluate_model(rf_model, rf_param_grid)
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", rf_report)

540 fits failed out of a total of 1620.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
540 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/isaiah/Desktop/Career/Projects/music-genre-detector/.venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/isaiah/Desktop/Career/Projects/music-genre-detector/.venv/lib/python3.12/site-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/Users/isaiah/Desktop/Career/Projects/music-genre-detector/.venv/lib/python3.12/site-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(


Cross-validation scores: [0.93347639 0.94559771 0.94058697 0.94488189 0.94201861]
Mean cross-validation score: 0.9413123154767573
Random Forest Accuracy: 0.9442404006677796
Random Forest Classification Report:
               precision    recall  f1-score   support

       blues       0.98      0.94      0.96       319
   classical       0.96      0.99      0.97       308
     country       0.89      0.97      0.93       286
       disco       0.94      0.91      0.92       299
      hiphop       0.97      0.90      0.93       326
        jazz       0.94      0.97      0.95       280
       metal       0.96      0.96      0.96       314
         pop       0.95      0.96      0.96       282
      reggae       0.94      0.94      0.94       303
        rock       0.91      0.91      0.91       278

    accuracy                           0.94      2995
   macro avg       0.94      0.94      0.94      2995
weighted avg       0.95      0.94      0.94      2995



Support Vector Machine Model. 

In [10]:
svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}
svm_model = SVC()
svm_best_model, svm_accuracy, svm_report = train_and_evaluate_model(svm_model, svm_param_grid)
print("SVM Accuracy:", svm_accuracy)
print("SVM Classification Report:\n", svm_report)

Cross-validation scores: [0.96781116 0.96921976 0.97351467 0.97709377 0.97351467]
Mean cross-validation score: 0.9722308072786259
SVM Accuracy: 0.9826377295492488
SVM Classification Report:
               precision    recall  f1-score   support

       blues       0.99      0.99      0.99       319
   classical       1.00      1.00      1.00       308
     country       0.96      0.99      0.98       286
       disco       0.98      0.96      0.97       299
      hiphop       0.99      0.95      0.97       326
        jazz       0.99      1.00      0.99       280
       metal       0.99      0.98      0.99       314
         pop       0.98      1.00      0.99       282
      reggae       0.96      0.98      0.97       303
        rock       0.97      0.97      0.97       278

    accuracy                           0.98      2995
   macro avg       0.98      0.98      0.98      2995
weighted avg       0.98      0.98      0.98      2995



Gradient Boost Model.

In [11]:
gb_param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}
gb_model = GradientBoostingClassifier()
gb_best_model, gb_accuracy, gb_report = train_and_evaluate_model(gb_model, gb_param_grid)
print("Gradient Boosting Accuracy:", gb_accuracy)
print("Gradient Boosting Classification Report:\n", gb_report)

Cross-validation scores: [0.9434907  0.94130279 0.94488189 0.956335   0.94989263]
Mean cross-validation score: 0.9471806026197565
Gradient Boosting Accuracy: 0.9519198664440734
Gradient Boosting Classification Report:
               precision    recall  f1-score   support

       blues       0.97      0.94      0.96       319
   classical       0.99      0.98      0.99       308
     country       0.90      0.98      0.94       286
       disco       0.97      0.94      0.95       299
      hiphop       0.97      0.92      0.94       326
        jazz       0.95      0.97      0.96       280
       metal       0.97      0.95      0.96       314
         pop       0.95      0.95      0.95       282
      reggae       0.95      0.97      0.96       303
        rock       0.89      0.93      0.91       278

    accuracy                           0.95      2995
   macro avg       0.95      0.95      0.95      2995
weighted avg       0.95      0.95      0.95      2995



KNeighbours Model. 

In [12]:
knn_param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}
knn_model = KNeighborsClassifier()
knn_best_model, knn_accuracy, knn_report = train_and_evaluate_model(knn_model, knn_param_grid)
print("KNeighbors Accuracy:", knn_accuracy)
print("KNeighbors Classification Report:\n", knn_report)

Cross-validation scores: [0.97496423 0.97494631 0.97494631 0.97423049 0.97637795]
Mean cross-validation score: 0.9750930616700615
KNeighbors Accuracy: 0.9789649415692822
KNeighbors Classification Report:
               precision    recall  f1-score   support

       blues       0.99      0.97      0.98       319
   classical       0.98      1.00      0.99       308
     country       0.96      0.99      0.97       286
       disco       0.97      0.97      0.97       299
      hiphop       0.99      0.96      0.97       326
        jazz       0.99      0.97      0.98       280
       metal       0.99      0.98      0.99       314
         pop       0.99      0.99      0.99       282
      reggae       0.97      0.99      0.98       303
        rock       0.96      0.96      0.96       278

    accuracy                           0.98      2995
   macro avg       0.98      0.98      0.98      2995
weighted avg       0.98      0.98      0.98      2995



## Save models for future use

Use library to save model for export/import

In [13]:
# Save the best models
joblib.dump(rf_best_model, 'outputs/rf_best_model.pkl')
joblib.dump(svm_best_model, 'outputs/svm_best_model.pkl')
joblib.dump(gb_best_model, 'outputs/gb_best_model.pkl')
joblib.dump(knn_best_model, 'outputs/knn_best_model.pkl')

# Save the scaler and encoder
joblib.dump(scaler, 'outputs/scaler.pkl')
joblib.dump(encoder, 'outputs/encoder.pkl')

# Save the scaled data and encoded labels
joblib.dump(X_train, 'outputs/X_train.pkl')
joblib.dump(X_test, 'outputs/X_test.pkl')
joblib.dump(y_train, 'outputs/y_train.pkl')
joblib.dump(y_test, 'outputs/y_test.pkl')

['outputs/y_test.pkl']