# Import Required Libraries
Import necessary libraries such as pandas, numpy, librosa, sklearn, matplotlib, and pickle.

In [1]:
import pandas as pd
import numpy as np
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import pickle

# Load and Explore the Dataset
Load the 30-second and 3-second features datasets using pandas and display the first few rows to understand the structure.

In [2]:
# Load the dataset
df_30_sec = pd.read_csv('Data/features_30_sec.csv')
df_3_sec = pd.read_csv('Data/features_3_sec.csv')

# Display the shape and summary of the datasets
print(f"** df_30_sec has {df_30_sec.shape[0]} rows and {df_30_sec.shape[1]} columns **")
df_30_sec.describe()

print(f"** df_3_sec has {df_3_sec.shape[0]} rows and {df_3_sec.shape[1]} columns **")
df_30_sec.describe()

# Display the first few rows of the datasets
print("30-Second Features Dataset:")
print(df_30_sec.head())

print("\n3-Second Segments Dataset:")
print(df_3_sec.head())

** df_30_sec has 1000 rows and 60 columns **
** df_3_sec has 9990 rows and 60 columns **
30-Second Features Dataset:
          filename  length  chroma_stft_mean  chroma_stft_var  rms_mean  \
0  blues.00000.wav  661794          0.350088         0.088757  0.130228   
1  blues.00001.wav  661794          0.340914         0.094980  0.095948   
2  blues.00002.wav  661794          0.363637         0.085275  0.175570   
3  blues.00003.wav  661794          0.404785         0.093999  0.141093   
4  blues.00004.wav  661794          0.308526         0.087841  0.091529   

    rms_var  spectral_centroid_mean  spectral_centroid_var  \
0  0.002827             1784.165850          129774.064525   
1  0.002373             1530.176679          375850.073649   
2  0.002746             1552.811865          156467.643368   
3  0.006346             1070.106615          184355.942417   
4  0.002303             1835.004266          343399.939274   

   spectral_bandwidth_mean  spectral_bandwidth_var  ...  mf

# Feature Extraction
Extract relevant audio features (e.g., MFCCs, spectral contrast, tempo) using librosa.

In [3]:
# Function to extract audio features from a file
def extract_features(file_name):
    y, sr = librosa.load(file_name, duration=30)

    # Extracting MFCCs
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)

    # Extracting Spectral Contrast
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)

    # Extracting Zero-Crossing Rate
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y).T, axis=0)

    # Extracting Tempo
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # Combining all features into a single array
    features = np.hstack([mfccs, spectral_contrast, zero_crossing_rate, tempo])

    return features

example_features = extract_features('Data/genres_original/blues/blues.00000.wav')
print("Extracted Features:", example_features)

Extracted Features: [-1.13619385e+02  1.21553032e+02 -1.91510563e+01  4.23457680e+01
 -6.37116766e+00  1.86130333e+01 -1.36920605e+01  1.53393784e+01
 -1.22836170e+01  1.09737759e+01 -8.32240963e+00  8.80678749e+00
 -3.66580200e+00  1.59876027e+01  1.51024777e+01  1.84584091e+01
  1.83631251e+01  1.89187388e+01  1.71902361e+01  3.96675767e+01
  8.30663911e-02  1.23046875e+02]


# Train-Test Split
Split the dataset into training and testing sets using train_test_split from sklearn.

In [4]:
# Define the features (X) and labels (y) for the 30-second features dataset
X_30_sec = df_30_sec.drop(columns=['filename', 'label'])
y_30_sec = df_30_sec['label']

# Define the features (X) and labels (y) for the 3-second segments dataset
X_3_sec = df_3_sec.drop(columns=['filename', 'label'])
y_3_sec = df_3_sec['label']

# Split the 30-second features dataset into training and testing sets
X_train_30_sec, X_test_30_sec, y_train_30_sec, y_test_30_sec = train_test_split(X_30_sec, y_30_sec, test_size=0.2, random_state=42)

# Split the 3-second segments dataset into training and testing sets
X_train_3_sec, X_test_3_sec, y_train_3_sec, y_test_3_sec = train_test_split(X_3_sec, y_3_sec, test_size=0.2, random_state=42)

# Display the shapes of the resulting datasets
print("30-Second Features Dataset:")
print("Training set shape:", X_train_30_sec.shape, y_train_30_sec.shape)
print("Testing set shape:", X_test_30_sec.shape, y_test_30_sec.shape)

print("\n3-Second Segments Dataset:")
print("Training set shape:", X_train_3_sec.shape, y_train_3_sec.shape)
print("Testing set shape:", X_test_3_sec.shape, y_test_3_sec.shape)

30-Second Features Dataset:
Training set shape: (800, 58) (800,)
Testing set shape: (200, 58) (200,)

3-Second Segments Dataset:
Training set shape: (7992, 58) (7992,)
Testing set shape: (1998, 58) (1998,)


# Train the Random Forest Classifier
Train a Random Forest Classifier on the training data. Random forests are less prone to overfitting due to their ensemble nature

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Encode the target labels
label_encoder = LabelEncoder()
y_train_30_sec_encoded = label_encoder.fit_transform(y_train_30_sec)
y_test_30_sec_encoded = label_encoder.transform(y_test_30_sec)

# Initialize the Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier on the 30-second features dataset
rf_classifier.fit(X_train_30_sec, y_train_30_sec_encoded)

# Predict on the test set
y_pred_30_sec = rf_classifier.predict(X_test_30_sec)

# Decode the predicted labels back to original
y_pred_30_sec_decoded = label_encoder.inverse_transform(y_pred_30_sec)

# Evaluate the model using original labels
accuracy_30_sec = accuracy_score(y_test_30_sec, y_pred_30_sec_decoded)
classification_report_30_sec = classification_report(y_test_30_sec, y_pred_30_sec_decoded)

print("30-Second Features Dataset:")
print("Accuracy:", accuracy_30_sec)
print("Classification Report:\n", classification_report_30_sec)

# Encode the target labels for the 3-second segments dataset
y_train_3_sec_encoded = label_encoder.fit_transform(y_train_3_sec)
y_test_3_sec_encoded = label_encoder.transform(y_test_3_sec)

# Train the classifier on the 3-second segments dataset
rf_classifier.fit(X_train_3_sec, y_train_3_sec_encoded)

# Predict on the test set
y_pred_3_sec = rf_classifier.predict(X_test_3_sec)

# Decode the predicted labels back to original
y_pred_3_sec_decoded = label_encoder.inverse_transform(y_pred_3_sec)

# Evaluate the model using original labels
accuracy_3_sec = accuracy_score(y_test_3_sec, y_pred_3_sec_decoded)
classification_report_3_sec = classification_report(y_test_3_sec, y_pred_3_sec_decoded)

print("\n3-Second Segments Dataset:")
print("Accuracy:", accuracy_3_sec)
print("Classification Report:\n", classification_report_3_sec)

30-Second Features Dataset:
Accuracy: 0.76
Classification Report:
               precision    recall  f1-score   support

       blues       0.64      0.90      0.75        20
   classical       1.00      1.00      1.00        13
     country       0.76      0.59      0.67        27
       disco       0.75      0.71      0.73        21
      hiphop       0.59      0.87      0.70        15
        jazz       0.95      0.91      0.93        22
       metal       0.85      0.92      0.88        25
         pop       0.79      0.85      0.81        13
      reggae       0.78      0.61      0.68        23
        rock       0.56      0.43      0.49        21

    accuracy                           0.76       200
   macro avg       0.77      0.78      0.77       200
weighted avg       0.77      0.76      0.76       200


3-Second Segments Dataset:
Accuracy: 0.8813813813813813
Classification Report:
               precision    recall  f1-score   support

       blues       0.86      0.85     

# Evaluate the Model
