# 1. Import Libraries

In [2]:
import os
import numpy as np
import pandas as pd
import soundfile as sf
import librosa
import joblib
import warnings

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

warnings.filterwarnings("ignore")

# 2. Feature Extraction

In [None]:
def extract_all_features(file_path):
    """Extract a flat dictionary of summary audio features from one file.

    The audio is read with ``soundfile``, down-mixed to mono when it has more
    than one channel, and described by the mean and variance of several
    librosa descriptors plus 20 MFCC coefficients.

    The key order of the returned dictionary is stable (``length``, then the
    ``*_mean``/``*_var`` pairs, then ``mfcc1`` .. ``mfcc20``); downstream code
    turns these dictionaries into DataFrame columns, so the order must not
    change between training and prediction.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read.

    Returns
    -------
    dict or None
        Mapping of feature name to scalar value, or ``None`` when the file
        could not be read or processed (a message is printed in that case).
    """
    try:
        y, sr = sf.read(file_path, dtype="float32")
        if y.ndim > 1:  # multi-channel → mono by averaging the channels
            y = np.mean(y, axis=1)

        y = np.asarray(y, dtype=np.float32)

        # An empty signal would only produce NaN statistics; reject it here so
        # it is reported and skipped like any other unreadable file.
        if y.size == 0:
            raise ValueError("audio contains no samples")

        # Compute every frame-level descriptor exactly once, then summarise it
        # with both statistics (the descriptors are the expensive part).
        frame_descriptors = (
            ("chroma_stft", librosa.feature.chroma_stft(y=y, sr=sr)),
            ("rms", librosa.feature.rms(y=y)),
            ("spectral_centroid", librosa.feature.spectral_centroid(y=y, sr=sr)),
            ("spectral_bandwidth", librosa.feature.spectral_bandwidth(y=y, sr=sr)),
            ("rolloff", librosa.feature.spectral_rolloff(y=y, sr=sr)),
            ("zero_crossing_rate", librosa.feature.zero_crossing_rate(y=y)),
        )

        features = {"length": len(y)}
        for descriptor_name, values in frame_descriptors:
            features[f"{descriptor_name}_mean"] = np.mean(values)
            features[f"{descriptor_name}_var"] = np.var(values)

        # MFCCs: one mean/variance pair per coefficient, numbered from 1
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
        for i in range(20):
            features[f"mfcc{i+1}_mean"] = np.mean(mfccs[i])
            features[f"mfcc{i+1}_var"] = np.var(mfccs[i])

        return features
    except Exception as e:
        # Best-effort by design: one bad file must not abort a whole dataset scan.
        print(f"⚠️ Skipping {file_path} due to error: {e}")
        return None


# 3. Load Dataset and Extract Features

In [4]:
dataset_path = "genres_original"   # training folder (each subfolder = genre)

data, labels = [], []

# os.listdir returns entries in filesystem-dependent order. Sorting both levels
# keeps the row order of `df` identical on every machine, so the seeded
# train/test split downstream is actually reproducible.
for genre in sorted(os.listdir(dataset_path)):
    genre_path = os.path.join(dataset_path, genre)
    if not os.path.isdir(genre_path):
        continue  # stray files next to the genre folders are ignored
    for file in sorted(os.listdir(genre_path)):
        if not file.lower().endswith((".wav", ".mp3")):
            continue
        file_path = os.path.join(genre_path, file)
        features = extract_all_features(file_path)
        # extract_all_features returns None (and prints why) for unreadable files
        if features:
            data.append(features)
            labels.append(genre)

# One row per successfully processed file; columns follow the feature dict order
df = pd.DataFrame(data)
print(f"✅ Extracted features from {len(df)} files, feature count: {df.shape[1]}")

⚠️ Skipping genres_original\jazz\jazz.00054.wav due to error: Error opening 'genres_original\\jazz\\jazz.00054.wav': Format not recognised.
✅ Extracted features from 999 files, feature count: 53


# 4. Preprocessing

In [5]:
# Encode the genre names as integer class ids
le = LabelEncoder()
y_encoded = le.fit_transform(labels)

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# data (data leakage) and make the test score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df, y_encoded, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both parts
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that still expects the fully scaled feature matrix
X_scaled = scaler.transform(df)


# 5. Train Models and Compare

In [6]:
# Candidate classifiers, all trained on the same scaled training split
models = {
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "LogisticRegression": LogisticRegression(max_iter=2000),
    "XGBoost": XGBClassifier(
        n_estimators=200,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        eval_metric="mlogloss"
    )
}

best_model = None
# Start below any reachable accuracy so that a model is always selected,
# even in the degenerate case where every candidate scores 0.0.
best_acc = float("-inf")
best_name = ""

# Pass the full list of class ids explicitly: without `labels`, the report
# raises when a genre happens to be absent from the test split because the
# number of found classes no longer matches `target_names`.
class_ids = np.arange(len(le.classes_))

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"\n📊 {name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds, labels=class_ids, target_names=le.classes_))

    # Strict '>' keeps the first model on ties (dict insertion order).
    # NOTE(review): the winner is picked on the same split that is reported,
    # so the printed best accuracy is a slightly optimistic estimate.
    if acc > best_acc:
        best_acc, best_model, best_name = acc, model, name

print(f"\n🏆 Best Model: {best_name} with Accuracy = {best_acc:.4f}")


📊 RandomForest Accuracy: 0.8000
              precision    recall  f1-score   support

       blues       0.83      0.71      0.77        21
   classical       0.71      1.00      0.83        12
     country       0.77      0.71      0.74        24
       disco       1.00      0.82      0.90        22
      hiphop       0.76      0.87      0.81        15
        jazz       0.92      0.89      0.91        27
       metal       0.86      1.00      0.92        18
         pop       0.81      0.89      0.85        19
      reggae       0.81      0.77      0.79        22
        rock       0.47      0.45      0.46        20

    accuracy                           0.80       200
   macro avg       0.79      0.81      0.80       200
weighted avg       0.81      0.80      0.80       200


📊 DecisionTree Accuracy: 0.6350
              precision    recall  f1-score   support

       blues       0.52      0.52      0.52        21
   classical       0.83      0.83      0.83        12
     country

# 6. Save Best Model

In [7]:
# Persist the winning estimator together with the fitted scaler and label
# encoder as one tuple, so prediction code can restore all three at once.
model_path = "best_genre_model.pkl"
joblib.dump(value=(best_model, scaler, le), filename=model_path)
print(f"💾 Saved best model ({best_name}) to {model_path}")

💾 Saved best model (RandomForest) to best_genre_model.pkl


# 7. Prediction Function

In [8]:
def predict_genre(audio_path, model_path="best_genre_model.pkl"):
    """Predict the genre of a single audio file with a saved model bundle.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to classify.
    model_path : str, optional
        Path of the joblib file holding the ``(model, scaler, label_encoder)``
        tuple. Defaults to ``"best_genre_model.pkl"``.

    Returns
    -------
    str or None
        The predicted genre name, or ``None`` when feature extraction failed
        for ``audio_path``.
    """
    # NOTE(security): joblib.load unpickles arbitrary Python objects. Only
    # point `model_path` at files produced by a trusted source.
    model, scaler, le = joblib.load(model_path)

    features = extract_all_features(audio_path)
    if features is None:
        return None

    # Single-row frame whose columns follow the feature dictionary's key order
    X = pd.DataFrame([features])
    X_scaled = scaler.transform(X)
    pred = model.predict(X_scaled)

    # Map the integer class id back to the genre name
    genre = le.inverse_transform(pred)[0]
    print(f"🎵 {audio_path} → {genre}")
    return genre

# 8. Test on New Files

In [9]:
test_folder = "test"

# Classify every supported audio file found directly inside the test folder
for file in os.listdir(test_folder):
    is_audio = file.lower().endswith((".wav", ".mp3"))
    if not is_audio:
        continue
    predict_genre(os.path.join(test_folder, file))

🎵 test\classical.00005.wav → classical
🎵 test\pop.00000.wav → pop
🎵 test\rock.00001.wav → rock
