In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display

# to play the audio files
from IPython.display import Audio
from sklearn.model_selection import train_test_split


from keras.callbacks import ModelCheckpoint
from keras.callbacks import LearningRateScheduler
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, StandardScaler


from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from collections.abc import Iterable

from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, BatchNormalization
from keras import layers, callbacks

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier



In [None]:
# Render every column of a DataFrame instead of eliding the middle ones.
pd.options.display.max_columns = None


### Reading Data

In [None]:
# Load the pre-computed feature table that ships with the Kaggle dataset.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/musicgenreclassification/features_30_sec.csv',
)

### EDA

In [None]:
# Preview the first rows of the loaded feature table.
df.head()

In [None]:
# (rows, columns) of the loaded feature table.
df.shape

In [None]:

# Keep every column whose name mentions "mfcc" (case-insensitive) and
# compute their pairwise correlation matrix.
selected_columns = list(filter(lambda col: "mfcc" in col.lower(), df.columns))
subset_corr = df.loc[:, selected_columns].corr()

# Large canvas so the per-cell annotations stay legible.
plt.figure(figsize=(28, 24))
sns.heatmap(
    subset_corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    square=True,
)
plt.title("Correlation Heatmap of Selected MFCC Features", fontsize=16)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

In [None]:
# List the column names available in the feature table.
df.columns

### Feature Engineering / Train Test split

In [None]:
# Encode genre names as integer class ids so they can be used with
# sparse_categorical_crossentropy and the sklearn classifiers.
#
# The guard is "label is not numeric yet" rather than `dtype == 'object'`:
# labels stored with a pandas string or categorical dtype are then encoded too,
# while a second run of this cell (labels already integers) is a no-op and
# keeps the fitted `le` — and therefore the genre names — intact.
if not pd.api.types.is_numeric_dtype(df['label']):
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])

In [None]:
# Drop the identifier column. `errors='ignore'` keeps the cell re-runnable:
# the original in-place drop raised KeyError on a second execution because
# 'filename' had already been removed.
df = df.drop(columns=['filename'], errors='ignore')

# Target vector and feature matrix (every remaining column except the label).
y = df['label']
X = df.drop(columns=['label'])


In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

In [None]:
# Learn mean/variance on the training rows only, then apply the same
# transformation to both splits so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Maps model name -> test accuracy; filled in by the training cells below.
results = dict()

### Model Training

#### KNN

In [None]:
# 3-nearest-neighbours baseline on the standardized features.
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)

# Score the held-out split.
y_pred = knn_model.predict(X_test_scaled)
knn_accuracy = accuracy_score(y_test, y_pred)

# Record the score under the key 'KNN' before reporting it.
results['KNN'] = knn_accuracy

print("KNN Model Test Accuracy:", knn_accuracy)
print("Results:", results)

#### RandomForest

In [None]:
# Random forest with default hyper-parameters; the seed fixes the bootstrap
# samples and feature sub-sampling.
rf_model = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)

# Score the held-out split.
y_pred = rf_model.predict(X_test_scaled)
rf_accuracy = accuracy_score(y_test, y_pred)

# Record the score under the key 'RandomForest' before reporting it.
results['RandomForest'] = rf_accuracy

print("Random Forest Test Accuracy:", rf_accuracy)
print("Results:", results)

#### XGBoost

In [None]:
# Gradient-boosted trees baseline.
# `use_label_encoder` is intentionally not passed: it is a deprecated
# XGBClassifier argument that recent releases only warn about and ignore,
# and the labels are already integer class ids.
xgb_model = XGBClassifier(random_state=42, eval_metric='mlogloss')

# Train the model on the training data
xgb_model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = xgb_model.predict(X_test_scaled)

# Calculate accuracy on the test set
xgb_accuracy = accuracy_score(y_test, y_pred)
print("XGBoost Model Test Accuracy:", xgb_accuracy)

# Save the test accuracy into the results dictionary under the key 'XGBoost'
results['XGBoost'] = xgb_accuracy

# Print out the results dictionary to verify
print("Results:", results)

#### Neural Network

In [None]:
# One output unit per distinct label seen in the training split.
num_classes = y_train.nunique()

# Widths of the hidden Dense layers, in order. Each one is followed by
# Dropout(0.4) and BatchNormalization.
hidden_widths = [1028, 1028, 256, 128, 32]

nn_layers = [tf.keras.Input(shape=(X_train_scaled.shape[1],))]
for width in hidden_widths:
    nn_layers += [
        Dense(width, activation='relu'),
        Dropout(0.4),
        BatchNormalization(),
    ]
nn_layers.append(Dense(num_classes, activation='softmax'))

nn_model_deep = Sequential(nn_layers)

# Integer labels -> sparse categorical cross-entropy.
nn_model_deep.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Early-stopping callback on validation loss (10 epochs of patience, best weights restored).
early_stop_deep = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

nn_model_deep.summary()


In [None]:
# Persist the dense network whenever validation accuracy reaches a new maximum.
checkpoint_filepath_nn = 'best_nn_model.keras'
model_checkpoint_callback_nn = ModelCheckpoint(
    filepath=checkpoint_filepath_nn,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the dense network; 20% of the training rows are used for validation.
# NOTE(review): `early_stop_deep` is defined earlier but not passed here, so all
# 512 epochs run — confirm that is intended.
history_deep = nn_model_deep.fit(
    X_train_scaled,
    y_train,
    epochs=512,
    batch_size=128,
    validation_split=0.2,
    callbacks=[model_checkpoint_callback_nn],
    verbose=1,
)

# Score the in-memory model on the held-out split and record the accuracy.
# NOTE(review): this is the model as it stands after the last epoch; the
# best-validation weights only live in the checkpoint file written by the callback.
test_loss_deep, test_accuracy_deep = nn_model_deep.evaluate(
    X_test_scaled, y_test, verbose=0
)
results['NeuralNetwork'] = test_accuracy_deep

nn_model_deep.save("best_neural_network_model.h5")

print("Deeper Neural Network Model Accuracy:", test_accuracy_deep)

#### LSTM

In [None]:

# The LSTM reads each sample as a sequence with one scalar per step, i.e. one
# step per scaled feature column. Sequence length and class count are derived
# from the data (as the dense model does) instead of being hard-coded to 40
# and 10, which only matched a 40-column, 10-genre feature table.
n_timesteps = X_train_scaled.shape[1]
n_genres = int(y.nunique())

lstm_model = Sequential([
    tf.keras.Input(shape=(n_timesteps, 1)),
    LSTM(256, return_sequences=False, recurrent_dropout=0.3,
         kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(n_genres, activation='softmax')
])

# Integer labels -> sparse categorical cross-entropy.
lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.summary()

In [None]:
# Persist the LSTM whenever validation accuracy reaches a new maximum.
checkpoint_filepath = 'best_model.keras'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1)


def scheduler(epoch, lr):
    """Keep the learning rate for the first 10 epochs, then decay it by exp(-0.1) per epoch.

    Args:
        epoch: Zero-based epoch index supplied by the callback.
        lr: Current learning rate.

    Returns:
        The learning rate to use for this epoch, as a plain Python float
        (a tensor-valued result is not accepted by every Keras version).
    """
    if epoch < 10:
        return float(lr)
    return float(lr * np.exp(-0.1))


# NOTE(review): this callback is built but not passed to `fit` below, so the
# learning rate is never decayed. Wiring it in would change training — confirm intent.
lr_scheduler_callback = LearningRateScheduler(scheduler)

# Give the LSTM an explicit (samples, timesteps, 1) input instead of relying on
# Keras to add the trailing axis implicitly.
X_train_seq = np.asarray(X_train_scaled)[..., np.newaxis]
X_test_seq = np.asarray(X_test_scaled)[..., np.newaxis]

history = lstm_model.fit(
    X_train_seq, y_train,
    batch_size=128,
    validation_split=0.2,
    epochs=500,
    callbacks=[model_checkpoint_callback],
    verbose=1
)

# Score the in-memory (last-epoch) model on the held-out split.
test_loss_deep_lstm, test_accuracy_deep_lstm = lstm_model.evaluate(X_test_seq, y_test, verbose=0)

results['LSTM'] = test_accuracy_deep_lstm

# The label previously said "Deeper Neural Network", copied from the dense model cell.
print("LSTM Model Accuracy:", test_accuracy_deep_lstm)


### Final Results

In [None]:
# Test accuracy recorded so far for each model, keyed by model name.
results

### Experimentation 1

In [None]:
# Experiment: retrain a fresh copy of the dense network for several test-set
# fractions and record the resulting test accuracy (and the trained model) per
# fraction in `exper1`.
# The seed -> clone -> compile -> fit order inside the loop is deliberate; do not
# reorder it, or weight initialisation will no longer be reseeded per run.
import os
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import ModelCheckpoint

# 1) Force Python, NumPy, and TF hashes & RNGs to be deterministic
# NOTE(review): these environment variables are set after the interpreter and
# TensorFlow are already running — confirm they still take effect in this runtime.
os.environ['PYTHONHASHSEED']     = '42'
os.environ['TF_DETERMINISTIC_OPS'] = '1'
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# 2) Prepare experiment container
# `exper1` maps "test_size_<int(fraction*100)>" -> {"Test_Accuracy", "Model"}.
exper1 = {}
test_sizes = [0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.35]

for test_size in test_sizes:
    # 3) Split with fixed seed
    # Note: this rebinds the notebook-level X_train / X_test / y_train / y_test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        random_state=42,  # ensures same split every time
        shuffle=True
    )

    # 4) Standardize
    # A new scaler is fitted per split, on that split's training rows only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    # 5) Clone & compile model after seeding
    # clone_model copies the architecture of nn_model_deep with newly
    # initialised weights, so every run starts untrained.
    tf.random.set_seed(42)  # reseed before weight init
    nn_model = tf.keras.models.clone_model(nn_model_deep)
    nn_model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # 6) Setup checkpoint
    # int(test_size*100) truncates, so e.g. 0.125 is written as "..._12".
    checkpoint_filepath = f'best_model_{int(test_size*100)}.keras'
    ckpt_cb = ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
        verbose=1
    )

    # 7) Train (deterministically)
    history = nn_model.fit(
        X_train_scaled, y_train,
        validation_split=0.2,
        epochs=512,
        batch_size=128,
        callbacks=[ckpt_cb],
        verbose=0
    )

    # 8) Evaluate & record
    # The stored model is the in-memory one after the final epoch, not the
    # checkpointed best-validation model.
    loss, acc = nn_model.evaluate(X_test_scaled, y_test, verbose=0)
    exper1[f"test_size_{int(test_size*100)}"] = {
        "Test_Accuracy": acc,
        "Model": nn_model
    }

    print(f"Test size={test_size:.3f} → Accuracy={acc:.4f}")

In [None]:
test_sizes = [0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.35]

# Map each `exper1` key back to its test-set fraction, using the same key
# format the experiment loop writes (`int(test_size*100)`, no zero padding).
# Looking sizes up by key avoids assuming that the dict's insertion order
# lines up with this list.
size_by_key = {f"test_size_{int(ts*100)}": ts for ts in test_sizes}

# Report the accuracy of every split.
print("\n📊 Test Accuracies by Test Set Size:\n" + "-"*45)
for key, test_size in size_by_key.items():
    acc = exper1[key]['Test_Accuracy']
    print(f"Test size = {test_size:<6} →  Accuracy: {acc:.4f}")

# Find the best performing model
best_key = max(exper1, key=lambda k: exper1[k]['Test_Accuracy'])
best_acc = exper1[best_key]['Test_Accuracy']
best_model = exper1[best_key]['Model']

# Recover the corresponding test size; an unknown key fails loudly here rather
# than silently reporting the wrong split.
best_test_size = size_by_key[best_key]

print("\n🏆 Best Performing Split:")
print(f"Test size = {best_test_size} with Accuracy: {best_acc:.4f}")

# Save best model
filename = f"best_model_testsize_{str(best_test_size).replace('.', '_')}.h5"
best_model.save(filename)
print(f"\n💾 Best model saved as: {filename}")

In [None]:
import matplotlib.pyplot as plt

# Collect the recorded accuracy for every test-set fraction, in list order.
test_sizes = [0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.35]
accuracies = []
for ts in test_sizes:
    accuracies.append(exper1[f"test_size_{int(ts*100)}"]["Test_Accuracy"])

# Accuracy curve.
plt.figure(figsize=(10, 6))
plt.plot(test_sizes, accuracies, label='Model A', color='black', linewidth=2.5)

# Reference line at a fixed accuracy of 0.5, labelled just right of the last point.
baseline = 0.5
plt.axhline(y=baseline, linestyle='--', linewidth=1.5, color='lightgray')
plt.text(test_sizes[-1] + 0.005, baseline + 0.005, "Baseline", color='gray')

# Mark the best split with its accuracy in percent, slightly above the point.
best_idx = int(np.argmax(accuracies))
plt.text(
    test_sizes[best_idx],
    accuracies[best_idx] + 0.02,
    f"{accuracies[best_idx]*100:.1f}%",
    color='black',
    fontsize=12,
)

# Name the curve at its right-hand end.
plt.text(test_sizes[-1] + 0.005, accuracies[-1], "Model A", fontsize=12, color='black')

# Title, axis labels and grid.
plt.title("Test Accuracy by Test Set Size", fontsize=14)
plt.xlabel("Test Size", fontsize=12)
plt.ylabel("Test Accuracy", fontsize=12)
plt.grid(alpha=0.3)

# Axis ranges leave room for the text placed right of the last point.
plt.ylim(0, 1.05)
plt.xlim(min(test_sizes) - 0.01, max(test_sizes) + 0.03)
plt.xticks(test_sizes)
plt.tight_layout()
plt.savefig('ModelSplitPerformance.png')
plt.show()

In [None]:
# These two names are used below but were never imported anywhere in the notebook.
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

best_model = exper1[best_key]["Model"]

# 1. Rebuild the split the best model was actually trained on. The fraction
#    comes from `best_test_size` (set in the summary cell) instead of a
#    hard-coded 0.125; with any other winning split, part of this "test" set
#    would have been training data for the model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=best_test_size, random_state=42
)

# 2. Scale with statistics from this split's training rows, as during training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Predict class probabilities and convert to labels
y_pred_probs = best_model.predict(X_test_scaled)
y_pred = np.argmax(y_pred_probs, axis=1)

# 4. Compute accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print(f"\n✅ Test Accuracy (confirmed): {test_accuracy:.4f}")

# 5. Confusion matrix. `labels` pins one row/column per encoded genre so the
#    matrix always matches `genre_labels`, even if a genre is absent from
#    this split's test rows and predictions.
genre_labels = le.classes_  # genre names, indexed by encoded class id
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(genre_labels)))

# 6. Plot confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=genre_labels)
disp.plot(cmap='Blues', xticks_rotation=45, ax=ax)

plt.title("Confusion Matrix - Feature Based Classifier", fontsize=14)
plt.xlabel("Predicted Genre", fontsize=12)
plt.ylabel("True Genre", fontsize=12)
plt.tight_layout()
plt.savefig('Confusion')
plt.show()

In [None]:
# Per-model test accuracies collected in the model-training section.
results

### Feature Extraction Automation

In [None]:
class AudioFeature:
    """Extract summary audio features from a single audio file using librosa.

    On construction the file is loaded and leading/trailing silence is trimmed.
    Each ``get_*`` method computes one feature family, stores the values as
    instance attributes and returns them. ``get_all_params`` runs all of them
    and returns one flat dict; ``get_dataframe`` wraps that dict in a
    single-row DataFrame.

    The methods use the notebook-level imports ``os``, ``np`` (numpy),
    ``pd`` (pandas) and ``librosa``. Imports placed in a class body only create
    class attributes and are not visible inside methods, so none are kept here.
    """

    def __init__(self, path):
        """Load the audio file at ``path`` and trim surrounding silence.

        Args:
            path: Location of the audio file.

        Raises:
            FileNotFoundError: If ``path`` does not exist. This is a subclass
                of ``Exception``, so callers catching ``Exception`` still work.
        """
        self.path = path

        if not os.path.exists(self.path):
            raise FileNotFoundError(f"Path not found - {self.path}")

        # Raw samples and the sample rate chosen by librosa.load's defaults.
        self.y, self.sr = librosa.load(self.path)

        # Populates self.audio_file, which most extractors read.
        self.load_audio_file()

    def get_dataframe(self):
        """Return all extracted features as a single-row pandas DataFrame.

        Column names are the keys produced by ``get_all_params``.
        """
        return pd.DataFrame([self.get_all_params()])

    def get_all_params(self):
        """Compute every feature and return them as one flat dict.

        Returns:
            dict mapping feature name to value: the scalar features listed
            below followed by the per-coefficient MFCC means and variances.
        """
        # Each call stores its results on the instance; return values are unused here.
        self.get_length()
        self.get_zero_crossings()
        self.get_tempo()
        self.get_centroids()
        self.get_spectral_rolloff()
        self.get_mel_frequencies()
        self.get_chroma()
        self.get_rms()
        self.get_spectral_bandwith()
        self.get_harmony()
        self.get_perceptr()

        # NOTE(review): the "spectral_bandwith_*" keys are spelled without the
        # second "d" — verify they match the column names the models were trained on.
        result = {
            "length": self.length,
            "chroma_stft_mean": self.chroma_stft_mean,
            "chroma_stft_var": self.chroma_stft_var,
            "rms_mean": self.rms_mean,
            "rms_var": self.rms_var,
            "spectral_centroid_mean": self.spectral_centriod_mean,
            "spectral_centroid_var": self.spectral_centriod_var,
            "spectral_bandwith_mean": self.spectral_bandwith_mean,
            "spectral_bandwith_var": self.spectral_bandwith_var,
            "rolloff_mean": self.rolloff_mean,
            "rolloff_var": self.rolloff_var,
            "zero_crossing_rate_mean": self.zero_crossings_rate_mean,
            "zero_crossing_rate_var": self.zero_crossings_rate_var,
            "harmony_mean": self.harmony_mean,
            "harmony_var": self.harmony_var,
            "perceptr_mean": self.perceptr_mean,
            "perceptr_var": self.perceptr_var,
            "tempo": self.tempo,
        }

        # Append mfcc<i>_mean / mfcc<i>_var after the scalar features.
        result.update(self.mel_frequencies)

        return result

    def load_audio_file(self):
        """Trim leading/trailing silence from ``self.y`` and store it as ``self.audio_file``."""
        self.audio_file, _ = librosa.effects.trim(self.y)

        return self.audio_file

    def get_harmony(self):
        """Return (mean, variance) of the harmonic component of the trimmed signal."""
        harmony = librosa.effects.harmonic(y=self.audio_file)
        self.harmony_mean = np.mean(harmony)
        self.harmony_var = np.var(harmony)

        return self.harmony_mean, self.harmony_var

    def get_length(self):
        """Return the number of samples in the trimmed signal."""
        self.length = np.shape(self.audio_file)[0]

        return self.length

    def get_zero_crossings(self):
        """Return (mean, variance) of the per-sample zero-crossing indicator.

        NOTE(review): these are statistics of the boolean crossing mask, not of a
        frame-wise zero-crossing rate; the variance in particular would differ
        between the two. Confirm which definition the training features use.
        """
        zero_crossings = librosa.zero_crossings(self.audio_file, pad=False)

        self.zero_crossings_rate_mean = np.mean(zero_crossings)
        self.zero_crossings_rate_var = np.var(zero_crossings)

        return self.zero_crossings_rate_mean, self.zero_crossings_rate_var

    def get_rms(self):
        """Return (mean, variance) of the RMS energy of the trimmed signal."""
        rms = librosa.feature.rms(y=self.audio_file)

        self.rms_mean = np.mean(rms)
        self.rms_var = np.var(rms)

        return self.rms_mean, self.rms_var

    def get_spectral_bandwith(self):
        """Return (mean, variance) of the spectral bandwidth of the trimmed signal."""
        spectral_bandwith = librosa.feature.spectral_bandwidth(y=self.audio_file, sr=self.sr)
        self.spectral_bandwith_mean = np.mean(spectral_bandwith)
        self.spectral_bandwith_var = np.var(spectral_bandwith)

        return self.spectral_bandwith_mean, self.spectral_bandwith_var

    def get_tempo(self):
        """Estimate and return the tempo from the untrimmed signal ``self.y``.

        Depending on the librosa version, ``beat_track`` hands the tempo back as
        either a scalar or a one-element array. ``np.atleast_1d`` accepts both,
        whereas indexing the raw value with ``[0]`` fails on a scalar.
        """
        tempo, _ = librosa.beat.beat_track(
            y=self.y,
            sr=self.sr
        )

        self.tempo = np.atleast_1d(tempo)[0]

        return self.tempo

    def get_centroids(self):
        """Return (mean, variance) of the spectral centroid of the trimmed signal."""
        spectral_centroids = librosa.feature.spectral_centroid(y=self.audio_file, sr=self.sr)[0]

        self.spectral_centriod_mean = np.mean(spectral_centroids)
        self.spectral_centriod_var = np.var(spectral_centroids)

        return self.spectral_centriod_mean, self.spectral_centriod_var

    def get_perceptr(self, hop_length=5000):
        """Compute PCEN statistics of the mel-spectrogram of the trimmed signal.

        Stores the mean and variance as ``perceptr_mean`` / ``perceptr_var``.

        Args:
            hop_length: Hop length passed to both the mel-spectrogram and PCEN.
                Defaults to 5000, the value that was previously hard-coded.

        Returns:
            The PCEN array itself (unlike the other extractors, which return
            their mean/variance pair).

        NOTE(review): confirm that PCEN is the intended definition of the
        "perceptr" feature in the training data.
        """
        # 1. Mel-spectrogram of the trimmed signal
        S = librosa.feature.melspectrogram(
            y=self.audio_file,
            sr=self.sr,
            hop_length=hop_length,
        )

        # 2. Per-channel energy normalisation
        pcen = librosa.pcen(
            S,
            sr=self.sr,
            hop_length=hop_length,
        )

        # 3. Summary statistics
        self.perceptr_mean = np.mean(pcen)
        self.perceptr_var = np.var(pcen)

        return pcen

    def get_spectral_rolloff(self):
        """Return (mean, variance) of the spectral roll-off of the trimmed signal."""
        spectral_rolloff = librosa.feature.spectral_rolloff(y=self.audio_file, sr=self.sr)[0]

        self.rolloff_mean = np.mean(spectral_rolloff)
        self.rolloff_var = np.var(spectral_rolloff)

        return self.rolloff_mean, self.rolloff_var

    def get_chroma(self, hop_length=5000):
        """Return (mean, variance) of the STFT chromagram of the trimmed signal.

        Args:
            hop_length: Hop length passed to ``librosa.feature.chroma_stft``.
        """
        chromagram = librosa.feature.chroma_stft(y=self.audio_file, sr=self.sr, hop_length=hop_length)
        self.chroma_stft_mean = np.mean(chromagram)
        self.chroma_stft_var = np.var(chromagram)

        return self.chroma_stft_mean, self.chroma_stft_var

    def get_mel_frequencies(self):
        """Return a dict of per-coefficient MFCC means and variances.

        Keys are ``mfcc<i>_mean`` and ``mfcc<i>_var`` with ``i`` starting at 1,
        one pair per row returned by ``librosa.feature.mfcc``.
        """
        mfccs = librosa.feature.mfcc(y=self.audio_file, sr=self.sr)
        self.mel_frequencies = {}
        for i, mfcc in enumerate(mfccs, start=1):
            self.mel_frequencies[f'mfcc{i}_mean'] = np.mean(mfcc)
            self.mel_frequencies[f'mfcc{i}_var'] = np.var(mfcc)

        return self.mel_frequencies
        

In [None]:
# Try the extractor on one clip from the dataset.
test_input_path = '/kaggle/input/musicgenreclassification/genres_original/blues/blues.00000.wav'
audio_features = AudioFeature(path=test_input_path)

# NOTE(review): this rebinds `df`, which earlier cells use for the training
# table; re-running those cells afterwards needs the CSV to be reloaded first.
df = audio_features.get_dataframe()

In [None]:
# Show the single-row feature frame extracted from the test clip.
df