In [6]:
# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.16.1
#   kernelspec:
#     display_name: Python 3 (ipykernel)
#     language: python
#     name: python3
# ---

# # Enhanced Custom Music Genre Classification Model

# ## Overview
# This notebook builds upon previous iterations to create a potentially more robust custom CNN model for genre classification using Mel Spectrograms from the GTZAN dataset.
#
# **Key Features & Improvements:**
# 1.  **Mel Spectrogram Input:** Uses dB-scaled Mel spectrograms.
# 2.  **Global Normalization:** Correctly applies Min-Max scaling based on the training set.
# 3.  **Data Augmentation:** Integrates Frequency and Time Masking directly into the model using custom layers.
# 4.  **Refined CNN Architecture:** A slightly deeper CNN with consistent application of Batch Normalization, Pooling, Dropout, and L2 Regularization.
# 5.  **Robust Training:** Uses EarlyStopping, ReduceLROnPlateau, and ModelCheckpoint callbacks.
# 6.  **Modern Saving Format:** Uses the recommended '.keras' format.
# 7.  **Reproducibility:** Sets random seeds.
# 8.  **Comprehensive Evaluation:** Includes history plots, test set evaluation, confusion matrix, and classification report.
#
# **Note:** This model is built *from scratch* and does *not* use transfer learning.

# ## 1. Imports

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import os
import json
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2 # Import L2 regularizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.saving import register_keras_serializable
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import math
import random
import warnings
warnings.filterwarnings('ignore')

# ## 2. Configuration

In [8]:
# --- Paths ---
# Adjust this path to where your GTZAN 'genres_original' folder is located
RAW_DATA_PATH = '../data/raw/GTZAN/genres_original'
# Adjust this path to where your pre-processed '.npy' spectrogram files are located
SPECTROGRAM_NPY_PATH = '../data/processed/spectrograms_npy'
# Path to save the best model
MODEL_SAVE_DIR = '../model'
BEST_MODEL_PATH = os.path.join(MODEL_SAVE_DIR, 'best_custom_cnn_genre_model.keras')

In [9]:
# --- Audio Parameters ---
SAMPLE_RATE = 22050
DURATION = 30  # GTZAN duration
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

In [10]:
# --- Spectrogram Parameters ---
N_MELS = 128   # Number of Mel bands
N_FFT = 2048   # Window size for FFT
HOP_LENGTH = 512 # Hop length for STFT

# Calculate expected spectrogram length (time bins)
# Using math.ceil ensures we capture the last frame
EXPECTED_LENGTH = math.ceil(SAMPLES_PER_TRACK / HOP_LENGTH)
print(f"Using Target Spectrogram Length (Time Bins): {EXPECTED_LENGTH}")

Using Target Spectrogram Length (Time Bins): 1292


In [11]:
# --- Genre Classes ---
GENRES = sorted([
    'blues', 'classical', 'country', 'disco', 'hiphop',
    'jazz', 'metal', 'pop', 'reggae', 'rock'
]) # Sorting ensures consistent order
NUM_GENRES = len(GENRES)


In [12]:
# --- Augmentation Parameters ---
FREQ_MASK_PARAM = 25 # Max number of frequency bands to mask (Adjusted slightly)
TIME_MASK_PARAM = 40 # Max number of time steps to mask (Adjusted slightly)

In [13]:
# --- Training Parameters ---
EPOCHS = 150      # Max epochs (EarlyStopping will likely stop it sooner)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2 # Use 20% of training data for validation during training
LEARNING_RATE = 1e-4 # Initial learning rate

In [14]:
# --- Reproducibility ---
SEED = 42
os.environ['PYTHONHASHSEED']=str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
# Optional: Determinism (can impact performance)
# tf.config.experimental.enable_op_determinism()

In [15]:
# --- Create Model Save Directory ---
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# ## 3. Data Loading & Preprocessing

# Check if pre-processed data exists, otherwise generate spectrograms
# For this notebook, we assume the '.npy' files exist from a previous step.
# If not, you would add the code from the previous notebook here to generate them from RAW_DATA_PATH.

In [16]:
# Function to resize spectrograms
def resize_spectrogram(spec, target_length):
    current_length = spec.shape[1]
    if current_length > target_length:
        return spec[:, :target_length] # Truncate
    elif current_length < target_length:
        # Pad with the minimum value of the spectrogram (often silence)
        padding_value = np.min(spec)
        padding = np.full((spec.shape[0], target_length - current_length), padding_value)
        return np.hstack((spec, padding)) # Pad
    else:
        return spec

# Load all spectrograms from individual files
spectrograms = []
labels = []

print(f"Loading spectrograms from: {SPECTROGRAM_NPY_PATH}")
if not os.path.exists(SPECTROGRAM_NPY_PATH):
     raise FileNotFoundError(f"Spectrogram directory not found: {SPECTROGRAM_NPY_PATH}. "
                             "Please ensure spectrograms have been generated and saved as .npy files.")

for genre_idx, genre in enumerate(GENRES):
    genre_path = os.path.join(SPECTROGRAM_NPY_PATH, genre)
    if os.path.exists(genre_path):
        print(f'Loading {genre} spectrograms...')
        files_loaded = 0
        for file in os.listdir(genre_path):
            if file.endswith('.npy'):
                try:
                    spec = np.load(os.path.join(genre_path, file))
                    # Ensure spec is 2D (Mel Bands, Time) and has expected number of Mel bands
                    if spec.ndim == 2 and spec.shape[0] == N_MELS:
                        resized_spec = resize_spectrogram(spec, EXPECTED_LENGTH)
                        # Check final shape
                        if resized_spec.shape == (N_MELS, EXPECTED_LENGTH):
                            spectrograms.append(resized_spec)
                            labels.append(genre_idx)
                            files_loaded += 1
                        else:
                            print(f"Skipping {file} - incorrect shape after resize: {resized_spec.shape}")
                    else:
                        print(f"Skipping {file} - unexpected shape {spec.shape} or wrong Mel bands")
                except Exception as e:
                    print(f"Error loading or processing {file}: {e}")
        print(f" -> Loaded {files_loaded} files for {genre}.")
    else:
        print(f"Warning: Genre directory not found: {genre_path}")


if not spectrograms:
    raise ValueError("No spectrograms loaded. Check path and file contents.")

spectrograms = np.array(spectrograms)
labels = np.array(labels)

print(f"\nLoaded {len(spectrograms)} total spectrograms.")
if len(spectrograms) > 0:
    print(f"Shape of spectrogram array: {spectrograms.shape}") # (num_samples, N_MELS, EXPECTED_LENGTH)
else:
     raise ValueError("Spectrogram list is empty after loading.")

Loading spectrograms from: ../data/processed/spectrograms_npy
Loading blues spectrograms...
 -> Loaded 100 files for blues.
Loading classical spectrograms...
 -> Loaded 100 files for classical.
Loading country spectrograms...
 -> Loaded 100 files for country.
Loading disco spectrograms...
 -> Loaded 100 files for disco.
Loading hiphop spectrograms...
 -> Loaded 100 files for hiphop.
Loading jazz spectrograms...
 -> Loaded 99 files for jazz.
Loading metal spectrograms...
 -> Loaded 100 files for metal.
Loading pop spectrograms...
 -> Loaded 100 files for pop.
Loading reggae spectrograms...
 -> Loaded 100 files for reggae.
Loading rock spectrograms...
 -> Loaded 100 files for rock.

Loaded 999 total spectrograms.
Shape of spectrogram array: (999, 128, 1292)


In [17]:
# ## 4. Prepare Data for Training

print("\nPreparing data for training...")

# --- Add Channel Dimension ---
if spectrograms.ndim == 3:
    X = spectrograms[..., np.newaxis]
    print(f"Data shape after adding channel: {X.shape}") # (num_samples, N_MELS, EXPECTED_LENGTH, 1)
else:
    raise ValueError(f"Unexpected spectrogram array dimension: {spectrograms.ndim}. Expected 3.")

# --- Encode Labels ---
y = to_categorical(labels, num_classes=NUM_GENRES)
print(f"Labels shape after one-hot encoding: {y.shape}")

# --- Train/Validation/Test Split ---
# First split into Train (80%) and Test (20%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=SEED,
    stratify=y # Ensure proportional representation of genres
)

# Then split Train into Train (80% of original 80% = 64%) and Validation (20% of original 80% = 16%)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.2, # 0.2 of the 80% = 16% of total
    random_state=SEED,
    stratify=y_train # Stratify based on the training labels
)

print(f"Training set shape:   {X_train.shape}, {y_train.shape}")
print(f"Validation set shape: {X_val.shape}, {y_val.shape}")
print(f"Testing set shape:    {X_test.shape}, {y_test.shape}")


Preparing data for training...
Data shape after adding channel: (999, 128, 1292, 1)
Labels shape after one-hot encoding: (999, 10)
Training set shape:   (639, 128, 1292, 1), (639, 10)
Validation set shape: (160, 128, 1292, 1), (160, 10)
Testing set shape:    (200, 128, 1292, 1), (200, 10)


In [18]:
# --- Global Normalization (Min-Max Scaling) ---
print("Applying global Min-Max normalization...")
scaler = MinMaxScaler()

# Reshape data for scaler (flatten frequency and time dimensions)
original_train_shape = X_train.shape
original_val_shape = X_val.shape
original_test_shape = X_test.shape

X_train_reshaped = X_train.reshape(original_train_shape[0], -1)
X_val_reshaped = X_val.reshape(original_val_shape[0], -1)
X_test_reshaped = X_test.reshape(original_test_shape[0], -1)

# Fit scaler ONLY on the training data
scaler.fit(X_train_reshaped)

# Transform all datasets
X_train_scaled_reshaped = scaler.transform(X_train_reshaped)
X_val_scaled_reshaped = scaler.transform(X_val_reshaped)
X_test_scaled_reshaped = scaler.transform(X_test_reshaped)

# Reshape back to original image format (with channel dimension)
X_train = X_train_scaled_reshaped.reshape(original_train_shape)
X_val = X_val_scaled_reshaped.reshape(original_val_shape)
X_test = X_test_scaled_reshaped.reshape(original_test_shape)

print("Normalization complete.")
print(f"Scaled X_train min: {np.min(X_train):.4f}, max: {np.max(X_train):.4f}")
print(f"Scaled X_val min: {np.min(X_val):.4f}, max: {np.max(X_val):.4f}")
print(f"Scaled X_test min: {np.min(X_test):.4f}, max: {np.max(X_test):.4f}")

Applying global Min-Max normalization...
Normalization complete.
Scaled X_train min: 0.0000, max: 1.0000
Scaled X_val min: -0.3909, max: 1.2813
Scaled X_test min: -0.3736, max: 1.2646


In [19]:
# ## 5. Custom Augmentation Layers

# Registering custom layers allows saving/loading models that use them
@register_keras_serializable(package="Custom", name="FrequencyMasking")
class FrequencyMasking(layers.Layer):
    """Applies Frequency Masking augmentation."""
    def __init__(self, freq_mask_param, name="frequency_masking", **kwargs):
        super().__init__(name=name, **kwargs)
        self.freq_mask_param = freq_mask_param

    def call(self, inputs, training=None):
        if training is None: # Handle case during model build
            training = False

        def apply_augmentation():
            n_mels = tf.shape(inputs)[1] # Frequency dimension
            f = tf.random.uniform(shape=(), minval=0, maxval=self.freq_mask_param + 1, dtype=tf.int32)
            def perform_mask():
                f0 = tf.random.uniform(shape=(), minval=0, maxval=n_mels - f, dtype=tf.int32)
                mask_value = 0.0 # Mask with zero (assuming normalized input near zero)
                mask = tf.concat(
                    [tf.ones(shape=(1, f0, 1, 1), dtype=inputs.dtype),
                     tf.fill(dims=(1, f, 1, 1), value=mask_value),
                     tf.ones(shape=(1, n_mels - f0 - f, 1, 1), dtype=inputs.dtype)],
                    axis=1
                )
                # Ensure mask aligns with the batch dimension
                batch_size = tf.shape(inputs)[0]
                mask_repeated = tf.tile(mask, [batch_size, 1, tf.shape(inputs)[2], 1])
                return inputs * mask_repeated

            # Only apply if f > 0
            return tf.cond(tf.greater(f, 0), true_fn=perform_mask, false_fn=lambda: inputs)

        # Apply augmentation only during training
        return tf.cond(tf.cast(training, tf.bool),
                       true_fn=apply_augmentation,
                       false_fn=lambda: inputs)

    def get_config(self):
        config = super().get_config()
        config.update({"freq_mask_param": self.freq_mask_param})
        return config

@register_keras_serializable(package="Custom", name="TimeMasking")
class TimeMasking(layers.Layer):
    """Applies Time Masking augmentation."""
    def __init__(self, time_mask_param, name="time_masking", **kwargs):
        super().__init__(name=name, **kwargs)
        self.time_mask_param = time_mask_param

    def call(self, inputs, training=None):
        if training is None:
             training = False

        def apply_augmentation():
            time_steps = tf.shape(inputs)[2] # Time dimension
            t = tf.random.uniform(shape=(), minval=0, maxval=self.time_mask_param + 1, dtype=tf.int32)
            def perform_mask():
                t0 = tf.random.uniform(shape=(), minval=0, maxval=time_steps - t, dtype=tf.int32)
                mask_value = 0.0
                mask = tf.concat(
                    [tf.ones(shape=(1, 1, t0, 1), dtype=inputs.dtype),
                     tf.fill(dims=(1, 1, t, 1), value=mask_value),
                     tf.ones(shape=(1, 1, time_steps - t0 - t, 1), dtype=inputs.dtype)],
                    axis=2 # Concatenate along the time axis
                )
                batch_size = tf.shape(inputs)[0]
                mask_repeated = tf.tile(mask, [batch_size, tf.shape(inputs)[1], 1, 1])
                return inputs * mask_repeated

            return tf.cond(tf.greater(t, 0), true_fn=perform_mask, false_fn=lambda: inputs)

        return tf.cond(tf.cast(training, tf.bool),
                       true_fn=apply_augmentation,
                       false_fn=lambda: inputs)

    def get_config(self):
        config = super().get_config()
        config.update({"time_mask_param": self.time_mask_param})
        return config

In [20]:
# ## 6. Build "Best Case" Custom CNN Model (No Transfer Learning)

def build_custom_cnn_model(input_shape, num_genres, l2_reg=0.001):
    """Builds a refined custom CNN model with regularization."""

    model = models.Sequential(name="Custom_CNN_Genre_Classifier")

    # Input Layer + Augmentation
    model.add(layers.Input(shape=input_shape, name="Input"))
    model.add(FrequencyMasking(freq_mask_param=FREQ_MASK_PARAM, name="FreqMask"))
    model.add(TimeMasking(time_mask_param=TIME_MASK_PARAM, name="TimeMask"))
    # Optional: Add Gaussian Noise for more augmentation
    # model.add(layers.GaussianNoise(0.05, name="GaussianNoise"))

    # --- Feature Extraction Blocks ---

    # Block 1
    model.add(layers.Conv2D(32, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv1_1"))
    model.add(layers.BatchNormalization(name="BN1_1"))
    model.add(layers.Activation('relu', name="Relu1_1"))
    model.add(layers.Conv2D(32, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv1_2")) # Added second conv
    model.add(layers.BatchNormalization(name="BN1_2"))
    model.add(layers.Activation('relu', name="Relu1_2"))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name="Pool1"))
    model.add(layers.Dropout(0.25, name="Drop1")) # Dropout after pooling

    # Block 2
    model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv2_1"))
    model.add(layers.BatchNormalization(name="BN2_1"))
    model.add(layers.Activation('relu', name="Relu2_1"))
    model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv2_2")) # Added second conv
    model.add(layers.BatchNormalization(name="BN2_2"))
    model.add(layers.Activation('relu', name="Relu2_2"))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name="Pool2"))
    model.add(layers.Dropout(0.25, name="Drop2"))

    # Block 3
    model.add(layers.Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv3_1"))
    model.add(layers.BatchNormalization(name="BN3_1"))
    model.add(layers.Activation('relu', name="Relu3_1"))
    model.add(layers.Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv3_2")) # Added second conv
    model.add(layers.BatchNormalization(name="BN3_2"))
    model.add(layers.Activation('relu', name="Relu3_2"))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name="Pool3"))
    model.add(layers.Dropout(0.3, name="Drop3")) # Slightly increased dropout

    # Block 4 (Making it deeper)
    # model.add(layers.Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(l2_reg), name="Conv4_1"))
    # model.add(layers.BatchNormalization(name="BN4_1"))
    # model.add(layers.Activation('relu', name="Relu4_1"))
    # model.add(layers.MaxPooling2D((2, 2), strides=(2, 2), name="Pool4"))
    # model.add(layers.Dropout(0.3, name="Drop4"))

    # --- Classification Head ---
    model.add(layers.GlobalAveragePooling2D(name="GAP"))
    model.add(layers.Dense(128, activation='relu', kernel_regularizer=l2(l2_reg), name="Dense1")) # Reduced Dense size
    model.add(layers.Dropout(0.5, name="Drop_Dense")) # Standard dropout for dense
    model.add(layers.Dense(num_genres, activation='softmax', name="Output"))

    # --- Compile Model ---
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [21]:
# --- Build the Model ---
input_shape = X_train.shape[1:] # Should be (N_MELS, EXPECTED_LENGTH, 1)
print(f"\nBuilding model with input shape: {input_shape}")

model = build_custom_cnn_model(input_shape, NUM_GENRES)
model.summary()


Building model with input shape: (128, 1292, 1)


In [22]:
# ## 7. Train the Model

# --- Callbacks ---
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=BEST_MODEL_PATH,
        save_best_only=True,
        monitor='val_accuracy', # Save based on best validation accuracy
        verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,     # Reduce LR by factor of 5
        patience=7,     # Reduce if val_loss hasn't improved for 7 epochs
        min_lr=1e-7,    # Minimum learning rate
        verbose=1
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=20,    # Stop if val_loss hasn't improved for 20 epochs
        restore_best_weights=True, # Restore weights from the epoch with best val_loss
        verbose=1
    )
]

In [23]:
# --- Start Training ---
print("\n--- Starting Model Training ---")
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val), # Use the dedicated validation set
    callbacks=callbacks,
    verbose=1 # Show progress bar
)
print("--- Training Complete ---")


--- Starting Model Training ---
Epoch 1/150


W0000 00:00:1745379738.235368 1404018 loop_optimizer.cc:934] Skipping loop optimization for Merge node with control input: StatefulPartitionedCall/Custom_CNN_Genre_Classifier_1/FreqMask_1/cond/branch_executed/_118


[1m 5/20[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m4:03[0m 16s/step - accuracy: 0.1402 - loss: 3.0572

In [None]:
# ## 8. Evaluate the Model

def plot_training_history(hist):
    """Plots accuracy and loss curves for training and validation sets."""
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))

    # Find the epoch with the best validation accuracy
    best_val_acc_epoch = np.argmax(hist.history['val_accuracy'])
    best_val_acc = np.max(hist.history['val_accuracy'])
    best_val_loss_at_best_acc = hist.history['val_loss'][best_val_acc_epoch]

    # Accuracy subplot
    axs[0].plot(hist.history["accuracy"], label="Training Accuracy")
    axs[0].plot(hist.history["val_accuracy"], label="Validation Accuracy")
    axs[0].scatter(best_val_acc_epoch, best_val_acc, color='red', label=f'Best Val Acc: {best_val_acc:.4f}', zorder=5)
    axs[0].set_ylabel("Accuracy")
    axs[0].set_xlabel("Epoch")
    axs[0].legend(loc="lower right")
    axs[0].set_title("Accuracy Evaluation")
    axs[0].grid(True, linestyle='--', alpha=0.6)

    # Loss subplot
    axs[1].plot(hist.history["loss"], label="Training Loss")
    axs[1].plot(hist.history["val_loss"], label="Validation Loss")
    axs[1].scatter(best_val_acc_epoch, best_val_loss_at_best_acc, color='red', label=f'Val Loss at Best Val Acc: {best_val_loss_at_best_acc:.4f}', zorder=5)
    axs[1].set_ylabel("Loss")
    axs[1].set_xlabel("Epoch")
    axs[1].legend(loc="upper right")
    axs[1].set_title("Loss Evaluation")
    axs[1].grid(True, linestyle='--', alpha=0.6)

    plt.suptitle(f"Training History (Best Validation Accuracy at Epoch {best_val_acc_epoch+1})")
    plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout to prevent title overlap
    plt.show()

In [None]:
# --- Plot History ---
print("\n--- Plotting Training History ---")
plot_training_history(history)

In [None]:
# --- Load Best Model and Evaluate on Test Set ---
print("\n--- Evaluating Best Model on Test Set ---")
try:
    # Load the best model saved by ModelCheckpoint
    # Need to provide custom objects for loading custom layers
    custom_objects = {
        "FrequencyMasking": FrequencyMasking,
        "TimeMasking": TimeMasking
    }
    best_model = models.load_model(BEST_MODEL_PATH, custom_objects=custom_objects)
    print(f"Successfully loaded best model from: {BEST_MODEL_PATH}")

    test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Loss:     {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
# --- Generate Predictions for Reports ---
    y_pred_prob = best_model.predict(X_test)
    y_pred_classes = np.argmax(y_pred_prob, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)

In [None]:
 # --- Confusion Matrix ---
    print("\n--- Confusion Matrix ---")
    cm = confusion_matrix(y_true_classes, y_pred_classes)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=GENRES, yticklabels=GENRES)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix (Test Set)')
    plt.show()

In [None]:
# --- Classification Report ---
    print("\n--- Classification Report ---")
    print(classification_report(y_true_classes, y_pred_classes, target_names=GENRES, digits=4))

except Exception as e:
    print(f"Error loading or evaluating best model: {e}")
    print("Evaluation could not be completed.")

In [None]:
# ## 9. Test Prediction on a Sample

def predict_single_genre(model_to_use, spectrogram_scaled, genres_list):
    """ Predicts genre from a single scaled spectrogram. """
    if spectrogram_scaled.ndim == 3: # (height, width, channel)
        spectrogram_scaled = spectrogram_scaled[np.newaxis, ...] # Add batch dimension (1, height, width, channel)
    elif spectrogram_scaled.ndim != 4:
        raise ValueError(f"Input spectrogram has unexpected shape: {spectrogram_scaled.shape}")

    prediction = model_to_use.predict(spectrogram_scaled)[0]
    predicted_index = np.argmax(prediction)
    predicted_genre = genres_list[predicted_index]
    confidence_scores = {genre: float(score) for genre, score in zip(genres_list, prediction)}
    return predicted_genre, confidence_scores

# --- Load model if not already loaded ---
predictor_model = None
if 'best_model' in locals():
    predictor_model = best_model
else:
    try:
        custom_objects = {"FrequencyMasking": FrequencyMasking, "TimeMasking": TimeMasking}
        predictor_model = models.load_model(BEST_MODEL_PATH, custom_objects=custom_objects)
        print(f"\nLoaded best model from {BEST_MODEL_PATH} for prediction test.")
    except Exception as e:
        print(f"\nCould not load model for prediction test: {e}")

In [None]:
# --- Perform Prediction if Model Loaded ---
if predictor_model:
    print("\n--- Testing Prediction on Random Sample ---")
    # Select a random sample from the *scaled* test set
    sample_index = np.random.randint(0, len(X_test))
    sample_spectrogram_scaled = X_test[sample_index]
    true_genre_index = np.argmax(y_test[sample_index])
    true_genre = GENRES[true_genre_index]

    predicted_genre, confidence_scores = predict_single_genre(predictor_model, sample_spectrogram_scaled, GENRES)

    print(f"Sample Index: {sample_index}")
    print(f"True Genre:   {true_genre}")
    print(f"Predicted Genre: {predicted_genre}")
    print("\nConfidence Scores:")
    # Sort scores descending for display
    for genre, score in sorted(confidence_scores.items(), key=lambda item: item[1], reverse=True):
        print(f"  {genre:<10}: {score:.4f}")


In [None]:
 # --- Plot the scaled spectrogram ---
    plt.figure(figsize=(10, 4))
    display_spec = sample_spectrogram_scaled[:, :, 0] # Remove channel dim for plotting
    img = librosa.display.specshow(display_spec, x_axis='time', y_axis='mel', cmap='viridis')
    plt.colorbar(img, label='Normalized Magnitude') # Indicate it's normalized
    plt.title(f"Sample Spectrogram (Normalized) - True: {true_genre}, Predicted: {predicted_genre}")
    plt.tight_layout()
    plt.show()
else:
    print("\nSkipping prediction test as model could not be loaded.")


print("\n--- Notebook Execution Complete ---")