<a href="https://colab.research.google.com/github/Aadil404/Music-Emotion-Recognition/blob/main/notebooks/03_Model_Definition_And_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 2. Navigate to your project directory (adjust the path if needed)
%cd /content/drive/MyDrive/MER(final-year-project)/

# 3. Install necessary libraries
!pip install librosa tensorflow pandas scikit-learn matplotlib seaborn

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/MER(final-year-project)


In [None]:
# (Mount Drive, install libraries: tensorflow, etc.)
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import os

In [None]:
# --- 1. Load Preprocessed Segment Data ---
# np.load on an .npz archive returns a lazily-read NpzFile that holds an open file
# handle; the `with` block closes it once the four arrays have been materialised.
with np.load('processed_data/emotify_spectrograms_5s_segments.npz') as data:
    X_train, y_train = data['X_train'], data['y_train']
    X_test, y_test = data['X_test'], data['y_test']

# Sanity checks: every spectrogram segment must have exactly one label vector.
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train sample counts differ"
assert X_test.shape[0] == y_test.shape[0], "X_test / y_test sample counts differ"

# --- 2. Define the Multi-Label CNN Model ---
input_shape = X_train.shape[1:]  # Per-sample shape, e.g. (128, 216, 1) for 5-second segments
num_classes = y_train.shape[1]  # Number of emotion columns

print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")
print(f"Training segments: {X_train.shape[0]}")
print(f"Testing segments: {X_test.shape[0]}")
print(f"Label range: [{y_train.min():.3f}, {y_train.max():.3f}]")  # Should be 0-1 for weighted labels

Input shape: (128, 216, 1)
Number of classes: 9
Training segments: 7346
Testing segments: 1806
Label range: [0.000, 1.000]


In [None]:
# Display the training segment at index 1 as a raw array for a quick visual sanity check.
X_train[1]

array([[[-0.21273045],
        [-0.21340922],
        [-0.2100883 ],
        ...,
        [-0.11451066],
        [-0.10995337],
        [-0.08351605]],

       [[-0.22505255],
        [-0.20089176],
        [-0.21638587],
        ...,
        [-0.05744929],
        [-0.05976393],
        [-0.12263771]],

       [[-0.22316606],
        [-0.17084673],
        [-0.20675841],
        ...,
        [-0.09876772],
        [-0.06971312],
        [-0.07772405]],

       ...,

       [[-0.8908704 ],
        [-0.96614456],
        [-1.        ],
        ...,
        [-1.        ],
        [-0.9144739 ],
        [-0.7795108 ]],

       [[-0.8905935 ],
        [-0.96588624],
        [-1.        ],
        ...,
        [-1.        ],
        [-0.9199661 ],
        [-0.7807682 ]],

       [[-0.8909752 ],
        [-0.96638536],
        [-1.        ],
        ...,
        [-1.        ],
        [-0.92649394],
        [-0.7840296 ]]], dtype=float32)

In [None]:
# Display the label vector that belongs to the training segment at index 1.
y_train[1]

array([0.18181819, 0.        , 0.09090909, 0.        , 0.09090909,
       0.45454547, 0.09090909, 0.72727275, 0.27272728], dtype=float32)

In [None]:
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

def create_segment_model(input_shape, num_classes):
    """Build the (uncompiled) CNN used for segment-level multi-label emotion prediction.

    The network stacks three double-convolution blocks (32, 64, 128 filters), each
    followed by 2x2 max-pooling and dropout, then a single 256-filter convolution
    reduced by global average pooling, two dense layers (512 and 256 units) and a
    sigmoid output layer.

    Args:
        input_shape: Shape of one input sample without the batch axis.
        num_classes: Number of output units (one independent sigmoid per label).

    Returns:
        A `Sequential` Keras model; the caller is responsible for compiling it.
    """
    model = models.Sequential([
        # Declare the input explicitly. Passing `input_shape=` to the first layer of a
        # Sequential model is discouraged by Keras and emits a UserWarning.
        layers.Input(shape=input_shape),

        # First Conv Block - smaller filters for segment-level patterns
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),

        # Second Conv Block
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.3),

        # Third Conv Block
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.35),

        # Fourth Conv Block - global average pooling collapses the spatial axes,
        # so the dense head sees one 256-value vector per sample.
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.4),

        # Dense Layers
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),

        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),

        # Output - SIGMOID for multi-label weighted probabilities
        layers.Dense(num_classes, activation='sigmoid')
    ])

    return model

# Instantiate the CNN for the input shape and class count derived from the training arrays.
model = create_segment_model(input_shape, num_classes)

# Custom metric for multi-label F1 score
class MultiLabelF1(tf.keras.metrics.Metric):
    """Streaming F1 score built from Keras `Precision` and `Recall` metrics.

    Both wrapped metrics binarise predictions with the same `threshold`; the F1
    value is the harmonic mean of their accumulated results.

    NOTE(review): `Precision`/`Recall` expect binary ground truth. With fractional
    labels they presumably treat every non-zero label as positive — confirm this is
    the intended definition of a "positive" emotion.

    Args:
        name: Metric name shown in logs (also the suffix of the `val_...` key).
        threshold: Decision threshold applied to the predictions.
        **kwargs: Forwarded to `tf.keras.metrics.Metric`.
    """

    def __init__(self, name='f1_score', threshold=0.5, **kwargs):
        super().__init__(name=name, **kwargs)
        # Kept so that get_config() can reproduce this metric.
        self.threshold = threshold
        self.precision = tf.keras.metrics.Precision(thresholds=threshold)
        self.recall = tf.keras.metrics.Recall(thresholds=threshold)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch into the wrapped precision and recall metrics."""
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        """Return 2*P*R / (P + R); epsilon guards the division when P + R is zero."""
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_state(self):
        """Clear the accumulated statistics (the hook current Keras calls between epochs)."""
        self.precision.reset_state()
        self.recall.reset_state()

    def reset_states(self):
        """Deprecated spelling kept for callers that still use the old method name."""
        self.reset_state()

    def get_config(self):
        """Return the constructor arguments so the metric can be re-created from config."""
        config = super().get_config()
        config['threshold'] = self.threshold
        return config

# Compile with appropriate settings for weighted probabilities.
# The order of the entries in `metrics` determines the order of the values that
# model.evaluate() returns after the loss, so keep it stable.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',  # Accepts soft targets in the 0-1 range
    metrics=[
        # NOTE(review): the logged accuracy (~0.09-0.13) is close to 1/num_classes, which
        # suggests Keras resolves this string to an argmax-style accuracy — confirm
        # whether that number is meaningful for multi-label targets.
        'accuracy',
        MultiLabelF1(threshold=0.3),  # Custom F1 score; predictions above 0.3 count as positive
        tf.keras.metrics.AUC(name='auc', multi_label=True),
        # Same 0.3 decision threshold as the F1 metric above.
        tf.keras.metrics.Precision(name='precision', thresholds=0.3),
        tf.keras.metrics.Recall(name='recall', thresholds=0.3)
    ]
)

# Print the layer-by-layer architecture and parameter counts.
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
import glob
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Folder that receives the saved weights; created if it does not exist yet.
checkpoint_dir = 'checkpoints/segment_model/'
os.makedirs(checkpoint_dir, exist_ok=True)

# Every callback below watches the validation F1 score (higher is better) and
# reports what it does, so those settings are declared once and shared.
_f1_monitor = dict(monitor='val_f1_score', mode='max', verbose=1)

# Keep only the weights of the best epoch seen so far.
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'best_model.weights.h5'),
    save_best_only=True,
    save_weights_only=True,
    **_f1_monitor,
)

# Stop after 20 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    patience=20,
    restore_best_weights=True,
    **_f1_monitor,
)

# Halve the learning rate after 8 stagnant epochs, never going below 1e-7.
reduce_lr = ReduceLROnPlateau(
    factor=0.5,
    patience=8,
    min_lr=1e-7,
    **_f1_monitor,
)

# Resume from the most recently written weights file, if one exists.
checkpoints = glob.glob(os.path.join(checkpoint_dir, '*.weights.h5'))
initial_epoch = 0
if checkpoints:
    # Modification time reflects when the weights were last written; on Unix,
    # getctime reports the last metadata change instead.
    latest_checkpoint = max(checkpoints, key=os.path.getmtime)
    print(f"✅ Resuming from: {latest_checkpoint}")
    model.load_weights(latest_checkpoint)
    # Recover the epoch from a file name such as 'epoch_12.weights.h5'. Only the
    # file name is parsed, so an 'epoch_' fragment in a directory name cannot interfere.
    checkpoint_name = os.path.basename(latest_checkpoint)
    try:
        initial_epoch = int(checkpoint_name.split('epoch_')[1].split('.')[0])
    except (IndexError, ValueError):
        # No 'epoch_<n>' marker in the name (or it is not a number): restart the count.
        initial_epoch = 0
else:
    print("❌ No checkpoint found. Training from scratch.")

print(f"Starting from epoch: {initial_epoch}")

❌ No checkpoint found. Training from scratch.
Starting from epoch: 0


In [None]:
# Calculate class weights for imbalanced emotions (optional but helpful)
def calculate_class_weights(y_train):
    """Return inverse-frequency weights for each label column.

    The weight of column ``i`` is ``n_samples / (n_classes * column_sum_i)``, where
    ``column_sum_i`` is the sum of the labels in that column. A column whose sum is
    zero is treated as if its sum were 1, which avoids dividing by zero.

    Args:
        y_train: 2-D label matrix with one row per sample and one column per class.

    Returns:
        dict mapping the column index to its weight.
    """
    n_samples = len(y_train)
    column_sums = np.sum(y_train, axis=0)

    # Columns without any label mass would otherwise cause a division by zero.
    column_sums[column_sums == 0] = 1

    n_classes = len(column_sums)
    weights = n_samples / (n_classes * column_sums)
    return dict(enumerate(weights))

# Derive one weight per label column from the training labels and show them.
class_weights = calculate_class_weights(y_train)
print("Class weights:", class_weights)

# Enhanced training with more epochs for segment-based approach.
# NOTE(review): validation_data is the test split, and the checkpoint, early-stopping
# and learning-rate callbacks all select on its val_f1_score — consider a separate
# validation split so the final test metrics stay unbiased.
# NOTE(review): Keras documents `class_weight` as a class-index -> weight mapping for
# one-hot or sparse targets; y_train holds fractional multi-label vectors, so confirm
# that the weighting actually applied is the intended per-emotion weighting.
history = model.fit(
    X_train, y_train,
    epochs=100,  # Upper bound; early stopping may end training sooner
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint_callback, early_stopping, reduce_lr],
    initial_epoch=initial_epoch,  # Set by the checkpoint-resume cell
    class_weight=class_weights,  # Handle class imbalance
    verbose=1
)

Class weights: {0: np.float32(0.82283634), 1: np.float32(0.5710493), 2: np.float32(0.61373276), 3: np.float32(0.44373468), 4: np.float32(0.370971), 5: np.float32(0.5961752), 6: np.float32(0.42075852), 7: np.float32(0.4820439), 8: np.float32(0.5933279)}
Epoch 1/100
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - accuracy: 0.1294 - auc: 0.5259 - f1_score: 0.7697 - loss: 0.4565 - precision: 0.8658 - recall: 0.6927
Epoch 1: val_f1_score improved from -inf to 0.73562, saving model to checkpoints/segment_model/best_model.weights.h5
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 178ms/step - accuracy: 0.1294 - auc: 0.5259 - f1_score: 0.7697 - loss: 0.4565 - precision: 0.8658 - recall: 0.6928 - val_accuracy: 0.0914 - val_auc: 0.5855 - val_f1_score: 0.7356 - val_loss: 0.7067 - val_precision: 0.8273 - val_recall: 0.6622 - learning_rate: 1.0000e-04
Epoch 2/100
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy:

In [None]:
# Run inference on one held-out segment (test index 110).
# model.predict works on batches, so the sample is given a leading axis of length 1.
input_sample = X_test[110][np.newaxis, ...]
predictions = model.predict(input_sample)
print(predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[[0.5245677  0.7753858  0.8001532  0.12771696 0.32903787 0.7748723
  0.22712222 0.64705217 0.3228753 ]]


In [None]:
# Ground-truth label vector for test index 110, to compare with the prediction above.
y_test[110]

array([0.05882353, 0.29411766, 0.23529412, 0.4117647 , 0.47058824,
       0.        , 0.        , 0.1764706 , 0.29411766], dtype=float32)

In [None]:
# Evaluate the model on the test set
print("Evaluating the model on the test set...")
# return_dict=True pairs every value with its metric name. Zipping the returned list
# with model.metrics_names is unreliable: in recent Keras versions that attribute only
# holds 'loss' and 'compile_metrics', so most metrics were dropped or mislabelled.
test_metrics = model.evaluate(X_test, y_test, batch_size=32, verbose=1, return_dict=True)

# Keep `results` as a plain list of values for any cell that still indexes it.
# Assumes the dict preserves the loss-first, compile-order sequence — TODO confirm.
results = list(test_metrics.values())

# Print the evaluation results, one line per metric (the loss is included once).
for name, value in test_metrics.items():
    print(f"Test {name}: {value}")

Evaluating the model on the test set...
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.0854 - auc: 0.5425 - f1_score: 0.7283 - loss: 0.8680 - precision: 0.8030 - recall: 0.6665
Test Loss: 0.8627260327339172
Test loss: 0.8627260327339172
Test compile_metrics: 0.09302325546741486


In [None]:
# Show the raw values returned by model.evaluate() in the evaluation cell.
results

[0.8627260327339172,
 0.09302325546741486,
 0.737583339214325,
 0.5411592125892639,
 0.8277731537818909,
 0.6651159524917603]