## Environment Setup

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix
)


## Data Loading & Exploration

In [None]:
# 2.1 Unzip the dataset you’ve uploaded
import zipfile

# Assumes RAVDESS.zip is in the notebook's working directory; adjust the path if needed.
with zipfile.ZipFile("RAVDESS.zip", "r") as zip_ref:
    zip_ref.extractall("RAVDESS")   # extracts into ./RAVDESS/

# 2.2 Point to the extracted folder
DATA_PATH = "RAVDESS"
assert os.path.isdir(DATA_PATH), "RAVDESS folder not found!"

# 2.3 Gather all .wav file paths (expected layout: RAVDESS/<actor_dir>/<clip>.wav)
# - Both directory levels are sorted: os.listdir returns entries in arbitrary
#   order, which would make the seeded train/test split differ between machines.
# - Top-level entries that are not directories (e.g. a stray README) are skipped
#   instead of crashing the inner os.listdir call.
all_files = [
    os.path.join(DATA_PATH, actor_dir, file)
    for actor_dir in sorted(os.listdir(DATA_PATH))
    if os.path.isdir(os.path.join(DATA_PATH, actor_dir))
    for file in sorted(os.listdir(os.path.join(DATA_PATH, actor_dir)))
    if file.endswith(".wav")
]
print(f"Total audio files: {len(all_files)}")

# 2.4 Quick class‐distribution check
# Emotion code (third dash-separated field of the file name) -> label.
_EMOTION_BY_CODE = {
    "01": "neutral", "02": "calm", "03": "happy", "04": "sad",
    "05": "angry", "06": "fearful", "07": "disgust", "08": "surprised",
}


def emotion_from_filename(fn):
    """
    Return the emotion label encoded in an audio file name.

    Parameters
    ----------
    fn : str
        File name or full path, e.g. "RAVDESS/Actor_01/03-01-05-01-01-01-01.wav".

    Returns
    -------
    str
        One of the eight labels in `_EMOTION_BY_CODE`.

    Raises
    ------
    IndexError
        If the base name has fewer than three dash-separated fields.
    KeyError
        If the third field is not a known emotion code.
    """
    # Parse only the base name: a hyphen anywhere in the directory part
    # (e.g. "my-data/Actor_01/...") would otherwise shift the field index.
    code = os.path.basename(fn).split("-")[2]
    return _EMOTION_BY_CODE[code]

# One emotion label per audio file, in the same order as all_files.
labels = list(map(emotion_from_filename, all_files))

# Count how many clips fall into each class.
unique, counts = np.unique(labels, return_counts=True)

print("Class distribution:")
for emotion_name, n_clips in zip(unique, counts):
    print(f"  {emotion_name}: {n_clips}")


## 🎧 Visualizing One Audio Sample per Emotion

In [None]:
from IPython.display import Audio, display, HTML
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

print("🎧 Emotion Samples\n")

# Number of distinct emotions, computed once instead of on every loop iteration.
n_emotions = len(set(labels))
seen = set()

# Walk the files in order and preview the first clip found for each emotion.
for file, label in zip(all_files, labels):
    if label in seen:
        continue
    seen.add(label)

    y, sr = librosa.load(file, sr=22050)

    # Header for each emotion
    display(HTML(f"<h3>🎙️ Emotion: <span style='color:blue'>{label}</span></h3>"))

    # Audio Player
    display(Audio(y, rate=sr))

    # One figure per emotion: waveform on the left, mel spectrogram on the right.
    # (No separate plt.figure() call is made: it only produced an extra empty figure.)
    fig, ax = plt.subplots(1, 2, figsize=(14, 3))

    # Waveform
    librosa.display.waveshow(y, sr=sr, ax=ax[0])
    ax[0].set_title(f'Waveform - {label}')
    ax[0].set_xlabel("Time")
    ax[0].set_ylabel("Amplitude")

    # Spectrogram (power converted to dB relative to the clip's own maximum)
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    img = librosa.display.specshow(mel_spec_db, sr=sr, x_axis='time', y_axis='mel', ax=ax[1])
    ax[1].set_title(f'Mel Spectrogram - {label}')
    fig.colorbar(img, ax=ax[1], format="%+2.0f dB")

    fig.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure; one is created per emotion

    # Stop as soon as every emotion has been previewed.
    if len(seen) == n_emotions:
        break


## Audio Preprocessing & Feature Extraction

In [None]:
# Loading parameters shared by every clip: the sampling rate requested from
# librosa and the fixed clip length used when padding/truncating.
SAMPLE_RATE = 22050                           # sampling rate passed to librosa.load
DURATION = 3                                  # seconds kept per clip
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE    # samples in one fixed-length clip

def extract_log_mel_spectrogram(file_path, n_mels=128):
    """
    Load an audio file and return its log-scaled mel spectrogram.

    The signal is loaded at SAMPLE_RATE, limited to DURATION seconds, and then
    zero-padded at the end (or cut) so it holds exactly SAMPLES_PER_TRACK samples.

    Parameters
    ----------
    file_path : path of the audio file handed to `librosa.load`.
    n_mels : number of mel bands (default 128).

    Returns
    -------
    The mel power spectrogram converted to dB relative to its own maximum
    (`ref=np.max`).
    """
    signal, rate = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Copy the signal into a zero-filled buffer of the target length: shorter
    # clips end up right-padded with zeros, longer ones are truncated.
    kept = min(len(signal), SAMPLES_PER_TRACK)
    fixed = np.zeros(SAMPLES_PER_TRACK, dtype=signal.dtype)
    fixed[:kept] = signal[:kept]

    # melspectrogram takes its inputs as keyword arguments.
    mel_power = librosa.feature.melspectrogram(y=fixed, sr=rate, n_mels=n_mels)
    return librosa.power_to_db(mel_power, ref=np.max)

## Dataset Preparation

In [None]:
# Features: one log-mel spectrogram per clip. Targets: the emotion parsed from its name.
X = [extract_log_mel_spectrogram(path) for path in all_files]
y = [emotion_from_filename(path) for path in all_files]

# Stack into a single array and append a trailing channel axis.
X = np.expand_dims(np.array(X), axis=-1)   # shape: (N, n_mels, T, 1)

# Integer-encode the string labels, then one-hot encode them.
le = LabelEncoder()
y_enc = le.fit_transform(y)
y_cat = tf.keras.utils.to_categorical(y_enc)

# Train/test split: hold out 20%, stratified on the integer labels.
split_options = dict(test_size=0.2, random_state=42, stratify=y_enc)
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, **split_options)


## Model Architecture

In [None]:
def build_model(input_shape, num_classes):
    """
    Build (without compiling) the CNN emotion classifier.

    Architecture: three Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D(2x2)
    stages with 32, 64 and 128 filters, followed by Flatten, Dense(128, ReLU),
    Dropout(0.3) and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample without the batch axis, e.g. (n_mels, T, 1).
    num_classes : int
        Number of output classes (width of the softmax layer).

    Returns
    -------
    tf.keras.Sequential
        The uncompiled model.
    """
    layers = tf.keras.layers

    # Declare the input with an explicit Input object; passing `input_shape=`
    # to the first layer of a Sequential model is deprecated in recent Keras.
    stack = [tf.keras.Input(shape=input_shape)]

    # Three identical convolutional stages that differ only in filter count.
    for n_filters in (32, 64, 128):
        stack += [
            layers.Conv2D(n_filters, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
        ]

    # Classifier head.
    stack += [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return tf.keras.Sequential(stack)

# Derive the model dimensions from the prepared arrays.
num_classes = y_cat.shape[1]
input_shape = X_train.shape[1:]   # (n_mels, T, 1)

model = build_model(input_shape, num_classes)

# Adam optimizer with categorical cross-entropy; accuracy is tracked as a metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


## Training & Hyperparameter Tuning

In [None]:
EPOCHS = 50
BATCH_SIZE = 32

# Multiply the learning rate by 0.5 after 5 epochs without val_loss improvement.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', patience=5, factor=0.5
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
early_stopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', restore_best_weights=True, patience=10
)
callbacks = [lr_scheduler, early_stopper]

# 10% of the training data is held out by Keras for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
    callbacks=callbacks,
)


## Evaluation

In [None]:
# 7. EVALUATION
from sklearn.metrics import classification_report

# 7.1 Test loss & accuracy as reported by model.evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(
    f"Test Loss:     {test_loss:.4f}",
    f"Test Accuracy: {test_acc:.4f}",
    sep="\n",
)

# 7.2 Predicted vs. true class indices (argmax over the class axis)
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# 7.3 Weighted-average precision / recall / F1 across classes
prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
print(
    f"Precision:     {prec:.4f}",
    f"Recall:        {rec:.4f}",
    f"F1-score:      {f1:.4f}\n",
    sep="\n",
)

# Per-class breakdown, labelled with the encoder's class names
print("Classification Report:")
report = classification_report(y_true, y_pred, target_names=le.classes_)
print(report)


In [None]:
def _best_epoch(series, pick):
    """Return (best value, 1-based epoch of its first occurrence) for one metric list."""
    best = pick(series)
    return best, series.index(best) + 1


hist = history.history

# Accuracy is best at its maximum, loss at its minimum.
best_train_acc, best_train_epoch = _best_epoch(hist['accuracy'], max)
lowest_train_loss, lowest_train_epoch = _best_epoch(hist['loss'], min)
best_val_acc, best_val_epoch = _best_epoch(hist['val_accuracy'], max)
lowest_val_loss, lowest_val_epoch = _best_epoch(hist['val_loss'], min)

print(
    f"Best Training Accuracy: {best_train_acc:.4f} at epoch {best_train_epoch}",
    f"Lowest Training Loss: {lowest_train_loss:.4f} at epoch {lowest_train_epoch}",
    f"Best Validation Accuracy: {best_val_acc:.4f} at epoch {best_val_epoch}",
    f"Lowest Validation Loss: {lowest_val_loss:.4f} at epoch {lowest_val_epoch}",
    sep="\n",
)


In [None]:
import json

# 1) Reshape the per-metric history lists into one record per epoch
hist = history.history
num_epochs = len(hist['loss'])
epoch_data = [
    {
        "epoch":        step + 1,
        "loss":         hist['loss'][step],
        "accuracy":     hist['accuracy'][step],
        "val_loss":     hist['val_loss'][step],
        "val_accuracy": hist['val_accuracy'][step],
    }
    for step in range(num_epochs)
]

# 2) Final test-set loss & accuracy
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
final_results = {"test_loss": test_loss, "test_accuracy": test_acc}

# 3) Bundle the history and the final evaluation together
output = {"training_history": epoch_data, "final_evaluation": final_results}

# 4) Write the bundle to disk as indented JSON
with open("cnn_training_summary.json", "w") as summary_file:
    json.dump(output, summary_file, indent=4)

print("Saved training+evaluation summary to cnn_training_summary.json")


## Visualization

In [None]:
# 7.4 Enhanced Confusion Matrix Plot
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion Matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix; rows are drawn on the 'True Label' axis and columns
        on the 'Predicted Label' axis.
    classes : sequence
        Tick labels, in the same order as the rows/columns of `cm`.
    normalize : bool, default False
        If True, each row is divided by its sum and cells are printed with two
        decimals; otherwise cells are printed as integers.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A row that sums to zero is left at 0 rather than divided by zero,
        # which would fill the row with NaN and print "nan" in the cells.
        cm = np.divide(
            cm.astype(float), row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        fmt = '.2f'
    else:
        fmt = 'd'

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set_title(title)
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    ax.grid(False)  # keep style-sheet grid lines from being drawn over the cells

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(classes, rotation=45, ha='right')
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(
            j, i, format(cm[i, j], fmt),
            horizontalalignment='center',
            color='white' if cm[i, j] > thresh else 'black'
        )

    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    # tight_layout runs after the axis labels are set so they are part of the layout.
    fig.tight_layout()
    plt.show()

# Confusion matrix of true vs. predicted class indices on the test set
cm = confusion_matrix(y_true, y_pred)
class_names = le.classes_

# Draw the same matrix twice: raw counts first, then the normalized version.
for use_normalized, plot_title in (
    (False, 'Confusion Matrix (Counts)'),
    (True, 'Confusion Matrix (Normalized)'),
):
    plot_confusion_matrix(cm, classes=class_names, normalize=use_normalized,
                          title=plot_title)


In [None]:
import matplotlib.pyplot as plt

# Apply the ggplot style only inside this block: plt.style.use() would switch
# the style globally and change how every other cell renders when re-run.
with plt.style.context('ggplot'):
    # Create a figure with subplots for accuracy and loss
    fig, ax = plt.subplots(1, 2, figsize=(14, 6))

    # 1-based epoch numbers on the x-axis, matching the epoch numbering used in
    # the printed best-epoch summary and the saved JSON.
    epochs = range(1, len(history.history['loss']) + 1)

    # (axes, history key, display name, train color, validation color)
    panels = [
        (ax[0], 'accuracy', 'Accuracy', 'green', 'red'),
        (ax[1], 'loss', 'Loss', 'blue', 'purple'),
    ]
    for axis, key, name, train_color, val_color in panels:
        axis.plot(epochs, history.history[key], label=f'Train {name}',
                  color=train_color, linestyle='-', marker='o', markersize=5)
        axis.plot(epochs, history.history[f'val_{key}'], label=f'Val {name}',
                  color=val_color, linestyle='--', marker='x', markersize=5)
        axis.set_title(f'Model {name} Over Epochs', fontsize=16)
        axis.set_xlabel('Epochs', fontsize=14)
        axis.set_ylabel(name, fontsize=14)
        axis.legend(loc='best')
        axis.grid(True, which='both', linestyle='-.', color='gray', alpha=0.7)

    # Adjust layout and show the plots
    fig.tight_layout()
    plt.show()


## Deployment Snippet

In [None]:
def predict_emotion(model, file_path):
    """
    Predict the emotion label for a single .wav file.

    The file is converted to a log-mel spectrogram, wrapped into a batch of
    one with a trailing channel axis, and passed through `model`; the index of
    the highest score is mapped back to a label with the fitted encoder `le`.
    """
    spectrogram = extract_log_mel_spectrogram(file_path)

    # Add the batch axis in front and the channel axis at the end.
    batch = spectrogram.reshape((1,) + spectrogram.shape + (1,))  # shape: (1, n_mels, T, 1)

    scores = model.predict(batch)[0]
    best_class = scores.argmax()
    [predicted_label] = le.inverse_transform([best_class])
    return predicted_label

# Example:
# print(predict_emotion(model, "path_to_new_audio.wav"))
