# Pattern Recognition: Speech Emotion Recognition
**Team Members:**
- AbdElRahman Bassam
- AbdElRahman Osama
- Ahmed Youssef

# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import os
import math
import IPython.display as ipd
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import glob
import itertools
import json

RANDOM_SEED = 42

# Dataset Exploration

In [None]:
# Directory that is scanned for the dataset's .wav files.
CREMA_PATH = '../input/speech-emotion-recognition-en/Crema/'

# Maps the emotion code taken from a file name (third "_"-separated field,
# see get_emotion_from_filename) to the label string used as the class name.
EMOTION_MAPPING = {
    "SAD" : "sadness",
    "ANG" : "angry",
    "DIS" : "disgust",
    "FEA" : "fear",
    "HAP" : "happy",
    "NEU" : "neutral"
}

# Actor IDs that get_gender_from_filename labels 'female';
# every other actor ID is labelled 'male'.
female_ids = [1002,1003,1004,1006,1007,1008,1009,1010,1012,1013,1018,1020,1021,
              1024,1025,1028,1029,1030,1037,1043,1046,1047,1049,1052,1053,1054,
              1055,1056,1058,1060,1061,1063,1072,1073,1074,1075,1076,1078,1079,
              1082,1084,1089,1091]

In [None]:
def load_audio(path, sr=16000):
    """Read an audio file through librosa.

    Args:
        path: Location of the audio file.
        sr: Sample rate forwarded to ``librosa.load`` (16 kHz by default so
            every clip shares one rate).

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
    """
    waveform, sample_rate = librosa.load(path, sr=sr)
    return waveform, sample_rate

def get_emotion_from_filename(filename):
    """Return the emotion label encoded in a CREMA file name.

    Args:
        filename: A bare file name or a full path; only the final path
            component is parsed.

    Returns:
        The EMOTION_MAPPING value for the third "_"-separated field, or
        None when that code is not in EMOTION_MAPPING.

    Raises:
        IndexError: If the file name has fewer than three "_"-separated fields.
    """
    # Drop any directory part first so an underscore in a parent folder
    # cannot shift the field index.
    base_name = os.path.basename(filename)
    return EMOTION_MAPPING.get(base_name.split("_")[2])

def get_gender_from_filename(filename):
    """Return 'female' or 'male' for the actor encoded in a CREMA file name.

    Args:
        filename: A bare file name or a full path; only the final path
            component is parsed.

    Returns:
        'female' when the leading actor ID is listed in female_ids,
        otherwise 'male'.

    Raises:
        ValueError: If the first "_"-separated field is not an integer.
    """
    # Parse the basename only: with a directory prefix the first field would
    # not be a number and int() would raise.
    base_name = os.path.basename(filename)
    actor_id = int(base_name.split("_")[0])
    return 'female' if actor_id in female_ids else 'male'

In [None]:
def explore_audio(audio_path):
    """Play one clip inline and plot its waveform, titled with its emotion.

    Args:
        audio_path: Path of the .wav file to inspect.
    """
    filename = os.path.basename(audio_path)
    print(f"\nExploring: {filename}")

    # sr=None is passed through load_audio to librosa.load.
    waveform, clip_sr = load_audio(path=audio_path, sr=None)
    ipd.display(ipd.Audio(waveform, rate=clip_sr))

    emotion_label = get_emotion_from_filename(filename)

    plt.figure(figsize=(10, 3))
    librosa.display.waveshow(waveform, sr=clip_sr)
    plt.suptitle(f"Waveform - {filename}")
    plt.title(f"Emotion: {emotion_label}")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def explore_dataset(dataset_path):
    """Show one example clip per emotion found in ``dataset_path``.

    The directory is scanned until one .wav file has been collected for as
    many distinct emotions as EMOTION_MAPPING has entries; each collected
    clip is then passed to explore_audio.

    Args:
        dataset_path: Directory containing the .wav files.
    """
    wanted = len(EMOTION_MAPPING)
    example_by_emotion = {}

    for entry in os.listdir(dataset_path):
        if not entry.endswith(".wav"):
            continue
        label = get_emotion_from_filename(entry)
        if label in example_by_emotion:
            # Already have an example for this emotion.
            continue
        example_by_emotion[label] = os.path.join(dataset_path, entry)
        if len(example_by_emotion) == wanted:
            break

    for label, clip_path in example_by_emotion.items():
        print(f"\nEmotion: {label}")
        explore_audio(clip_path)

In [None]:
# Play and plot one example clip for each emotion found in the dataset directory.
explore_dataset(CREMA_PATH)

# Creating Feature Spaces

In [None]:
def extract_zcr_seq(y, frame_length=2048, hop_length=512):
    """Return the per-frame zero-crossing-rate sequence of signal ``y``.

    Args:
        y: Audio samples.
        frame_length: Analysis window size forwarded to librosa.
        hop_length: Hop between frames forwarded to librosa.

    Returns:
        Row 0 of ``librosa.feature.zero_crossing_rate``'s output.
    """
    return librosa.feature.zero_crossing_rate(
        y, frame_length=frame_length, hop_length=hop_length
    )[0]

def extract_rms_seq(y, frame_length=2048, hop_length=512):
    """Return the per-frame root-mean-square sequence of signal ``y``.

    Args:
        y: Audio samples.
        frame_length: Analysis window size forwarded to librosa.
        hop_length: Hop between frames forwarded to librosa.

    Returns:
        Row 0 of ``librosa.feature.rms``'s output.
    """
    return librosa.feature.rms(
        y=y, frame_length=frame_length, hop_length=hop_length
    )[0]

def extract_mel_spectrogram(y, sr, n_mels=128, frame_length=2048, hop_length=512):
    """Return the mel spectrogram of ``y`` converted to decibels.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        n_mels: Number of mel bands (128 by default).
        frame_length: Passed to librosa as ``n_fft``.
        hop_length: Hop between frames.

    Returns:
        ``librosa.power_to_db`` applied to the mel power spectrogram.
    """
    power_spec = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_fft=frame_length,
        hop_length=hop_length,
        n_mels=n_mels,
    )
    return librosa.power_to_db(power_spec)

In [None]:
# Label containers (the feature lists are created in the caching cell).
y, y_gender = [], []  # emotion labels / emotion labels considering the gender

# Scan every .wav once to find the longest clip (in samples after resampling),
# so that all clips can later be padded to one common length.
mx_audio_len = 0
for filename in os.listdir(CREMA_PATH):
    if not filename.endswith(".wav"):
        continue
    filepath = os.path.join(CREMA_PATH, filename)
    audio, _ = load_audio(filepath)
    if len(audio) > mx_audio_len:
        mx_audio_len = len(audio)

In [None]:
# for filename in sorted(os.listdir(CREMA_PATH)):
#   if filename.endswith(".wav"):
#     filepath = os.path.join(CREMA_PATH, filename)
#     audio, sr = load_audio(filepath)
#     # pad center the audio so all have same length
#     audio = librosa.util.pad_center(audio, size=mx_audio_len)
#     X_zcr.append(extract_zcr_seq(audio))
#     X_rms.append(extract_rms_seq(audio))
#     X_mel.append(extract_mel_spectrogram(audio, sr))
#     emotion = get_emotion_from_filename(filepath)
#     gender = get_gender_from_filename(filename)
#     y.append(emotion)
#     y_gender.append(f"{emotion}_{gender}")

In [None]:
# Build the ZCR / RMS / mel feature lists, caching each clip's features as an
# .npz file so reruns skip the librosa work.
CACHE_DIR = "/kaggle/working/saved_features"
os.makedirs(CACHE_DIR, exist_ok=True)

X_zcr, X_rms, X_mel, y, y_gender = [], [], [], [], []

# Sorted so that sample i always refers to the same file across runs.
for filename in sorted(os.listdir(CREMA_PATH)):
    if not filename.endswith(".wav"):
        continue

    cache_path = os.path.join(CACHE_DIR, f"{filename}.npz")

    if os.path.exists(cache_path):
        # np.load on an .npz returns a lazily-read archive that holds an open
        # file handle; the context manager closes it once the arrays are read.
        # NOTE(review): cached features were padded to whatever mx_audio_len
        # was when they were written — clear CACHE_DIR if the dataset changes.
        with np.load(cache_path) as data:
            zcr, rms, mel = data['zcr'], data['rms'], data['mel']
    else:
        filepath = os.path.join(CREMA_PATH, filename)
        audio, sr = load_audio(filepath)
        # Centre-pad so every clip yields feature sequences of equal length.
        audio = librosa.util.pad_center(audio, size=mx_audio_len)
        zcr = extract_zcr_seq(audio)
        rms = extract_rms_seq(audio)
        mel = extract_mel_spectrogram(audio, sr)
        np.savez(cache_path, zcr=zcr, rms=rms, mel=mel)

    X_zcr.append(zcr)
    X_rms.append(rms)
    X_mel.append(mel)

    emotion = get_emotion_from_filename(filename)
    gender = get_gender_from_filename(filename)
    y.append(emotion)
    y_gender.append(f"{emotion}_{gender}")

In [None]:
# Convert the per-clip lists into arrays and report their shapes.
X_zcr = np.array(X_zcr)
# X_rms itself is converted so it matches the other features; previously only
# the unused X_energy held the array while X_rms stayed a Python list.
X_rms = np.array(X_rms)
X_energy = X_rms  # kept as an alias for any code that still reads X_energy
X_mel = np.array(X_mel)
y = np.array(y)
y_gender = np.array(y_gender)
print(np.shape(X_zcr))
print(np.shape(X_rms))
print(np.shape(X_mel))
print(np.shape(y))
print(np.shape(y_gender))

In [None]:
# Show three randomly chosen samples in every feature space:
# row 1 = mel spectrograms, row 2 = RMS curves, row 3 = ZCR curves.
np.random.seed(RANDOM_SEED)
indices = np.random.choice(X_mel.shape[0], size=3, replace=False)

plt.figure(figsize=(20, 10))

# Top row: mel spectrograms (subplot slots 1-3)
for slot, idx in enumerate(indices, start=1):
    plt.subplot(3, 3, slot)
    librosa.display.specshow(X_mel[idx], sr=16000, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"Sample #{idx}")

# Middle and bottom rows: the two frame-level time-domain curves
for first_slot, series, label in ((4, X_rms, "RMSE"), (7, X_zcr, "ZCR")):
    for slot, idx in enumerate(indices, start=first_slot):
        plt.subplot(3, 3, slot)
        plt.plot(series[idx])
        plt.title(f"{label} - Sample #{idx}")
        plt.xlabel("Frame")
        plt.ylabel(label)

plt.tight_layout()
plt.show()

In [None]:
# Bar chart of how many clips carry each emotion label.
class_counts = np.unique(y, return_counts=True)
emotion_names, emotion_freqs = class_counts

plt.figure(figsize=(10, 6))
sns.barplot(x=emotion_names, y=emotion_freqs)
plt.title('Class Frequency Distribution')
plt.xlabel('Emotion Classes')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Bar chart of how many clips carry each emotion_gender label.
class_counts = np.unique(y_gender, return_counts=True)
gender_class_names, gender_class_freqs = class_counts

plt.figure(figsize=(10, 6))
sns.barplot(x=gender_class_names, y=gender_class_freqs)
plt.title('Class Frequency Distribution Consedring Gender)')
plt.xlabel('Emotion_Gender Classes')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Time-domain feature space: ZCR and RMS become two channels per sample.
X_time = np.stack((X_zcr, X_rms), axis=1)
print(X_time.shape)

# Frequency-domain feature space: insert a singleton channel axis at position 1,
# e.g. (7442, 128, 157) -> (7442, 1, 128, 157).
X_mel = np.expand_dims(X_mel, axis=1)
print(X_mel.shape)

In [None]:
def split_data(X_time, X_mel, y, y_gender, test_size=0.3, val_size=0.05, random_state=42):
    """Split all arrays into train / validation / test with shared indices.

    Both splits are stratified on ``y``. ``test_size`` is a fraction of the
    whole set; ``val_size`` is a fraction of what remains after the test
    split has been removed.

    Args:
        X_time: Time-domain features, indexed by sample on axis 0.
        X_mel: Mel features, indexed by sample on axis 0.
        y: Emotion labels.
        y_gender: Emotion_gender labels.
        test_size: Test share passed to the first ``train_test_split``.
        val_size: Validation share passed to the second ``train_test_split``.
        random_state: Seed used for both splits.

    Returns:
        A 13-tuple: the (train, val, test) triples of X_time, X_mel, y and
        y_gender in that order, followed by the training indices.
    """
    sample_ids = np.arange(len(y))

    # Stage 1: carve the test set off the full index range.
    idx_trainval, idx_test = train_test_split(
        sample_ids, stratify=y, test_size=test_size, random_state=random_state
    )

    # Stage 2: carve the validation set off the remainder.
    idx_train, idx_val = train_test_split(
        idx_trainval,
        stratify=y[idx_trainval],
        test_size=val_size,
        random_state=random_state,
    )

    partitions = (idx_train, idx_val, idx_test)
    pieces = []
    for array in (X_time, X_mel, y, y_gender):
        pieces.extend(array[part] for part in partitions)

    return (*pieces, idx_train)


In [None]:
# Split every feature space and both label sets with the default proportions;
# idx_train is kept so the training file paths can be recovered later.
(X_time_train, X_time_val, X_time_test,
 X_mel_train, X_mel_val, X_mel_test,
 y_train, y_val, y_test,
 y_train_g, y_val_g, y_test_g, idx_train) = split_data(X_time, X_mel, y, y_gender)

# Data Augmentation

In [None]:
# Recover the file paths of the training samples by indexing the sorted .wav list with idx_train.
# NOTE(review): this assumes sorted(glob(...)) yields the same file order as the
# sorted(os.listdir(...)) loop that built the feature arrays — confirm.
train_set_names = np.array(sorted(glob.glob(os.path.join(CREMA_PATH, "*.wav"))))[idx_train]
# First training clip, loaded at 16 kHz and centre-padded to mx_audio_len.
sample_audio_path = train_set_names[0]
sample_audio, sr = load_audio(sample_audio_path,16000)
sample_audio = librosa.util.pad_center(sample_audio, size=mx_audio_len)

In [None]:
def noise(data):
    """Return ``data`` plus Gaussian noise scaled relative to its peak value.

    The noise amplitude is ``0.035 * U(0, 1) * max(data)``; the uniform draw
    happens before the normal draw.

    Args:
        data: 1-D array of audio samples.

    Returns:
        A new array of the same length as ``data``.
    """
    amplitude = 0.035 * np.random.uniform() * np.amax(data)
    gaussian = np.random.normal(size=data.shape[0])
    return data + amplitude * gaussian

def shift(data):
    """Circularly roll ``data`` by a random offset.

    The offset is ``int(U(-5, 5) * 1000)`` samples; samples pushed past one
    end re-enter at the other (``np.roll``).

    Args:
        data: Array of audio samples.

    Returns:
        The rolled array.
    """
    offset = int(np.random.uniform(low=-5, high=5) * 1000)
    rolled = np.roll(data, offset)
    return rolled

def pitch(data, sr, n_steps=-1):
    """Pitch-shift ``data`` with ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples.
        sr: Sample rate of ``data``.
        n_steps: Step count forwarded to librosa (default -1).

    Returns:
        The pitch-shifted signal returned by librosa.
    """
    shifted = librosa.effects.pitch_shift(y=data, sr=sr, n_steps=n_steps)
    return shifted

In [None]:
# Baseline for the augmentation demos: play and plot the unmodified sample clip.
print("Original Audio")
ipd.display(ipd.Audio(sample_audio, rate=sr))
plt.figure(figsize=(14, 4))
librosa.display.waveshow(sample_audio, sr=sr)

In [None]:
# Demo: the sample clip after noise() has added random Gaussian noise.
print("With Noise Audio")
aug = noise(sample_audio)
ipd.display(ipd.Audio(aug, rate=sr))
plt.figure(figsize=(14, 4))
librosa.display.waveshow(aug, sr=sr)

In [None]:
# Demo: the sample clip after pitch() with its default n_steps=-1.
print("With Lower Pitch Audio")
aug = pitch(sample_audio,sr)
ipd.display(ipd.Audio(aug, rate=sr))
plt.figure(figsize=(14, 4))
librosa.display.waveshow(aug, sr=sr)

In [None]:
# Demo: the sample clip after shift() has rolled it by a random offset.
print("Shifted Audio")
aug = shift(sample_audio)
ipd.display(ipd.Audio(aug, rate=sr))
plt.figure(figsize=(14, 4))
librosa.display.waveshow(aug, sr=sr)

In [None]:
def augment(audio,sr):
    """Produce the three augmented variants of one clip.

    Args:
        audio: Audio samples.
        sr: Sample rate of ``audio``.

    Returns:
        ``(noisy, pitch_shifted, time_shifted)`` — computed in that order,
        which fixes the order of the random draws.
    """
    noisy = noise(audio)
    pitch_shifted = pitch(audio, sr)
    time_shifted = shift(audio)
    return noisy, pitch_shifted, time_shifted

# Optional augmentation of the training set: each training clip contributes
# three extra samples (noise, pitch shift, time shift).
is_augmented = False
if is_augmented:
    aug_X_time = []
    aug_X_mel = []
    aug_y = []
    aug_y_gender = []

    # Seed once, before the loop: re-seeding per clip would give every clip
    # the identical noise vector and the identical shift offset.
    np.random.seed(RANDOM_SEED)

    for path in train_set_names:
        filename = os.path.basename(path)
        audio, sr = load_audio(path)
        # Pad the clip that was just loaded (not the demo clip `sample_audio`),
        # otherwise every augmented sample is derived from one recording.
        audio = librosa.util.pad_center(audio, size=mx_audio_len)
        audio_noise, audio_pitch, audio_shift = augment(audio, sr)
        emotion = get_emotion_from_filename(filename)
        gender = get_gender_from_filename(filename)

        # Extract features for each augmentation
        for aug_audio in [audio_noise, audio_pitch, audio_shift]:
            zcr = extract_zcr_seq(aug_audio)
            rms = extract_rms_seq(aug_audio)
            mel = extract_mel_spectrogram(aug_audio, sr)

            time_feats = np.stack([zcr, rms], axis=0)  # Stack time features -> (2, frames)
            mel = mel[np.newaxis, :, :]  # Add channel dim -> (1, n_mels, frames)

            aug_X_time.append(time_feats)
            aug_X_mel.append(mel)
            aug_y.append(emotion)
            # Same "emotion_gender" format as the original y_gender labels, so
            # the gender one-hot encoder sees no new categories.
            aug_y_gender.append(f"{emotion}_{gender}")

In [None]:
# Number of training samples before any augmented samples are appended.
print(len(X_time_train)) # get number of samples in training before augmentation

In [None]:
# Append the augmented samples to the training arrays and report the new shapes.
if is_augmented:
    X_time_train = np.concatenate((X_time_train, aug_X_time), axis=0)
    X_mel_train = np.concatenate((X_mel_train, aug_X_mel), axis=0)
    y_train = np.concatenate((y_train, aug_y), axis=0)
    y_train_g = np.concatenate((y_train_g, aug_y_gender), axis=0)

    for grown in (X_time_train, X_mel_train, y_train, y_train_g):
        print(grown.shape)

In [None]:
# Shuffle the enlarged training set so augmented samples are interleaved
# with the originals; one permutation is applied to all four arrays.
if is_augmented:
    num_samples = X_time_train.shape[0]

    np.random.seed(RANDOM_SEED)
    shuffled_indices = np.random.permutation(num_samples)

    X_time_train, X_mel_train, y_train, y_train_g = (
        X_time_train[shuffled_indices],
        X_mel_train[shuffled_indices],
        y_train[shuffled_indices],
        y_train_g[shuffled_indices],
    )

# Training Pipeline

In [None]:
# Optional standardisation of the time-domain features. Each channel
# (0 = ZCR, 1 = RMS) has its own scaler, fitted on the training split only
# and then applied unchanged to the validation and test splits.
normalize = False
if normalize:
    scaler_zcr = StandardScaler()
    scaler_rms = StandardScaler()

    for channel, scaler in enumerate((scaler_zcr, scaler_rms)):
        X_time_train[:, channel, :] = scaler.fit_transform(X_time_train[:, channel, :])
        X_time_val[:, channel, :] = scaler.transform(X_time_val[:, channel, :])
        X_time_test[:, channel, :] = scaler.transform(X_time_test[:, channel, :])

In [None]:
# One-hot encode the labels. Each encoder is fitted on the training labels
# only and reused for the validation and test labels.

# Emotion labels
ohe = OneHotEncoder(sparse_output=False)
y_train_oh = ohe.fit_transform(y_train[:, np.newaxis])
y_val_oh = ohe.transform(y_val[:, np.newaxis])
y_test_oh = ohe.transform(y_test[:, np.newaxis])

# Emotion_gender labels
ohe_g = OneHotEncoder(sparse_output=False)
y_train_oh_g = ohe_g.fit_transform(y_train_g[:, np.newaxis])
y_val_oh_g = ohe_g.transform(y_val_g[:, np.newaxis])
y_test_oh_g = ohe_g.transform(y_test_g[:, np.newaxis])

In [None]:
class CremaDataset(Dataset):
    """Minimal map-style dataset pairing features with their targets.

    Attributes are stored as given: ``X`` holds the features, ``y`` the
    targets, both indexed by sample position.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at position ``idx``."""
        features = self.X[idx]
        target = self.y[idx]
        return features, target

# Cast time-domain features and all one-hot targets to float32.
X_time_train, X_time_val, X_time_test = (
    part.astype(np.float32) for part in (X_time_train, X_time_val, X_time_test)
)
y_train_oh, y_val_oh, y_test_oh = (
    part.astype(np.float32) for part in (y_train_oh, y_val_oh, y_test_oh)
)
y_train_oh_g, y_val_oh_g, y_test_oh_g = (
    part.astype(np.float32) for part in (y_train_oh_g, y_val_oh_g, y_test_oh_g)
)

batch_size = 64


def _build_loaders(X_splits, y_splits):
    """Return (train, val, test) DataLoaders; only the train loader shuffles."""
    (X_tr, X_va, X_te), (y_tr, y_va, y_te) = X_splits, y_splits
    return (
        DataLoader(CremaDataset(X_tr, y_tr), batch_size=batch_size, shuffle=True),
        DataLoader(CremaDataset(X_va, y_va), batch_size=batch_size),
        DataLoader(CremaDataset(X_te, y_te), batch_size=batch_size),
    )


_time_splits = (X_time_train, X_time_val, X_time_test)
_mel_splits = (X_mel_train, X_mel_val, X_mel_test)
_emotion_targets = (y_train_oh, y_val_oh, y_test_oh)
_gender_targets = (y_train_oh_g, y_val_oh_g, y_test_oh_g)

# Time-domain loaders: emotion targets, then emotion_gender targets
time_train_loader, time_val_loader, time_test_loader = _build_loaders(
    _time_splits, _emotion_targets
)
time_train_loader_g, time_val_loader_g, time_test_loader_g = _build_loaders(
    _time_splits, _gender_targets
)

# Mel loaders: emotion targets, then emotion_gender targets
mel_train_loader, mel_val_loader, mel_test_loader = _build_loaders(
    _mel_splits, _emotion_targets
)
mel_train_loader_g, mel_val_loader_g, mel_test_loader_g = _build_loaders(
    _mel_splits, _gender_targets
)


In [None]:
class TimeNet(nn.Module):
    """1-D CNN classifier over the two-channel (ZCR, RMS) frame sequences.

    Four Conv1d + pooling stages followed by two linear layers. The first
    linear layer expects 64 * 4 inputs, which is what a 157-frame input
    produces (shape comments below trace that case).

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.relu = nn.ReLU()

        # Stage 1: strided convolution, then max-pool
        self.conv1 = nn.Conv1d(2, 256, kernel_size=3, stride=2, padding=1)   # (B, 256, 79)
        self.pool1 = nn.MaxPool1d(2)                                         # (B, 256, 39)

        # Stage 2
        self.conv2 = nn.Conv1d(256, 128, kernel_size=3, padding=1)           # (B, 128, 39)
        self.pool2 = nn.MaxPool1d(2)                                         # (B, 128, 19)

        # Stage 3
        self.conv3 = nn.Conv1d(128, 128, kernel_size=3, padding=1)           # (B, 128, 19)
        self.pool3 = nn.MaxPool1d(2)                                         # (B, 128, 9)

        self.dropout1 = nn.Dropout(0.2)

        # Stage 4: average-pool instead of max-pool
        self.conv4 = nn.Conv1d(128, 64, kernel_size=3, padding=1)            # (B, 64, 9)
        self.pool4 = nn.AvgPool1d(2)                                         # (B, 64, 4)

        # Classifier head
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 4, 32)
        self.dropout2 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(32, num_classes)

    def forward(self, x):  # x: (B, 2, 157)
        """Return raw class logits of shape (B, num_classes)."""
        x = self.pool1(self.relu(self.conv1(x)))   # (B, 256, 39)
        x = self.pool2(self.relu(self.conv2(x)))   # (B, 128, 19)
        x = self.pool3(self.relu(self.conv3(x)))   # (B, 128, 9)
        x = self.dropout1(x)
        x = self.pool4(self.relu(self.conv4(x)))   # (B, 64, 4)

        x = self.flatten(x)                        # (B, 256)
        x = self.dropout2(self.relu(self.fc1(x)))  # (B, 32)
        return self.fc2(x)                         # (B, num_classes)

In [None]:
def confusion_matrix_to_labels(cm):
    """Expand a confusion matrix back into matching label sequences.

    Cell ``cm[t, p]`` contributes that many (true=t, predicted=p) pairs,
    emitted in row-major order.

    Args:
        cm: 2-D array of integer counts.

    Returns:
        ``(y_true, y_pred)`` as two NumPy arrays of equal length.
    """
    n_true, n_pred = cm.shape[0], cm.shape[1]
    pairs = [
        (t, p)
        for t in range(n_true)
        for p in range(n_pred)
        for _ in range(cm[t, p])
    ]
    y_true = [t for t, _ in pairs]
    y_pred = [p for _, p in pairs]
    return np.array(y_true), np.array(y_pred)

In [None]:
def evaluate_and_pool_confusion_matrix(cm, class_names):
    """Sum-pool a confusion matrix in 2x2 blocks and score the pooled result.

    Every non-overlapping 2x2 block is replaced by its sum, halving both
    dimensions. Accuracy, weighted F1 and a classification report are then
    computed from the pooled counts, printed, and the pooled matrix is shown
    as a heatmap.

    Args:
        cm: Confusion matrix with an even number of rows and columns.
        class_names: Names for the pooled classes.

    Returns:
        ``(accuracy, weighted_f1, pooled_cm)``.

    Raises:
        ValueError: If either dimension of ``cm`` is odd.
    """
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    if n_rows % 2 or n_cols % 2:
        raise ValueError("Confusion matrix dimensions must be even for 2x2 pooling.")

    # 2x2 sum pooling: one output cell per block
    pooled_cm = np.array([
        [np.sum(cm[r:r+2, c:c+2]) for c in range(0, n_cols, 2)]
        for r in range(0, n_rows, 2)
    ])

    # Rebuild label sequences so the sklearn metrics can be reused
    y_true, y_pred = confusion_matrix_to_labels(pooled_cm)
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    print("Accuracy:", acc)
    print("F1 Score:", f1)
    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=class_names))

    tick_labels = class_names[::]
    sns.heatmap(
        pooled_cm,
        annot=True,
        fmt='d',
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        cmap='Blues',
    )
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Pooled Confusion Matrix")
    plt.show()

    return acc, f1, pooled_cm

In [None]:
def train_model(model, train_loader, val_loader, y_val, class_names,
                epochs=100, lr=0.001, patience=10, pool = False,isTuning = False):
    """Train ``model`` with Adam and cross-entropy, early-stopping on validation loss.

    After training, the weights from the epoch with the lowest average
    validation loss are restored and the model is evaluated on the
    validation data through ``evaluate_model``.

    Args:
        model: The network to train; moved to CUDA when available.
        train_loader: Loader yielding ``(features, one_hot_targets)`` batches.
        val_loader: Loader for the validation split, same batch format.
        y_val: One-hot validation targets, forwarded to ``evaluate_model``.
        class_names: Class names, forwarded to ``evaluate_model``.
        epochs: Maximum number of epochs.
        lr: Adam learning rate.
        patience: Epochs without validation-loss improvement before stopping.
        pool: Forwarded to ``evaluate_model``.
        isTuning: When True, the loss plot and progress summary are skipped.

    Returns:
        ``(model, acc, f1, cm, report)`` where the last four values come
        from ``evaluate_model`` on the validation data.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    train_losses = []
    val_losses = []
    # Running totals over ALL epochs (not reset per epoch), so the reported
    # "Total Train Accuracy" is the average over the whole training run.
    train_correct = 0
    train_total = 0
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            # CrossEntropyLoss is fed class indices, so undo the one-hot encoding.
            labels = torch.argmax(y_batch, dim=1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (predicted == labels).sum().item()
            train_total += labels.size(0)

        avg_train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, torch.argmax(y_batch, dim=1))
                val_loss += loss.item()

        avg_val_loss = val_loss / len(val_loader)

        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        print(f"Epoch {epoch+1}/{epochs} - Avg. Train Loss: {avg_train_loss:.4f} - Avg. Val Loss: {avg_val_loss:.4f}")

        # Early Stopping Check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            # state_dict() hands back references to the live parameter tensors,
            # which later optimizer steps overwrite in place. Clone them so the
            # snapshot really is the best epoch and not the last one.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs.")
                break

    # Plot losses over epochs
    if not isTuning:
        plt.plot(train_losses, label='Avg. Train Loss')
        plt.plot(val_losses, label='Avg. Val Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Training vs Validation Loss')
        plt.xticks(range(0, len(train_losses), 10))
        plt.legend()
        plt.grid(True)
        plt.show()

    # Restore the best weights before evaluating / returning
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    if not isTuning:
        print("Training complete.")
        train_accuracy = train_correct / train_total
        print(f"Total Train Accuracy: {train_accuracy:.4f}")
        print("Evaluating on validation set...")
    acc, f1, cm, report = evaluate_model(model, val_loader, y_val, class_names, pool,isTuning)
    return model, acc, f1, cm, report

In [None]:
def evaluate_model(model, data_loader, y_true_onehot, class_names, pool=False,isTuning = False ):
    """Score ``model`` on one data split.

    Args:
        model: Trained network; evaluated on the device its parameters live on.
        data_loader: Non-shuffling loader whose batch order matches
            ``y_true_onehot``.
        y_true_onehot: One-hot ground-truth targets for the whole split.
        class_names: Class names, one per one-hot column.
        pool: When True, the confusion matrix is 2x2 sum-pooled via
            ``evaluate_and_pool_confusion_matrix`` (which prints and plots)
            and the pooled accuracy / F1 / matrix are returned.
        isTuning: When True (and ``pool`` is False), printing and plotting
            are skipped.

    Returns:
        ``(acc, f1, cm, report)``. ``report`` is always the un-pooled
        ``classification_report`` dict keyed by ``class_names``.
    """
    model.eval()
    device = next(model.parameters()).device

    all_preds = []

    with torch.no_grad():
        for X_batch, _ in data_loader:
            X_batch = X_batch.to(device)
            outputs = model(X_batch)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            all_preds.extend(preds)

    y_true = np.argmax(y_true_onehot, axis=1)

    # Pin the label set explicitly: without it, a class that is absent from
    # both y_true and the predictions shrinks the confusion matrix (which
    # misaligns the 2x2 pooling) and makes target_names mismatch.
    label_ids = np.arange(len(class_names))

    acc = accuracy_score(y_true, all_preds)
    f1 = f1_score(y_true, all_preds, average='weighted')
    cm = confusion_matrix(y_true, all_preds, labels=label_ids)
    report = classification_report(
        y_true, all_preds, labels=label_ids, target_names=class_names, output_dict=True
    )
    if pool:
        # Keep every second name and strip its "_<gender>" suffix.
        # NOTE(review): assumes class_names alternates the two genders of each emotion — confirm.
        reduced_class_names = [name.split('_')[0] for i, name in enumerate(class_names) if i % 2 == 0]
        acc, f1, pooled_cm = evaluate_and_pool_confusion_matrix(cm, reduced_class_names)
        return acc, f1, pooled_cm, report
    if not isTuning:
        print("Accuracy:", acc)
        print("F1 Score:", f1)
        print("Classification Report:")
        print(classification_report(y_true, all_preds, labels=label_ids, target_names=class_names))
        sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap='Blues')
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plt.title("Confusion Matrix")
        plt.show()

    return acc, f1, cm, report

In [None]:
def set_seed(seed=RANDOM_SEED):
    """Seed NumPy and PyTorch (CPU and CUDA) and force deterministic cuDNN.

    Args:
        seed: Value handed to every seeding call.
    """
    seeders = (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [None]:
# Train the time-domain model on the emotion labels.
set_seed(RANDOM_SEED)
time_model = TimeNet(num_classes=y_train_oh.shape[1])
time_model, time_acc, time_f1, time_cm, time_report = train_model(
    time_model,
    time_train_loader,
    time_val_loader,
    y_val_oh,
    class_names=ohe.categories_[0],
)

In [None]:
# Score the trained time-domain model on the held-out test split.
print("Evaluating the Time-domain model on Test Set")
test_time_acc, test_time_f1, test_time_cm, test_time_report = evaluate_model(
    time_model,
    time_test_loader,
    y_test_oh,
    class_names=ohe.categories_[0],
)

In [None]:
# Train the time-domain model on the emotion_gender labels; its validation
# metrics are reported after pooling back to emotions (pool=True).
set_seed(RANDOM_SEED)
time_model_g = TimeNet(num_classes=y_train_oh_g.shape[1])
time_model_g, time_acc_g, time_f1_g, time_cm_g, time_report_g = train_model(
    time_model_g,
    time_train_loader_g,
    time_val_loader_g,
    y_val_oh_g,
    class_names=ohe_g.categories_[0],
    patience=30,
    epochs=200,
    pool=True,
)

In [None]:
# Score the emotion_gender time-domain model on the test split (pooled metrics).
print("Evaluating the Time-domain (Gender) model on Test Set")
test_time_acc_g, test_time_f1_g, test_time_cm_g, test_time_report_g = evaluate_model(
    time_model_g,
    time_test_loader_g,
    y_test_oh_g,
    class_names=ohe_g.categories_[0],
    pool=True,
)

In [None]:
def evaluate_fused_models(
    model, report_model, model_g, report_model_g, data_loader, y_true_onehot, class_names
):
    """Evaluate a per-class weighted fusion of the emotion and emotion_gender models.

    The emotion_gender model's softmax output is pooled back to emotions by
    adding adjacent column pairs. The two probability vectors are then mixed
    per class with weights ``exp(alpha * normalised_f1)`` derived from the
    supplied classification reports, and the arg-max of the mix is the
    fused prediction. Metrics are printed and the confusion matrix plotted.

    Args:
        model: Network predicting the emotion classes.
        report_model: ``classification_report`` dict of ``model``, keyed by
            the entries of ``class_names``.
        model_g: Network predicting the emotion_gender classes.
            NOTE(review): assumed to live on the same device as ``model`` — confirm.
        report_model_g: ``classification_report`` dict of ``model_g``; the
            keys ``"<emotion>_female"`` / ``"<emotion>_male"`` are looked up,
            a missing key counts as F1 = 0.
        data_loader: Non-shuffling loader whose batch order matches
            ``y_true_onehot``.
        y_true_onehot: One-hot emotion targets for the whole split.
        class_names: Emotion class names, one per one-hot column.

    Returns:
        ``(acc, f1, cm, report)`` of the fused predictions.
    """
    model.eval()
    model_g.eval()
    device = next(model.parameters()).device

    # Per-class F1 of the coarse (emotion) model
    f1_model = np.array([report_model[cls]['f1-score'] for cls in class_names])

    # Per-class F1 of the fine (emotion_gender) model: female + male summed
    f1_model_g = np.array([
        report_model_g.get(f"{cls}_female", {'f1-score': 0.0}).get('f1-score', 0.0)
        + report_model_g.get(f"{cls}_male", {'f1-score': 0.0}).get('f1-score', 0.0)
        for cls in class_names
    ])

    # Normalise so that all F1 values of both models together sum to 1.
    total_f1 = np.sum(f1_model + f1_model_g)
    if total_f1 > 0:
        f1_model_normalized = f1_model / total_f1
        f1_model_g_normalized = f1_model_g / total_f1
    else:
        # Both reports are all-zero: fall back to equal weights instead of
        # dividing by zero and propagating NaNs into the arg-max.
        f1_model_normalized = np.zeros_like(f1_model, dtype=float)
        f1_model_g_normalized = np.zeros_like(f1_model_g, dtype=float)

    # Exponential weighting favours the model with the higher F1 per class.
    # The weights do not depend on the batch, so compute them once.
    alpha = 2.0  # Exponential scaling factor
    weight_model = np.exp(alpha * f1_model_normalized)
    weight_model_g = np.exp(alpha * f1_model_g_normalized)
    weight_sum = weight_model + weight_model_g

    y_true = np.argmax(y_true_onehot, axis=1)
    all_preds_fused = []

    with torch.no_grad():
        for X_batch, _ in data_loader:
            X_batch = X_batch.to(device)

            out_model = torch.softmax(model(X_batch), dim=1).cpu().numpy()
            out_model_g = torch.softmax(model_g(X_batch), dim=1).cpu().numpy()

            # Pool the gender-specific outputs: columns (0,1) -> 0, (2,3) -> 1, ...
            out_model_g_pooled = out_model_g[:, ::2] + out_model_g[:, 1::2]

            fused_probs = (weight_model * out_model + weight_model_g * out_model_g_pooled) / weight_sum

            preds_fused = np.argmax(fused_probs, axis=1)
            all_preds_fused.extend(preds_fused)

    # Pin the label set so that a class missing from this split cannot shrink
    # the confusion matrix or break target_names.
    label_ids = np.arange(len(class_names))

    acc = accuracy_score(y_true, all_preds_fused)
    f1 = f1_score(y_true, all_preds_fused, average='weighted')
    report = classification_report(
        y_true, all_preds_fused, labels=label_ids, target_names=class_names, output_dict=True
    )
    print(f"Fused Accuracy: {acc}")
    print(f"Fused F1 Score: {f1}")
    print("Fused Classification Report:")
    print(classification_report(y_true, all_preds_fused, labels=label_ids, target_names=class_names))

    cm = confusion_matrix(y_true, all_preds_fused, labels=label_ids)
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap='Blues')
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Fused Confusion Matrix (Exponential Weighted Fusion)")
    plt.show()

    return acc, f1, cm, report

In [None]:
def plot_confusion_matrices(cm_list, titles, class_names):
    """Draw several confusion matrices as heatmaps on one grid (max 3 per row).

    Args:
        cm_list: Confusion matrices to draw.
        titles: One title per matrix.
        class_names: Tick labels shared by every heatmap.

    Raises:
        ValueError: If ``cm_list`` and ``titles`` differ in length.
    """
    if len(cm_list) != len(titles):
        raise ValueError("The number of confusion matrices must match the number of titles.")

    n_plots = len(cm_list)
    n_cols = min(n_plots, 3)
    n_rows = math.ceil(n_plots / n_cols)

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 5 * n_rows))
    # A single subplot comes back as one Axes object rather than an array.
    axes = axes.flatten() if n_plots > 1 else [axes]

    for ax, matrix, heading in zip(axes, cm_list, titles):
        sns.heatmap(
            matrix,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax,
        )
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
        ax.set_title(heading)

    # Remove grid cells that received no matrix
    for spare in axes[n_plots:]:
        fig.delaxes(spare)

    plt.tight_layout()
    plt.show()

In [None]:
# Fuse the two time-domain models on the validation split and compare the
# three confusion matrices side by side.
print("Evaluating the Time-domain (Fused) models on Validation Set")
time_acc_f, time_f1_f, time_cm_f, time_report_f = evaluate_fused_models(
    time_model,
    time_report,
    time_model_g,
    time_report_g,
    time_val_loader,
    y_val_oh,
    ohe.categories_[0],
)
plot_confusion_matrices(
    [time_cm, time_cm_g, time_cm_f],
    [
        f"Original ({100*time_acc:.2f}%)",
        f"Gender ({100*time_acc_g:.2f}%)",
        f"Fused ({100*time_acc_f:.2f}%)",
    ],
    ohe.categories_[0],
)

In [None]:
# Fuse the two time-domain models on the test split. The fusion weights still
# come from the validation reports (time_report / time_report_g), so no test
# information leaks into the weighting.
print("Evaluating the Time-domain (Fused) models on Test Set")
# The fused report gets its own name (`_f`); storing it in test_time_report_g
# would overwrite the gender model's test report.
test_time_acc_f, test_time_f1_f, test_time_cm_f, test_time_report_f = evaluate_fused_models(
    time_model,
    time_report,
    time_model_g,
    time_report_g,
    time_test_loader,
    y_test_oh,
    ohe.categories_[0],
)
plot_confusion_matrices(
    [test_time_cm, test_time_cm_g, test_time_cm_f],
    [
        f"Original ({100*test_time_acc:.2f}%)",
        f"Gender ({100*test_time_acc_g:.2f}%)",
        f"Fused ({100*test_time_acc_f:.2f}%)",
    ],
    ohe.categories_[0],
)

In [None]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F

# class FreqNet(nn.Module):
#     def __init__(self, num_classes):
#         super(FreqNet, self).__init__()

#         def conv_block(in_channels, out_channels, kernel_size):
#             return nn.Sequential(
#                 nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2),
#                 nn.BatchNorm2d(out_channels),
#                 nn.ReLU(),
#                 nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
#                 nn.BatchNorm2d(out_channels),
#                 nn.ReLU(),
#                 nn.MaxPool2d(2),
#                 nn.Dropout(0.2)
#             )

#         self.branch1 = conv_block(1, 32, kernel_size=3)
#         self.branch2 = conv_block(1, 32, kernel_size=5)
#         self.branch3 = conv_block(1, 32, kernel_size=7)

#         self.shared_block = nn.Sequential(
#             nn.Conv2d(96, 128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.Conv2d(128, 128, kernel_size=3, padding=1),
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(2),
#             nn.Dropout(0.3)
#         )

#         self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

#         self.classifier = nn.Sequential(
#             nn.Linear(128, 128),
#             nn.ReLU(),
#             nn.Dropout(0.4),
#             nn.Linear(128, 64),
#             nn.ReLU(),
#             nn.Dropout(0.3),
#             nn.Linear(64, num_classes)
#         )

#     def forward(self, x):
#         x1 = self.branch1(x)
#         x2 = self.branch2(x)
#         x3 = self.branch3(x)

#         x_concat = torch.cat([x1, x2, x3], dim=1)
#         x = self.shared_block(x_concat)
#         x = self.global_pool(x)
#         x = x.view(x.size(0), -1)
#         return self.classifier(x)

In [None]:
import torch.nn.functional as F

class FreqNet(nn.Module):
    """Three-stage 2-D CNN classifier for single-channel inputs.

    Every stage is a 7x7 convolution with padding 3 (spatial size preserved)
    followed by ReLU. The first two stages are each halved by 2x2 max pooling;
    the third is reduced to one value per channel by global average pooling,
    so the dense head always receives 256 features. The output is the raw
    score per class (no softmax is applied here).

    Args:
        num_classes: Width of the final linear layer.
    """

    def __init__(self, num_classes=6):
        super().__init__()

        # Attribute names double as state_dict keys, so they are kept stable.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=7, padding=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=7, padding=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=7, padding=3)
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.fc1 = nn.Linear(in_features=256, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=num_classes)

    def forward(self, x):
        """Map a (B, 1, H, W) batch to (B, num_classes) scores."""
        # Stages 1 and 2: conv -> ReLU -> halve H and W.
        for conv, pool in ((self.conv1, self.pool1), (self.conv2, self.pool2)):
            x = pool(F.relu(conv(x)))

        # Stage 3 keeps the H/4 x W/4 grid, then averages it away.
        pooled = self.global_avg_pool(F.relu(self.conv3(x)))  # (B, 256, 1, 1)
        features = pooled.view(pooled.shape[0], -1)           # (B, 256)

        hidden = F.relu(self.fc1(features))                   # (B, 64)
        return self.fc2(hidden)                               # (B, num_classes)

In [None]:
# Frequency-domain run on the plain label set.
# set_seed is defined elsewhere in the notebook -- presumably it fixes the RNG
# state so the run is reproducible; confirm against its definition.
set_seed(RANDOM_SEED)
# The output layer width is taken from the one-hot training labels.
mel_model = FreqNet(num_classes=y_train_oh.shape[1])

# train_model's result is unpacked as (model, accuracy, F1, confusion matrix,
# report). This run overrides the learning rate with 1e-5.
mel_model, mel_acc, mel_f1, mel_cm, mel_report = train_model(
    mel_model,
    mel_train_loader,
    mel_val_loader,
    y_val_oh,
    class_names=ohe.categories_[0],
    lr=0.00001
)

In [None]:
# Frequency-domain run on the "_g" (gender) label set: same architecture, but
# the output width follows y_train_oh_g and the class names come from ohe_g.
set_seed(RANDOM_SEED)
mel_model_g = FreqNet(num_classes=y_train_oh_g.shape[1])

# No lr override here, so train_model's own default learning rate applies.
# NOTE(review): pool=True is interpreted inside train_model (not shown here) --
# presumably it folds the gender-specific classes back onto emotions; confirm.
mel_model_g, mel_acc_g, mel_f1_g, mel_cm_g, mel_report_g = train_model(
    mel_model_g,
    mel_train_loader_g,
    mel_val_loader_g,
    y_val_oh_g,
    class_names=ohe_g.categories_[0],
    pool = True
)

In [None]:
# Score the plain frequency-domain model on the held-out test loader.
# Results are kept under the test_mel_* names for the comparison plots below.
print("Evaluating the Frequency-domain model on Test Set")
test_mel_acc, test_mel_f1, test_mel_cm, test_mel_report = evaluate_model(
    mel_model,
    mel_test_loader,
    y_test_oh,
    class_names=ohe.categories_[0]
)

In [None]:
# Score the gender-label frequency-domain model on its own test loader/labels.
# pool=True mirrors the flag used when this model was trained.
print("Evaluating the Frequency-domain (Gender) model on Test Set")
test_mel_acc_g, test_mel_f1_g, test_mel_cm_g, test_mel_report_g = evaluate_model(
    mel_model_g,
    mel_test_loader_g,
    y_test_oh_g,
    class_names=ohe_g.categories_[0],
    pool = True
)

In [None]:
# Fuse the plain and gender frequency-domain models on the validation split.
# Both validation reports are handed to evaluate_fused_models together with the
# two models; how they are combined is defined in that helper (not shown here).
print("Evaluating the Frequency-domain (Fused) models on Validation Set")
mel_acc_f, mel_f1_f, mel_cm_f, mel_report_f = evaluate_fused_models(mel_model, mel_report, mel_model_g, mel_report_g, mel_val_loader, y_val_oh, ohe.categories_[0])
# One confusion matrix per variant; each title carries the accuracy in percent.
plot_confusion_matrices([mel_cm, mel_cm_g, mel_cm_f], [f"Original ({100*mel_acc:.2f}%)", f"Gender ({100*mel_acc_g:.2f}%)", f"Fused ({100*mel_acc_f:.2f}%)"], ohe.categories_[0])

In [None]:
# Fuse the plain and gender frequency-domain models on the test split.
# The reports passed in are the validation-time ones (mel_report, mel_report_g);
# only the loader and labels switch to the test set.
print("Evaluating the Frequency-domain (Fused) models on Test Set")
test_mel_acc_f, test_mel_f1_f, test_mel_cm_f, test_mel_report_f = evaluate_fused_models(mel_model, mel_report, mel_model_g, mel_report_g, mel_test_loader, y_test_oh, ohe.categories_[0])
# Side-by-side test confusion matrices; each title carries the accuracy in percent.
plot_confusion_matrices([test_mel_cm, test_mel_cm_g, test_mel_cm_f], [f"Original ({100*test_mel_acc:.2f}%)", f"Gender ({100*test_mel_acc_g:.2f}%)", f"Fused ({100*test_mel_acc_f:.2f}%)"], ohe.categories_[0])

# All Models with Best Parameters

In [None]:
import torch

# Restore the four saved networks from their checkpoint files and rebind the
# notebook-level model variables to the restored instances.

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint file for each model, keyed by the variable name it will be bound to.
model_paths = {
    "mel_model": "/kaggle/input/mel_model/pytorch/default/1/mel_model.pth",
    "mel_model_g": "/kaggle/input/mel_model_g/pytorch/default/1/mel_model_g.pth",
    "time_model": "/kaggle/input/time_model/pytorch/default/1/time_model.pth",
    "time_model_g": "/kaggle/input/time_model_g/pytorch/default/1/time_model_g.pth"
}

# Architecture to instantiate for each checkpoint.
model_classes = {
    "mel_model": FreqNet,
    "mel_model_g": FreqNet,
    "time_model": TimeNet,
    "time_model_g": TimeNet
}

# Output-layer width per model; the "_g" variants use 12 classes, the others 6.
# These must match the shapes stored in the checkpoints or load_state_dict fails.
model_num_classes = {
    "mel_model": 6,
    "mel_model_g": 12,
    "time_model": 6,
    "time_model_g": 12
}

# Restored models, keyed like model_paths.
loaded_models = {}

# Build each architecture, fill it from its checkpoint, and put it in eval mode.
for name, path in model_paths.items():
    model_class = model_classes[name]
    num_classes = model_num_classes[name]
    
    # map_location remaps the stored tensors onto `device`;
    # weights_only=True restricts unpickling to tensor data.
    model = model_class(num_classes=num_classes)
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    model.to(device)
    model.eval()
    
    loaded_models[name] = model
    print(f"Loaded and ready: {name}")

# Rebind the notebook-level names used by the evaluation cells.
mel_model = loaded_models["mel_model"]
mel_model_g = loaded_models["mel_model_g"]
time_model_g = loaded_models["time_model_g"]
time_model = loaded_models["time_model"]
# No further .eval() calls are needed: every model was switched to eval mode
# inside the loading loop above.



In [None]:
# Re-score the reloaded frequency-domain models.
# The validation pass comes first because its report (mel_report) is an input
# to the fused evaluation further down.
mel_acc, mel_f1, mel_cm, mel_report = evaluate_model(
    mel_model,
    mel_val_loader,
    y_val_oh,
    class_names=ohe.categories_[0]
)

print("Evaluating the Frequency-domain model on Test Set")
test_mel_acc, test_mel_f1, test_mel_cm, test_mel_report = evaluate_model(
    mel_model,
    mel_test_loader,
    y_test_oh,
    class_names=ohe.categories_[0]
)
# Validation pass for the gender-label model; mel_report_g also feeds the fusion.
mel_acc_g, mel_f1_g, mel_cm_g, mel_report_g = evaluate_model(
    mel_model_g,
    mel_val_loader_g,
    y_val_oh_g,
    class_names=ohe_g.categories_[0],
    pool = True
)

print("Evaluating the Frequency-domain (Gender) model on Test Set")
test_mel_acc_g, test_mel_f1_g, test_mel_cm_g, test_mel_report_g = evaluate_model(
    mel_model_g,
    mel_test_loader_g,
    y_test_oh_g,
    class_names=ohe_g.categories_[0],
    pool = True
)
# Fused evaluation on the test split, driven by the two validation reports above.
print("Evaluating the Frequency-domain (Fused) models on Test Set")
test_mel_acc_f, test_mel_f1_f, test_mel_cm_f, test_mel_report_f = evaluate_fused_models(mel_model, mel_report, mel_model_g, mel_report_g, mel_test_loader, y_test_oh, ohe.categories_[0])
# Side-by-side test confusion matrices; each title carries the accuracy in percent.
plot_confusion_matrices([test_mel_cm, test_mel_cm_g, test_mel_cm_f], [f"Original ({100*test_mel_acc:.2f}%)", f"Gender ({100*test_mel_acc_g:.2f}%)", f"Fused ({100*test_mel_acc_f:.2f}%)"], ohe.categories_[0])

In [None]:
# Re-score the reloaded time-domain models: plain, gender-label and fused,
# on both the validation and the test split.
print(ohe.categories_[0])
# The validation report (time_report) is an input to the fused evaluations below.
time_acc, time_f1, time_cm, time_report = evaluate_model(time_model, time_val_loader, y_val_oh, class_names=ohe.categories_[0])

print("Evaluating the Time-domain model on Test Set")
test_time_acc, test_time_f1, test_time_cm, test_time_report = evaluate_model(time_model, time_test_loader, y_test_oh, class_names=ohe.categories_[0])

# Validation pass for the gender-label model; time_report_g also feeds the fusion.
time_acc_g, time_f1_g, time_cm_g, time_report_g = evaluate_model(time_model_g, time_val_loader_g, y_val_oh_g, class_names=ohe_g.categories_[0], pool = True)

print("Evaluating the Time-domain (Gender) model on Test Set")
test_time_acc_g, test_time_f1_g, test_time_cm_g, test_time_report_g = evaluate_model(time_model_g, time_test_loader_g, y_test_oh_g, class_names=ohe_g.categories_[0], pool = True)

print("Evaluating the Time-domain (Fused) models on Validation Set")
time_acc_f, time_f1_f, time_cm_f, time_report_f = evaluate_fused_models(time_model, time_report, time_model_g, time_report_g, time_val_loader, y_val_oh, ohe.categories_[0])
plot_confusion_matrices([time_cm, time_cm_g, time_cm_f], [f"Original ({100*time_acc:.2f}%)", f"Gender ({100*time_acc_g:.2f}%)", f"Fused ({100*time_acc_f:.2f}%)"], ohe.categories_[0])

print("Evaluating the Time-domain (Fused) models on Test Set")
# The fused report gets its own *_f name so it does not overwrite the gender
# model's test report (test_time_report_g) computed above.
test_time_acc_f, test_time_f1_f, test_time_cm_f, test_time_report_f = evaluate_fused_models(time_model, time_report, time_model_g, time_report_g, time_test_loader, y_test_oh, ohe.categories_[0])
plot_confusion_matrices([test_time_cm, test_time_cm_g, test_time_cm_f], [f"Original ({100*test_time_acc:.2f}%)", f"Gender ({100*test_time_acc_g:.2f}%)", f"Fused ({100*test_time_acc_f:.2f}%)"], ohe.categories_[0])

In [None]:
# Final comparison: the six test-set confusion matrices in one figure --
# time-domain (original / gender / fused) followed by mel (original / gender /
# fused). Titles are listed in the same order and carry the accuracy in percent.
plot_confusion_matrices([
    test_time_cm, test_time_cm_g, test_time_cm_f, 
    test_mel_cm, test_mel_cm_g, test_mel_cm_f], [
    f"Original Time ({100*test_time_acc:.2f}%)", f"Gender Time ({100*test_time_acc_g:.2f}%)", f"Fused Time ({100*test_time_acc_f:.2f}%)", 
    f"Original Mel ({100*test_mel_acc:.2f}%)", f"Gender Mel ({100*test_mel_acc_g:.2f}%)", f"Fused Mel ({100*test_mel_acc_f:.2f}%)",], 
    ohe.categories_[0])

# Hyperparameter Tuning

In [None]:

def tune_and_save_best_model(
    model_class,
    param_grid,
    train_loader,
    val_loader,
    y_val,
    class_names,
    num_classes,
    model_name, 
    output_dir='/kaggle/working'
):
    """Grid-search training hyper-parameters and persist the best model.

    A fresh ``model_class(num_classes=num_classes)`` is trained with
    ``train_model`` for every combination in the Cartesian product of the
    ``param_grid`` values. The run with the highest F1 score wins; its weights
    and a JSON metadata file are written to
    ``<output_dir>/models/<model_name>/``.

    Args:
        model_class: Callable that builds a model from ``num_classes``.
        param_grid: Mapping of hyper-parameter name to a list of candidate
            values. Only ``'epochs'`` (default 100), ``'lr'`` (default 0.001)
            and ``'patience'`` (default 10) are forwarded to ``train_model``;
            any other key is recorded in the metadata but otherwise unused.
        train_loader: Training data, passed through to ``train_model``.
        val_loader: Validation data, passed through to ``train_model``.
        y_val: Validation labels, passed through to ``train_model``.
        class_names: Class names, passed through to ``train_model``.
        num_classes: Output width used to build each candidate model.
        model_name: Sub-folder name and file stem for the saved artefacts.
        output_dir: Root directory under which ``models/`` is created.

    Returns:
        ``(best_params, best_f1, save_path)``. When no run produced a model
        (for example a grid entry with no candidates) this is
        ``({}, -1, None)`` and nothing is written.
    """
    best_f1 = -1
    best_acc = 0.0
    best_model = None
    best_params = {}

    param_names = list(param_grid)

    for combo in itertools.product(*param_grid.values()):
        params = dict(zip(param_names, combo))
        print(f"\nTrying config: {params}...")

        model = model_class(num_classes=num_classes)

        # NOTE(review): isTuning=False is passed even though this is the tuning
        # loop; its meaning lives in train_model (not shown) -- confirm intent.
        trained_model, acc, f1, _cm, _report = train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            y_val=y_val,
            class_names=class_names,
            epochs=params.get('epochs', 100),
            lr=params.get('lr', 0.001),
            patience=params.get('patience', 10),
            isTuning = False
        )

        print(f"F1 Score = {f1:.4f}, Accuracy = {acc:.4f}")

        # Strict ">" keeps the earliest configuration on ties.
        if f1 > best_f1:
            best_f1 = f1
            best_acc = acc
            best_model = trained_model
            best_params = params

    # Compare against None explicitly: module truthiness can be falsy for an
    # empty container module, and there is nothing to save or put in eval mode
    # when no run succeeded.
    if best_model is None:
        print(" No valid model was trained.")
        return best_params, best_f1, None

    # Save under <output_dir>/models/<model_name>/
    model_dir = os.path.join(output_dir, "models", model_name)
    os.makedirs(model_dir, exist_ok=True)

    save_path = os.path.join(model_dir, f"{model_name}.pth")
    meta_path = os.path.join(model_dir, f"{model_name}_meta.json")

    # Save model weights
    torch.save(best_model.state_dict(), save_path)

    # Save metadata; metrics are cast to plain floats so they always serialise.
    meta = {
        "best_params": best_params,
        "best_f1": float(best_f1),
        "best_acc": float(best_acc),
        "model_path": save_path
    }
    with open(meta_path, "w") as f:
        json.dump(meta, f, indent=4)

    print(f"\n Best model saved to: {save_path}")
    print(f"Metadata saved to: {meta_path}")

    best_model.eval()
    return best_params, best_f1, save_path


In [None]:
# Search space for the time-domain model: 3 learning rates x 2 epoch budgets
# x 2 patience values = 12 training runs.
param_grid = {
    'lr': [0.005, 0.001, 0.0001],
    'epochs': [50, 100],
    'patience': [10, 15]
}


# model_name is also the sub-folder the best weights and metadata are saved in,
# and the name the loading cell later passes to load_model_from_folder.
best_params, best_f1, saved_path = tune_and_save_best_model(
    model_class=TimeNet,
    param_grid=param_grid,
    train_loader=time_train_loader,
    val_loader=time_val_loader,
    y_val=y_val_oh,
    class_names=ohe.categories_[0],
    num_classes=y_train_oh.shape[1],
    model_name="time_Model"
)
# best_params, best_f1, saved_path = tune_and_save_best_model(
#     model_class=FreqNet,  
#     param_grid=param_grid,
#     train_loader=mel_train_loader,
#     val_loader=mel_val_loader,
#     y_val=y_val_oh,
#     class_names=ohe.categories_[0],
#     num_classes=y_train_oh.shape[1],  
#     model_name="mal1_Model"
# )

In [None]:
# Search space for the frequency-domain model: 2 x 2 x 2 = 8 training runs.
# This rebinds param_grid, best_params, best_f1 and saved_path from the
# time-domain tuning cell.
param_grid = {
    'lr': [0.005, 0.001],
    'epochs': [20, 30],
    'patience': [5, 10]
}

# "mal_Model" is the folder name the loading cell later passes to
# load_model_from_folder, so the two must stay in sync.
best_params, best_f1, saved_path = tune_and_save_best_model(
    model_class=FreqNet,  
    param_grid=param_grid,
    train_loader=mel_train_loader,
    val_loader=mel_val_loader,
    y_val=y_val_oh,
    class_names=ohe.categories_[0],
    num_classes=y_train_oh.shape[1],  
    model_name="mal_Model"
)

In [None]:

def load_model_from_folder(model_class, model_name, num_classes, base_path='/kaggle/working/models'):
    """Rebuild a saved model from ``<base_path>/<model_name>/``.

    Args:
        model_class: Callable that builds the network from ``num_classes``.
        model_name: Sub-folder of ``base_path`` holding the checkpoint; also
            the stem of the preferred weight file and of the metadata file.
        num_classes: Output width used to instantiate ``model_class``.
        base_path: Directory that contains the per-model folders.

    Returns:
        ``(model, best_params)``. ``model`` is in eval mode with the stored
        weights loaded. ``best_params`` is the full parsed content of
        ``<model_name>_meta.json``, or ``None`` when that file is absent.

    Raises:
        FileNotFoundError: If the model folder does not exist or holds no
            ``.pth`` file.
    """
    model_dir = os.path.join(base_path, model_name)

    if not os.path.isdir(model_dir):
        raise FileNotFoundError(f"Directory '{model_dir}' does not exist.")

    # os.listdir order is arbitrary, so choose deterministically: prefer the
    # canonical "<model_name>.pth", otherwise the alphabetically first file.
    pth_files = sorted(f for f in os.listdir(model_dir) if f.endswith('.pth'))
    if not pth_files:
        raise FileNotFoundError(f"No .pth file found in '{model_dir}'.")
    canonical_name = f"{model_name}.pth"
    weight_file = canonical_name if canonical_name in pth_files else pth_files[0]
    model_path = os.path.join(model_dir, weight_file)

    model = model_class(num_classes=num_classes)
    # map_location="cpu" lets checkpoints saved from a GPU load on CPU-only
    # hosts; load_state_dict then copies the values into the model's own
    # parameters, wherever they live.
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    print(f"Loaded model from: {model_path}")

    # Load the tuning metadata if present
    best_params_path = os.path.join(model_dir, f"{model_name}_meta.json")
    best_params = None
    if os.path.exists(best_params_path):
        with open(best_params_path, 'r') as f:
            best_params = json.load(f)
    else:
        print(f"No metadata file found at: {best_params_path}")

    return model, best_params


In [None]:
# This cell scores the tuned emotion model (label width from y_train_oh,
# classes from ohe), not the gender-label one, so the banner says "Emotion".
print("Evaluating the Time-domain (Emotion) model on Test Set")

# Load the tuned model; load_model_from_folder already returns it in eval mode.
time_model2, best_params = load_model_from_folder(TimeNet, "time_Model", num_classes=y_train_oh.shape[1])
print(best_params)

# Evaluate. Results go under their own test_time2_* names (matching
# time_model2) so the gender model's test_time_*_g metrics are not overwritten.
test_time2_acc, test_time2_f1, test_time2_cm, test_time2_report = evaluate_model(time_model2, time_test_loader, y_test_oh, class_names=ohe.categories_[0])



In [None]:
# Score the tuned frequency-domain model saved by the tuning cell.
print("Evaluating the Mel-spectrogram (Emotion) model on Test Set")

# Load the model from the "mal_Model" folder.
# NOTE: this rebinds mel_model and best_params; best_params here is the whole
# metadata dict returned by load_model_from_folder.
mel_model,best_params = load_model_from_folder(FreqNet, "mal_Model", num_classes=y_train_oh.shape[1])
print(best_params)
# Evaluate on the test loader; this overwrites the earlier test_mel_* results.
test_mel_acc, test_mel_f1, test_mel_cm, test_mel_report = evaluate_model(
    mel_model,
    mel_test_loader,           
    y_test_oh,                 
    class_names=ohe.categories_[0],
    # pool is deliberately not passed for this model, so evaluate_model's default applies
)

In [None]:
# import shutil
# import os

# model_dir = "/kaggle/working/models/time_Model"

# if os.path.exists(model_dir):
#     shutil.rmtree(model_dir)
#     print(f"Deleted folder: {model_dir}")
# else:
#     print(f"Folder not found: {model_dir}")
