<a href="https://colab.research.google.com/github/Baah134/Baah134/blob/main/SER_CARINE/Paper_9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
import soundfile as sf

# --- Configuration ---
# Output Path for Saved Arrays
# The directory is created up-front (no error if it already exists) so the
# np.save calls further down always have somewhere to write.
OUTPUT_PATH = "/content/drive/MyDrive/DeepLearning/Paper_9_Replication_Features/"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Audio Settings
# Every librosa.load call in this cell resamples to this rate.
SAMPLE_RATE = 22050 # Librosa default
# NOTE(review): DURATION_FIXED is not referenced anywhere in the visible code;
# clips are processed at their native length.
DURATION_FIXED = 3.0 # Optional: Fix duration if needed, or leave dynamic

# --- Part 1: Feature Extraction Logic (The 193-Dim Vector) ---
def extract_features(data, sr=SAMPLE_RATE):
    """
    Build the 193-dim utterance descriptor used for the Issa et al. (2020) replication.

    Five librosa features are computed and each is averaged over its last axis
    (the original comments give the widths as MFCC 40, Chroma 12, Mel 128,
    Contrast 7, Tonnetz 6), then the five vectors are concatenated in that order.

    Args:
        data: 1-D audio signal as returned by ``librosa.load``.
        sr: Sampling rate of ``data``.

    Returns:
        1-D NumPy array: the concatenated per-feature means.
    """
    def _frame_mean(feature_matrix):
        # Transpose then average over axis 0, exactly as the per-feature
        # expressions did before, to collapse the time/frame dimension.
        return np.mean(feature_matrix.T, axis=0)

    # Magnitude STFT, shared by the chroma and spectral-contrast features.
    # NOTE(review): librosa documents the `S` argument of chroma_stft as a
    # power spectrogram; a magnitude spectrogram is passed here - confirm
    # this matches the reference implementation being replicated.
    magnitude = np.abs(librosa.stft(data))

    mfcc_vec = _frame_mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40))
    chroma_vec = _frame_mean(librosa.feature.chroma_stft(S=magnitude, sr=sr))
    mel_vec = _frame_mean(librosa.feature.melspectrogram(y=data, sr=sr))
    contrast_vec = _frame_mean(librosa.feature.spectral_contrast(S=magnitude, sr=sr))

    # Tonnetz is computed on the harmonic component of the signal only.
    harmonic_part = librosa.effects.harmonic(data)
    tonnetz_vec = _frame_mean(librosa.feature.tonnetz(y=harmonic_part, sr=sr))

    # 40 + 12 + 128 + 7 + 6 = 193
    return np.hstack((mfcc_vec, chroma_vec, mel_vec, contrast_vec, tonnetz_vec))

# --- Part 2: Augmentation Logic (For EMO-DB Only) ---
def augment_audio(data, sr):
    """
    Produce four augmented copies of one audio clip.

    The returned list is ordered as:
      1. time-stretched faster (rate 1.23)
      2. time-stretched slower (rate 0.81)
      3. Gaussian noise (std 0.005) added to one random contiguous 25% window
      4. circular shift by a random offset within +/-5% of the clip length

    Args:
        data: 1-D audio signal (not modified; the noisy variant works on a copy).
        sr: Sampling rate. Accepted for interface symmetry; not used here.

    Returns:
        List of four 1-D arrays in the order listed above.
    """
    # 1 & 2. Tempo changes. librosa.effects.time_stretch alters duration only;
    # per the librosa documentation it does not transpose pitch (it is not
    # plain resampling).
    faster = librosa.effects.time_stretch(data, rate=1.23)
    slower = librosa.effects.time_stretch(data, rate=0.81)

    # 3. Localised noise burst: pick a random window a quarter of the clip
    # long and add scaled standard-normal noise in place on a copy.
    noisy = data.copy()
    burst_len = int(len(noisy) * 0.25)
    burst_start = np.random.randint(0, len(noisy) - burst_len)
    noisy[burst_start : burst_start + burst_len] += np.random.randn(burst_len) * 0.005

    # 4. Time shift. np.roll wraps samples around the ends rather than
    # zero-padding, so the clip length is unchanged.
    offset = int(np.random.uniform(-0.05, 0.05) * len(data))
    shifted = np.roll(data, offset)

    return [faster, slower, noisy, shifted]

# ==========================================
# PROCESS 1: RAVDESS (The Baseline - 8 Classes)
# ==========================================
print(">>> Processing RAVDESS...")
ravdess_path = "/content/drive/MyDrive/DeepLearning/External/RAVDESS Emotional Speech Audio/audio_speech_actors_01-24/"

# Parallel lists: one entry per successfully processed clip.
X_rav = []
y_rav = []
groups_rav = [] # Actor IDs

# Mapping (Paper uses 8 classes: Calm is distinct)
rav_mapping = {
    1:'neutral', 2:'calm', 3:'happy', 4:'sad',
    5:'angry', 6:'fear', 7:'disgust', 8:'surprise'
}

if os.path.exists(ravdess_path):
    # os.listdir returns entries in arbitrary order; sorting fixes the row
    # order of the saved arrays so that later fixed-seed splits are repeatable.
    actors = sorted(os.listdir(ravdess_path))
    for actor_dir in tqdm(actors):
        actor_path = os.path.join(ravdess_path, actor_dir)
        if not os.path.isdir(actor_path):
            continue

        for file in sorted(os.listdir(actor_path)):
            # Ignore stray non-audio entries instead of trying to parse them.
            if not file.lower().endswith('.wav'):
                continue
            try:
                # Parse Filename: 03-01-06-01-02-01-12.wav
                parts = file.split('.')[0].split('-')
                emotion_code = int(parts[2])
                actor_id = parts[6] # The last part is the actor ID

                # Resolve the label first so unmapped codes are skipped
                # before the expensive load + feature extraction.
                label = rav_mapping.get(emotion_code)
                if label is None:
                    continue

                # Load Audio
                file_path = os.path.join(actor_path, file)
                data, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                # Extract Features (No Augmentation)
                features = extract_features(data, sr)

                X_rav.append(features)
                y_rav.append(label)
                groups_rav.append(actor_id)
            except Exception as e:
                # Best-effort: report the bad file and keep going.
                print(f"Error RAVDESS {file}: {e}")

    # Save RAVDESS
    np.save(f"{OUTPUT_PATH}RAVDESS_X.npy", np.array(X_rav))
    np.save(f"{OUTPUT_PATH}RAVDESS_y.npy", np.array(y_rav))
    np.save(f"{OUTPUT_PATH}RAVDESS_groups.npy", np.array(groups_rav))
    print(f"RAVDESS Saved: {len(X_rav)} samples.")
else:
    # Previously a missing dataset directory was skipped without any message.
    print(f"RAVDESS path not found, nothing processed: {ravdess_path}")

# ==========================================
# PROCESS 2: EMO-DB (Model B - 5 Classes + Augmentation)
# ==========================================
print("\n>>> Processing EMO-DB...")
emodb_path = "/content/drive/MyDrive/DeepLearning/External/EMoDB/"

# Parallel lists: one entry per original clip and per augmented copy.
X_emo = []
y_emo = []
groups_emo = []

# Filter: Remove 'Boredom' (L) and 'Disgust' (E) to replicate Model B
# Map: W->Angry, A->Anxiety(Fear), F->Happiness, T->Sadness, N->Neutral
valid_emotions = {
    'W': 'Angry',
    'A': 'Fear',
    'F': 'Happiness',
    'T': 'Sadness',
    'N': 'Neutral'
}

# augment_audio draws from NumPy's global RNG; seeding it here makes the saved
# augmented features identical from run to run.
AUGMENT_SEED = 42

if os.path.exists(emodb_path):
    np.random.seed(AUGMENT_SEED)

    # os.listdir order is arbitrary; sort (and keep only .wav entries) so the
    # row order of the saved arrays, and the RNG draw sequence, are repeatable.
    files = sorted(name for name in os.listdir(emodb_path) if name.lower().endswith('.wav'))
    for file in tqdm(files):
        try:
            # Parse: 03a01Fa.wav
            emotion_code = file[5]
            speaker_id = file[0:2] # First 2 chars are Speaker ID

            # Check if this is one of the 5 allowed classes
            if emotion_code in valid_emotions:
                label = valid_emotions[emotion_code]
                file_path = os.path.join(emodb_path, file)

                # Load Audio
                data, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                # 1. Original
                X_emo.append(extract_features(data, sr))
                y_emo.append(label)
                groups_emo.append(speaker_id)

                # 2. Augmentations (4 versions)
                aug_data_list = augment_audio(data, sr)
                for aug_data in aug_data_list:
                    X_emo.append(extract_features(aug_data, sr))
                    y_emo.append(label)
                    groups_emo.append(speaker_id) # IMPORTANT: Augmentations get same Speaker ID

        except Exception as e:
            # Best-effort: report the bad file and keep going.
            print(f"Error EMO-DB {file}: {e}")

    # Save EMO-DB
    np.save(f"{OUTPUT_PATH}EMODB_X.npy", np.array(X_emo))
    np.save(f"{OUTPUT_PATH}EMODB_y.npy", np.array(y_emo))
    np.save(f"{OUTPUT_PATH}EMODB_groups.npy", np.array(groups_emo))
    print(f"EMO-DB Saved: {len(X_emo)} samples (should be 5x original count).")
else:
    # Previously a missing dataset directory was skipped without any message.
    print(f"EMO-DB path not found, nothing processed: {emodb_path}")

>>> Processing RAVDESS...


100%|██████████| 24/24 [19:22<00:00, 48.45s/it]


RAVDESS Saved: 1440 samples.

>>> Processing EMO-DB...


100%|██████████| 535/535 [12:52<00:00,  1.44s/it]

EMO-DB Saved: 2040 samples (should be 5x original count).





# **RAVDESS REPLICATION**

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# --- Configuration (from Issa et al. 2020) ---
BATCH_SIZE = 32        # Not specified, 32 is standard
LEARNING_RATE = 1e-5   # Specified in paper
DECAY = 1e-6           # Specified in paper
EPOCHS = 700           # Specified in paper (It's a lot, but required for replication)
DATA_PATH = "/content/drive/MyDrive/DeepLearning/Paper_9_Replication_Features/"


def set_seed(seed=42):
    """Seed Python's, NumPy's and TensorFlow's random generators with `seed`.

    Also exports PYTHONHASHSEED. Python reads that variable at interpreter
    start-up, so the assignment does not change hashing in the running
    process; it is only inherited by child processes.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


set_seed(seed=42)

# ==========================================
# 1. MODEL ARCHITECTURE (Baseline 1D-CNN)
# ==========================================
# Based on Section 3.3 and Fig 2 of Issa et al. (2020)
def build_baseline_model(input_shape, num_classes):
    """Assemble the baseline 1D-CNN: six Conv1D stages and a dense softmax head.

    Args:
        input_shape: Per-sample shape handed to ``layers.Input``.
        num_classes: Width of the final Dense layer (number of softmax outputs).

    Returns:
        An uncompiled Keras ``Model`` named "Issa_Baseline_RAVDESS".
    """
    def conv(filters):
        # Every convolution shares kernel size 5, stride 1 and 'same' padding.
        return layers.Conv1D(filters, 5, strides=1, padding='same')

    def relu(tensor):
        return layers.Activation('relu')(tensor)

    net_in = layers.Input(shape=input_shape)

    # Stage 1: Conv(256) -> BN -> ReLU
    h = conv(256)(net_in)
    h = layers.BatchNormalization()(h)
    h = relu(h)

    # Stage 2: Conv(128) -> ReLU -> Dropout(0.1) -> BN -> MaxPool(8)
    h = conv(128)(h)
    h = relu(h)
    h = layers.Dropout(0.1)(h)
    h = layers.BatchNormalization()(h)
    h = layers.MaxPooling1D(pool_size=8)(h)

    # Stages 3 and 4: plain Conv(128) -> ReLU
    h = conv(128)(h)
    h = relu(h)
    h = conv(128)(h)
    h = relu(h)

    # Stage 5: Conv(128) -> BN -> ReLU -> Dropout(0.2)
    h = conv(128)(h)
    h = layers.BatchNormalization()(h)
    h = relu(h)
    h = layers.Dropout(0.2)(h)

    # Stage 6: Conv(128) -> Flatten -> Dropout(0.2)
    h = conv(128)(h)
    h = layers.Flatten()(h)
    h = layers.Dropout(0.2)(h)

    # Head: Dense(num_classes) -> BN -> Softmax
    logits = layers.Dense(num_classes)(h)
    logits = layers.BatchNormalization()(logits)
    probs = layers.Activation('softmax')(logits)

    return models.Model(inputs=net_in, outputs=probs, name="Issa_Baseline_RAVDESS")

# ==========================================
# 2. DATA LOADING & EXPERIMENT SWITCH
# ==========================================
print("Loading RAVDESS data...")
X = np.load(os.path.join(DATA_PATH, 'RAVDESS_X.npy'))
y = np.load(os.path.join(DATA_PATH, 'RAVDESS_y.npy'))
groups = np.load(os.path.join(DATA_PATH, 'RAVDESS_groups.npy'))

# Encode Labels (string labels -> integer ids; `classes` keeps the id -> name order)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = le.classes_
print(f"Classes: {classes}")

# Reshape for 1D CNN: (N, 193) -> (N, 193, 1)
X = X.reshape(X.shape[0], X.shape[1], 1)

# ------------------------------------------------------------------
# [THE CONTROL SWITCH]
# 'REPLICATION' = Random 80/20 Split (Try to match ~71%)
# 'DISPROVE'    = Hold out Actors 21-24 (Test Generalization)
EXPERIMENT_MODE = 'REPLICATION'
# ------------------------------------------------------------------

if EXPERIMENT_MODE == 'REPLICATION':
    print("\n>>> MODE: REPLICATION (Random Split) <<<")
    # Paper uses 5-fold cross val, but for quick check we use single 80/20 random split
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42, shuffle=True)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42) # Create Val set

elif EXPERIMENT_MODE == 'DISPROVE':
    print("\n>>> MODE: DISPROVE (Speaker Strict) <<<")
    # Hold out the last 4 actors (21, 22, 23, 24)
    # Note: IDs in groups are strings like '21', '22'
    test_actors = ['21', '22', '23', '24']
    print(f"Testing on Actors: {test_actors}")

    test_mask = np.isin(groups, test_actors)
    # Fail loudly if the IDs do not match the stored group values (e.g. a
    # dtype mismatch) rather than training with an empty test set.
    if not test_mask.any():
        raise ValueError(f"No samples found for held-out actors {test_actors}")

    X_test = X[test_mask]
    y_test = y_enc[test_mask]

    X_train_full = X[~test_mask]
    y_train_full = y_enc[~test_mask]

    # Create Val set from Train
    # NOTE: this split is random over clips, so validation speakers also
    # appear in training; only the test set is speaker-independent.
    X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, random_state=42)

else:
    # Without this, a typo in the switch surfaced later as a NameError.
    raise ValueError(f"Unknown EXPERIMENT_MODE {EXPERIMENT_MODE!r}; expected 'REPLICATION' or 'DISPROVE'")

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# ==========================================
# 3. TRAINING
# ==========================================

# Take the per-sample input shape from the data instead of hard-coding (193, 1).
model = build_baseline_model(input_shape=X_train.shape[1:], num_classes=len(classes))

# Optimizer from paper: RMSProp, lr=0.00001, decay=1e-6.
# The legacy `decay=` optimizer argument applied lr / (1 + decay * step).
# InverseTimeDecay with decay_steps=1 gives the same curve, so DECAY is now
# actually used rather than only declared.
lr_schedule = optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=LEARNING_RATE,
    decay_steps=1,
    decay_rate=DECAY,
)
opt = optimizers.RMSprop(learning_rate=lr_schedule)

model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
# The EarlyStopping callback used to be constructed but never handed to fit().
# It is now wired through an explicit switch. The default (False) keeps the
# previous behaviour of always training for the full EPOCHS.
USE_EARLY_STOPPING = False
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=50, restore_best_weights=True)
callbacks = [early_stop] if USE_EARLY_STOPPING else []

print("\nStarting Training...")
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

# ==========================================
# 4. EVALUATION
# ==========================================
print("\n--- FINAL EVALUATION ---")
# Keras returns [loss, accuracy] for the single compiled metric.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, acc = test_metrics
print(f"Test Accuracy: {acc*100:.2f}%")

# Detailed Metrics: hard predictions are the arg-max over the softmax outputs.
class_probs = model.predict(X_test)
y_pred = np.argmax(class_probs, axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Test F1-Score: {f1:.4f}")

# Confusion Matrix
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(f"Class Order: {classes}")

# Mode-specific closing remark; nothing is printed for any other mode value.
closing_note = {
    'REPLICATION': "\nTarget to beat: ~71.61% (Paper Result)",
    'DISPROVE': "\nIf this is significantly lower than 71%, you have successfully disproven the model's generalization.",
}.get(EXPERIMENT_MODE)
if closing_note is not None:
    print(closing_note)

Loading RAVDESS data...
Classes: ['angry' 'calm' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']

>>> MODE: REPLICATION (Random Split) <<<
Train: (1036, 193, 1), Val: (116, 193, 1), Test: (288, 193, 1)

Starting Training...
Epoch 1/700
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 160ms/step - accuracy: 0.1267 - loss: 2.5010 - val_accuracy: 0.1552 - val_loss: 2.1403
Epoch 2/700
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.1598 - loss: 2.3566 - val_accuracy: 0.2069 - val_loss: 2.1014
Epoch 3/700
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1449 - loss: 2.2884 - val_accuracy: 0.1897 - val_loss: 2.0808
Epoch 4/700
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1919 - loss: 2.2078 - val_accuracy: 0.1983 - val_loss: 2.0497
Epoch 5/700
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2058 - loss: 2.2264 - v

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# --- Configuration (from Issa et al. 2020) ---
BATCH_SIZE = 32        # Not specified, 32 is standard
LEARNING_RATE = 1e-5   # Specified in paper
DECAY = 1e-6           # Specified in paper
EPOCHS = 700           # Specified in paper (It's a lot, but required for replication)
DATA_PATH = "/content/drive/MyDrive/DeepLearning/Paper_9_Replication_Features/"


def set_seed(seed=42):
    """Seed Python's, NumPy's and TensorFlow's random generators with `seed`.

    Also exports PYTHONHASHSEED. Python reads that variable at interpreter
    start-up, so the assignment does not change hashing in the running
    process; it is only inherited by child processes.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


set_seed(seed=42)

# ==========================================
# 1. MODEL ARCHITECTURE (Baseline 1D-CNN)
# ==========================================
# Based on Section 3.3 and Fig 2 of Issa et al. (2020)
def build_baseline_model(input_shape, num_classes):
    """Assemble the baseline 1D-CNN: six Conv1D stages and a dense softmax head.

    Args:
        input_shape: Per-sample shape handed to ``layers.Input``.
        num_classes: Width of the final Dense layer (number of softmax outputs).

    Returns:
        An uncompiled Keras ``Model`` named "Issa_Baseline_RAVDESS".
    """
    def conv(filters):
        # Every convolution shares kernel size 5, stride 1 and 'same' padding.
        return layers.Conv1D(filters, 5, strides=1, padding='same')

    def relu(tensor):
        return layers.Activation('relu')(tensor)

    net_in = layers.Input(shape=input_shape)

    # Stage 1: Conv(256) -> BN -> ReLU
    h = conv(256)(net_in)
    h = layers.BatchNormalization()(h)
    h = relu(h)

    # Stage 2: Conv(128) -> ReLU -> Dropout(0.1) -> BN -> MaxPool(8)
    h = conv(128)(h)
    h = relu(h)
    h = layers.Dropout(0.1)(h)
    h = layers.BatchNormalization()(h)
    h = layers.MaxPooling1D(pool_size=8)(h)

    # Stages 3 and 4: plain Conv(128) -> ReLU
    h = conv(128)(h)
    h = relu(h)
    h = conv(128)(h)
    h = relu(h)

    # Stage 5: Conv(128) -> BN -> ReLU -> Dropout(0.2)
    h = conv(128)(h)
    h = layers.BatchNormalization()(h)
    h = relu(h)
    h = layers.Dropout(0.2)(h)

    # Stage 6: Conv(128) -> Flatten -> Dropout(0.2)
    h = conv(128)(h)
    h = layers.Flatten()(h)
    h = layers.Dropout(0.2)(h)

    # Head: Dense(num_classes) -> BN -> Softmax
    logits = layers.Dense(num_classes)(h)
    logits = layers.BatchNormalization()(logits)
    probs = layers.Activation('softmax')(logits)

    return models.Model(inputs=net_in, outputs=probs, name="Issa_Baseline_RAVDESS")

# ==========================================
# 2. DATA LOADING & EXPERIMENT SWITCH
# ==========================================
print("Loading RAVDESS data...")
X = np.load(os.path.join(DATA_PATH, 'RAVDESS_X.npy'))
y = np.load(os.path.join(DATA_PATH, 'RAVDESS_y.npy'))
groups = np.load(os.path.join(DATA_PATH, 'RAVDESS_groups.npy'))

# Encode Labels (string labels -> integer ids; `classes` keeps the id -> name order)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = le.classes_
print(f"Classes: {classes}")

# Reshape for 1D CNN: (N, 193) -> (N, 193, 1)
X = X.reshape(X.shape[0], X.shape[1], 1)

# ------------------------------------------------------------------
# [THE CONTROL SWITCH]
# 'REPLICATION' = Random 80/20 Split (Try to match ~71%)
# 'DISPROVE'    = Hold out Actors 21-24 (Test Generalization)
EXPERIMENT_MODE = 'DISPROVE'
# ------------------------------------------------------------------

if EXPERIMENT_MODE == 'REPLICATION':
    print("\n>>> MODE: REPLICATION (Random Split) <<<")
    # Paper uses 5-fold cross val, but for quick check we use single 80/20 random split
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42, shuffle=True)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42) # Create Val set

elif EXPERIMENT_MODE == 'DISPROVE':
    print("\n>>> MODE: DISPROVE (Speaker Strict) <<<")
    # Hold out the last 4 actors (21, 22, 23, 24)
    # Note: IDs in groups are strings like '21', '22'
    test_actors = ['21', '22', '23', '24']
    print(f"Testing on Actors: {test_actors}")

    test_mask = np.isin(groups, test_actors)
    # Fail loudly if the IDs do not match the stored group values (e.g. a
    # dtype mismatch) rather than training with an empty test set.
    if not test_mask.any():
        raise ValueError(f"No samples found for held-out actors {test_actors}")

    X_test = X[test_mask]
    y_test = y_enc[test_mask]

    X_train_full = X[~test_mask]
    y_train_full = y_enc[~test_mask]

    # Create Val set from Train
    # NOTE: this split is random over clips, so validation speakers also
    # appear in training; only the test set is speaker-independent.
    X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, random_state=42)

else:
    # Without this, a typo in the switch surfaced later as a NameError.
    raise ValueError(f"Unknown EXPERIMENT_MODE {EXPERIMENT_MODE!r}; expected 'REPLICATION' or 'DISPROVE'")

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# ==========================================
# 3. TRAINING
# ==========================================

# Take the per-sample input shape from the data instead of hard-coding (193, 1).
model = build_baseline_model(input_shape=X_train.shape[1:], num_classes=len(classes))

# Optimizer from paper: RMSProp, lr=0.00001, decay=1e-6.
# The legacy `decay=` optimizer argument applied lr / (1 + decay * step).
# InverseTimeDecay with decay_steps=1 gives the same curve, so DECAY is now
# actually used rather than only declared.
lr_schedule = optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=LEARNING_RATE,
    decay_steps=1,
    decay_rate=DECAY,
)
opt = optimizers.RMSprop(learning_rate=lr_schedule)

model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
# The EarlyStopping callback used to be constructed but never handed to fit().
# It is now wired through an explicit switch. The default (False) keeps the
# previous behaviour of always training for the full EPOCHS.
USE_EARLY_STOPPING = False
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=50, restore_best_weights=True)
callbacks = [early_stop] if USE_EARLY_STOPPING else []

print("\nStarting Training...")
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

# ==========================================
# 4. EVALUATION
# ==========================================
print("\n--- FINAL EVALUATION ---")
# Keras returns [loss, accuracy] for the single compiled metric.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, acc = test_metrics
print(f"Test Accuracy: {acc*100:.2f}%")

# Detailed Metrics: hard predictions are the arg-max over the softmax outputs.
class_probs = model.predict(X_test)
y_pred = np.argmax(class_probs, axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Test F1-Score: {f1:.4f}")

# Confusion Matrix
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(f"Class Order: {classes}")

# Mode-specific closing remark; nothing is printed for any other mode value.
closing_note = {
    'REPLICATION': "\nTarget to beat: ~71.61% (Paper Result)",
    'DISPROVE': "\nIf this is significantly lower than 71%, you have successfully disproven the model's generalization.",
}.get(EXPERIMENT_MODE)
if closing_note is not None:
    print(closing_note)

Loading RAVDESS data...
Classes: ['angry' 'calm' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']

>>> MODE: DISPROVE (Speaker Strict) <<<
Testing on Actors: ['21', '22', '23', '24']
Train: (1080, 193, 1), Val: (120, 193, 1), Test: (240, 193, 1)

Starting Training...
Epoch 1/700
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 214ms/step - accuracy: 0.1293 - loss: 2.5307 - val_accuracy: 0.1667 - val_loss: 2.1266
Epoch 2/700
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1462 - loss: 2.3651 - val_accuracy: 0.1667 - val_loss: 2.1113
Epoch 3/700
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1814 - loss: 2.2076 - val_accuracy: 0.1583 - val_loss: 2.0824
Epoch 4/700
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2003 - loss: 2.1517 - val_accuracy: 0.1833 - val_loss: 2.0525
Epoch 5/700
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/s

# **EMO-DB**

In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# --- Configuration (Issa et al. 2020) ---
BATCH_SIZE = 32
LEARNING_RATE = 1e-5
EPOCHS = 300  # Paper mentions 300 for Model A/B
DATA_PATH = "/content/drive/MyDrive/DeepLearning/Paper_9_Replication_Features/"


def set_seed(seed=42):
    """Seed Python's, NumPy's and TensorFlow's random generators with `seed`.

    Also exports PYTHONHASHSEED. Python reads that variable at interpreter
    start-up, so the assignment does not change hashing in the running
    process; it is only inherited by child processes.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


set_seed(seed=42)

# ==========================================
# 1. MODEL B ARCHITECTURE
# ==========================================
# [cite_start]Based on Section 4.2.2 [cite: 260-262, 348]
def build_model_b(input_shape, num_classes):
    """Assemble "Model B": four Conv1D+ReLU stages (no batch norm) and a dense head.

    Args:
        input_shape: Per-sample shape handed to ``layers.Input``.
        num_classes: Width of the final Dense layer (number of softmax outputs).

    Returns:
        An uncompiled Keras ``Model`` named "Issa_Model_B_EMODB".
    """
    def conv_relu(tensor, filters):
        # Conv1D (kernel 5, stride 1, 'same' padding) followed by ReLU.
        tensor = layers.Conv1D(filters, 5, strides=1, padding='same')(tensor)
        return layers.Activation('relu')(tensor)

    net_in = layers.Input(shape=input_shape)

    # Block 1 (from the Model A base; Model A removed batch norm here)
    h = conv_relu(net_in, 256)

    # Block 2: Conv -> ReLU -> Dropout(0.1) -> MaxPool(8)
    h = conv_relu(h, 128)
    h = layers.Dropout(0.1)(h)
    h = layers.MaxPooling1D(pool_size=8)(h)

    # Block 3
    h = conv_relu(h, 128)

    # Model B modification: "Additional convolution layer before flattening"
    h = conv_relu(h, 128)

    # Flatten, then the dropout of 0.2 that Model A had at this point
    h = layers.Flatten()(h)
    h = layers.Dropout(0.2)(h)

    # Output layer
    logits = layers.Dense(num_classes)(h)

    # Model B modification: "Dropout 0.25 after fully connected layer".
    # Unusual, but it is placed before the softmax to match that description.
    logits = layers.Dropout(0.25)(logits)

    probs = layers.Activation('softmax')(logits)

    return models.Model(inputs=net_in, outputs=probs, name="Issa_Model_B_EMODB")

# ==========================================
# 2. DATA LOADING
# ==========================================
print("Loading EMO-DB data...")
X = np.load(os.path.join(DATA_PATH, 'EMODB_X.npy'))
y = np.load(os.path.join(DATA_PATH, 'EMODB_y.npy'))
groups = np.load(os.path.join(DATA_PATH, 'EMODB_groups.npy'))

# Encode Labels (Should be 5 classes: Angry, Fear, Happiness, Neutral, Sadness)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = le.classes_
print(f"Classes: {classes}")

# Reshape for 1D CNN: (N, 193) -> (N, 193, 1)
X = X.reshape(X.shape[0], X.shape[1], 1)

# ------------------------------------------------------------------
# [THE CONTROL SWITCH]
# 'REPLICATION' = Random Split (Replicating the 96% claim)
# 'DISPROVE'    = Speaker Strict (Holding out Actors 13 & 14)
EXPERIMENT_MODE = 'REPLICATION'
# ------------------------------------------------------------------

if EXPERIMENT_MODE == 'REPLICATION':
    print("\n>>> MODE: REPLICATION (Random Split) <<<")
    print("Warning: This allows Data Leakage (Augmented versions of Test files are in Train).")
    # Random split matching paper's likely method
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42, shuffle=True)
    # Create tiny Val set from Train
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

elif EXPERIMENT_MODE == 'DISPROVE':
    print("\n>>> MODE: DISPROVE (Speaker Strict) <<<")
    # Hold out Speakers 13 and 14 (Arbitrary choice, but consistent)
    # Note: IDs are strings "03", "13", etc.
    test_actors = ['13', '14']
    print(f"Testing on Actors: {test_actors}")

    test_mask = np.isin(groups, test_actors)
    # Fail loudly if the IDs do not match the stored group values (e.g. a
    # dtype mismatch) rather than training with an empty test set.
    if not test_mask.any():
        raise ValueError(f"No samples found for held-out speakers {test_actors}")

    X_test = X[test_mask]
    y_test = y_enc[test_mask]

    X_train_full = X[~test_mask]
    y_train_full = y_enc[~test_mask]

    # NOTE: the validation split is random over rows, so augmented copies of a
    # training clip can land in validation; only the test set is speaker-independent.
    X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, random_state=42)

else:
    # Without this, a typo in the switch surfaced later as a NameError.
    raise ValueError(f"Unknown EXPERIMENT_MODE {EXPERIMENT_MODE!r}; expected 'REPLICATION' or 'DISPROVE'")

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# ==========================================
# 3. TRAINING
# ==========================================

# Take the per-sample input shape from the data instead of hard-coding (193, 1).
model = build_model_b(input_shape=X_train.shape[1:], num_classes=len(classes))

# Optimizer: RMSProp, lr=0.00001
opt = optimizers.RMSprop(learning_rate=LEARNING_RATE)

model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
# The EarlyStopping callback used to be constructed but never handed to fit().
# It is now wired through an explicit switch. The default (False) keeps the
# previous behaviour of always training for the full EPOCHS.
USE_EARLY_STOPPING = False
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=30, restore_best_weights=True)
callbacks = [early_stop] if USE_EARLY_STOPPING else []

print("\nStarting Training...")
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

# ==========================================
# 4. EVALUATION
# ==========================================
print("\n--- FINAL EVALUATION ---")
# Keras returns [loss, accuracy] for the single compiled metric.
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {acc*100:.2f}%")

# Detailed Metrics: hard predictions are the arg-max over the softmax outputs.
y_pred = np.argmax(model.predict(X_test), axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Test F1-Score: {f1:.4f}")

# Confusion Matrix
# Passing the full label set keeps one row/column per class even if a class is
# absent from the test split, so the matrix always lines up with "Class Order".
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(classes)))
print(cm)
print(f"Class Order: {classes}")

# The stray "[cite: 349]" marker has been removed from the printed message.
if EXPERIMENT_MODE == 'REPLICATION':
    print("\nTarget to match: ~96.34% (Paper Result)")
elif EXPERIMENT_MODE == 'DISPROVE':
    print("\nIf this is significantly lower than 96%, you have successfully invalidated Model B.")

Loading EMO-DB data...
Classes: ['Angry' 'Fear' 'Happiness' 'Neutral' 'Sadness']

>>> MODE: REPLICATION (Random Split) <<<
Train: (1468, 193, 1), Val: (164, 193, 1), Test: (408, 193, 1)

Starting Training...
Epoch 1/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 101ms/step - accuracy: 0.2577 - loss: 2.0371 - val_accuracy: 0.4024 - val_loss: 1.3791
Epoch 2/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.3748 - loss: 1.5054 - val_accuracy: 0.5000 - val_loss: 1.2255
Epoch 3/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.4443 - loss: 1.3529 - val_accuracy: 0.5671 - val_loss: 1.1272
Epoch 4/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.4975 - loss: 1.2172 - val_accuracy: 0.5976 - val_loss: 1.0634
Epoch 5/300
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.5135 - loss: 1.1876 - val_accuracy: 0.6463

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
import numpy as np
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# --- Configuration (Issa et al. 2020) ---
BATCH_SIZE = 32
LEARNING_RATE = 1e-5
EPOCHS = 300  # Paper mentions 300 for Model A/B
DATA_PATH = "/content/drive/MyDrive/DeepLearning/Paper_9_Replication_Features/"


def set_seed(seed=42):
    """Seed Python's, NumPy's and TensorFlow's random generators with `seed`.

    Also exports PYTHONHASHSEED. Python reads that variable at interpreter
    start-up, so the assignment does not change hashing in the running
    process; it is only inherited by child processes.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


set_seed(seed=42)

# ==========================================
# 1. MODEL B ARCHITECTURE
# ==========================================
# [cite_start]Based on Section 4.2.2 [cite: 260-262, 348]
def build_model_b(input_shape, num_classes):
    """Assemble "Model B": four Conv1D+ReLU stages (no batch norm) and a dense head.

    Args:
        input_shape: Per-sample shape handed to ``layers.Input``.
        num_classes: Width of the final Dense layer (number of softmax outputs).

    Returns:
        An uncompiled Keras ``Model`` named "Issa_Model_B_EMODB".
    """
    def conv_relu(tensor, filters):
        # Conv1D (kernel 5, stride 1, 'same' padding) followed by ReLU.
        tensor = layers.Conv1D(filters, 5, strides=1, padding='same')(tensor)
        return layers.Activation('relu')(tensor)

    net_in = layers.Input(shape=input_shape)

    # Block 1 (from the Model A base; Model A removed batch norm here)
    h = conv_relu(net_in, 256)

    # Block 2: Conv -> ReLU -> Dropout(0.1) -> MaxPool(8)
    h = conv_relu(h, 128)
    h = layers.Dropout(0.1)(h)
    h = layers.MaxPooling1D(pool_size=8)(h)

    # Block 3
    h = conv_relu(h, 128)

    # Model B modification: "Additional convolution layer before flattening"
    h = conv_relu(h, 128)

    # Flatten, then the dropout of 0.2 that Model A had at this point
    h = layers.Flatten()(h)
    h = layers.Dropout(0.2)(h)

    # Output layer
    logits = layers.Dense(num_classes)(h)

    # Model B modification: "Dropout 0.25 after fully connected layer".
    # Unusual, but it is placed before the softmax to match that description.
    logits = layers.Dropout(0.25)(logits)

    probs = layers.Activation('softmax')(logits)

    return models.Model(inputs=net_in, outputs=probs, name="Issa_Model_B_EMODB")

# ==========================================
# 2. DATA LOADING
# ==========================================
print("Loading EMO-DB data...")
X = np.load(os.path.join(DATA_PATH, 'EMODB_X.npy'))
y = np.load(os.path.join(DATA_PATH, 'EMODB_y.npy'))
groups = np.load(os.path.join(DATA_PATH, 'EMODB_groups.npy'))

# Encode Labels (Should be 5 classes: Angry, Fear, Happiness, Neutral, Sadness)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = le.classes_
print(f"Classes: {classes}")

# Reshape for 1D CNN: (N, 193) -> (N, 193, 1)
X = X.reshape(X.shape[0], X.shape[1], 1)

# ------------------------------------------------------------------
# [THE CONTROL SWITCH]
# 'REPLICATION' = Random Split (Replicating the 96% claim)
# 'DISPROVE'    = Speaker Strict (Holding out Actors 13 & 14)
EXPERIMENT_MODE = 'DISPROVE'
# ------------------------------------------------------------------

if EXPERIMENT_MODE == 'REPLICATION':
    print("\n>>> MODE: REPLICATION (Random Split) <<<")
    print("Warning: This allows Data Leakage (Augmented versions of Test files are in Train).")
    # Random split matching paper's likely method
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42, shuffle=True)
    # Create tiny Val set from Train
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

elif EXPERIMENT_MODE == 'DISPROVE':
    print("\n>>> MODE: DISPROVE (Speaker Strict) <<<")
    # Hold out Speakers 13 and 14 (Arbitrary choice, but consistent)
    # Note: IDs are strings "03", "13", etc.
    test_actors = ['13', '14']
    print(f"Testing on Actors: {test_actors}")

    test_mask = np.isin(groups, test_actors)
    # Fail loudly if the IDs do not match the stored group values (e.g. a
    # dtype mismatch) rather than training with an empty test set.
    if not test_mask.any():
        raise ValueError(f"No samples found for held-out speakers {test_actors}")

    X_test = X[test_mask]
    y_test = y_enc[test_mask]

    X_train_full = X[~test_mask]
    y_train_full = y_enc[~test_mask]

    # NOTE: the validation split is random over rows, so augmented copies of a
    # training clip can land in validation; only the test set is speaker-independent.
    X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, random_state=42)

else:
    # Without this, a typo in the switch surfaced later as a NameError.
    raise ValueError(f"Unknown EXPERIMENT_MODE {EXPERIMENT_MODE!r}; expected 'REPLICATION' or 'DISPROVE'")

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# ==========================================
# 3. TRAINING
# ==========================================

# Take the per-sample input shape from the data instead of hard-coding (193, 1).
model = build_model_b(input_shape=X_train.shape[1:], num_classes=len(classes))

# Optimizer: RMSProp, lr=0.00001
opt = optimizers.RMSprop(learning_rate=LEARNING_RATE)

model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
# The EarlyStopping callback used to be constructed but never handed to fit().
# It is now wired through an explicit switch. The default (False) keeps the
# previous behaviour of always training for the full EPOCHS.
USE_EARLY_STOPPING = False
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=30, restore_best_weights=True)
callbacks = [early_stop] if USE_EARLY_STOPPING else []

print("\nStarting Training...")
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

# ==========================================
# 4. EVALUATION
# ==========================================
print("\n--- FINAL EVALUATION ---")
# Keras returns [loss, accuracy] for the single compiled metric.
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {acc*100:.2f}%")

# Detailed Metrics: hard predictions are the arg-max over the softmax outputs.
y_pred = np.argmax(model.predict(X_test), axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Test F1-Score: {f1:.4f}")

# Confusion Matrix
# Passing the full label set keeps one row/column per class even if a class is
# absent from the test split, so the matrix always lines up with "Class Order".
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(classes)))
print(cm)
print(f"Class Order: {classes}")

# The stray "[cite: 349]" marker has been removed from the printed message.
if EXPERIMENT_MODE == 'REPLICATION':
    print("\nTarget to match: ~96.34% (Paper Result)")
elif EXPERIMENT_MODE == 'DISPROVE':
    print("\nIf this is significantly lower than 96%, you have successfully invalidated Model B.")

Loading EMO-DB data...
Classes: ['Angry' 'Fear' 'Happiness' 'Neutral' 'Sadness']

>>> MODE: DISPROVE (Speaker Strict) <<<
Testing on Actors: ['13', '14']
Train: (1404, 193, 1), Val: (156, 193, 1), Test: (480, 193, 1)

Starting Training...
Epoch 1/300
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 123ms/step - accuracy: 0.2878 - loss: 2.0067 - val_accuracy: 0.4551 - val_loss: 1.3736
Epoch 2/300
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3793 - loss: 1.4595 - val_accuracy: 0.5321 - val_loss: 1.2175
Epoch 3/300
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4611 - loss: 1.2959 - val_accuracy: 0.5513 - val_loss: 1.1283
Epoch 4/300
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.5047 - loss: 1.2108 - val_accuracy: 0.5641 - val_loss: 1.0665
Epoch 5/300
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5222 - loss: 1.