In [None]:
# ==================================================
# 🔁 Full Reproducibility + Warning Suppression Setup
# ==================================================
import os
import random
import logging
import warnings

# === ENVIRONMENT VARIABLES (must be in place BEFORE TensorFlow is imported) ===
SEED = 42
os.environ.update({
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # presumably only affects child processes, not this kernel - confirm.
    'PYTHONHASHSEED': str(SEED),
    'TF_CPP_MIN_LOG_LEVEL': '2',      # suppress TensorFlow logs
    # Non-deterministic ops stay enabled on purpose to avoid UnimplementedError,
    # so runs are seeded but not guaranteed bit-identical.
    'TF_DETERMINISTIC_OPS': '0',
    'CUDA_VISIBLE_DEVICES': '0',      # GPU id (use "" to force CPU)
})

# === PYTHON SEED ===
random.seed(SEED)

# === QUIETEN LOGGING & WARNINGS ===
logging.getLogger('absl').setLevel(logging.ERROR)
for _category in (FutureWarning, UserWarning):
    warnings.filterwarnings('ignore', category=_category)

# === IMPORT LIBRARIES AFTER SEED SETTINGS ===
import numpy as np
import tensorflow as tf
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from tqdm import tqdm

# === NUMPY & TENSORFLOW SEEDS ===
np.random.seed(SEED)
tf.random.set_seed(SEED)

# === SINGLE-THREADING FOR REPRODUCIBILITY ===
# TensorFlow refuses to change its thread pools once the runtime has been
# initialised (it raises RuntimeError), which is exactly what happens when this
# cell is re-run in a live kernel. Report it instead of aborting the cell.
# print() is used because UserWarning is filtered out earlier in this cell.
try:
    tf.config.threading.set_intra_op_parallelism_threads(1)
    tf.config.threading.set_inter_op_parallelism_threads(1)
except RuntimeError as exc:
    print(f"TensorFlow threading settings left unchanged (restart the kernel to apply them): {exc}")

# Report which GPUs TensorFlow can see
print("‚úÖ GPU Available:", tf.config.list_physical_devices('GPU'))

# ‚úÖ DATASET PATH
# Root folder of the BanglaSER recordings. Set the BANGLASER_PATH environment
# variable to run the notebook on another machine; the original local path is
# kept as the default so existing setups behave exactly as before.
DATASET_PATH = os.environ.get('BANGLASER_PATH', r'D:\498R\BanglaSER')

# Emotion labels, keyed by the third dash-separated field of the filename.
# Format: Mode-StatementType-Emotion-Intensity-Statement-Repetition-Actor.wav
EMOTION_MAPPING = {
    '01': 'happy',
    '02': 'sad',
    '03': 'angry',
    '04': 'surprise',
    '05': 'neutral'
}

In [None]:
# Create dataset information from file structure
def create_dataset_info(root_path, emotion_mapping=None):
    """Scan ``root_path`` recursively and tabulate every labelled ``.wav`` file.

    Filenames are expected to look like
    ``Mode-StatementType-Emotion-Intensity-Statement-Repetition-Actor.wav``;
    files that do not split into exactly seven dash-separated fields are
    skipped. The third field is the emotion code.

    Args:
        root_path: Directory to walk.
        emotion_mapping: Optional ``{code: label}`` dict. Defaults to the
            notebook-level ``EMOTION_MAPPING``.

    Returns:
        pandas.DataFrame with columns ``file_path``, ``emotion`` and
        ``emotion_code``. Codes missing from the mapping are labelled
        ``'unknown'``. The columns are present even when no file matches, so
        ``df['emotion']`` never raises ``KeyError``.
    """
    mapping = EMOTION_MAPPING if emotion_mapping is None else emotion_mapping

    records = []
    for root, dirs, files in os.walk(root_path):
        # os.walk yields entries in filesystem order, which differs between
        # machines. Sorting in place (dirs) and per directory (files) keeps the
        # row order - and therefore every seeded split downstream - reproducible.
        dirs.sort()
        for file in sorted(files):
            # Case-insensitive so '.WAV' recordings are not silently dropped
            if not file.lower().endswith('.wav'):
                continue
            parts = file.split('-')
            if len(parts) != 7:
                continue
            emotion_code = parts[2]
            records.append({
                'file_path': os.path.join(root, file),
                'emotion': mapping.get(emotion_code, 'unknown'),
                'emotion_code': emotion_code
            })

    return pd.DataFrame(records, columns=['file_path', 'emotion', 'emotion_code'])

# Build the file/label table, then report its size and per-emotion counts
df = create_dataset_info(root_path=DATASET_PATH)
print(f"Total samples: {df.shape[0]}")
print(df['emotion'].value_counts())

In [None]:
import matplotlib.pyplot as plt

# Number of recordings per emotion
emotion_counts = df['emotion'].value_counts()

# Pie chart on a fully transparent canvas (figure and axes backgrounds)
fig, ax = plt.subplots(figsize=(8, 8), facecolor='none')
ax.set_facecolor('none')

wedges, texts, autotexts = ax.pie(
    emotion_counts,
    labels=emotion_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=plt.cm.Set3.colors,
    textprops={'fontsize': 19, 'fontweight': 'bold'},
)

# Make the percentage labels bold as well
plt.setp(autotexts, fontsize=19, fontweight='bold')

plt.show()


In [None]:
import numpy as np
import librosa
from tqdm import tqdm
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# ---------------------------
# Audio loading & utilities
# ---------------------------
TARGET_SR = 16000                            # sample rate used throughout (Hz)
DURATION_SEC = 4.0                           # fixed clip duration (seconds)
TARGET_LEN = int(TARGET_SR * DURATION_SEC)   # fixed clip length (samples)

def load_audio_fixed(file_path, sr=TARGET_SR, duration=DURATION_SEC):
    """Load an audio file and force it to a fixed number of samples.

    Args:
        file_path: Path handed to ``librosa.load``.
        sr: Sample rate passed to ``librosa.load``.
        duration: Maximum number of seconds to read.

    Returns:
        ``(audio, sr)`` where ``audio`` is zero-padded at the end or truncated
        to ``int(sr * duration)`` samples and ``sr`` is the rate returned by
        librosa.
    """
    audio, sr = librosa.load(file_path, sr=sr, duration=duration)
    # The length must follow the arguments actually used; the previous code
    # always used the global TARGET_LEN, which is only right for the defaults.
    # With duration=None there is no length to derive, so keep the old fallback.
    target_len = TARGET_LEN if duration is None else int(sr * duration)
    return ensure_length(audio, target_len), sr

def ensure_length(audio, target_len=TARGET_LEN):
    """Zero-pad at the end or truncate ``audio`` to exactly ``target_len`` samples.

    An input that already has the requested length is returned unchanged.
    """
    n_samples = len(audio)
    if n_samples == target_len:
        return audio
    if n_samples < target_len:
        return np.pad(audio, (0, target_len - n_samples), mode='constant')
    return audio[:target_len]

# ---------------------------
# Audio-domain augmentations
# ---------------------------
def aug_noise(audio, noise_std=0.005):
    """Return ``audio`` plus zero-mean Gaussian noise of standard deviation ``noise_std``.

    The noise is drawn from NumPy's global random state.
    """
    noise = np.random.normal(loc=0.0, scale=noise_std, size=audio.shape)
    return audio + noise

def aug_shift(audio, max_shift_ratio=0.1):
    """Shift ``audio`` in time by a random offset, filling the gap with zeros.

    The offset is drawn uniformly from ``[-max_shift, max_shift]`` samples,
    where ``max_shift = int(len(audio) * max_shift_ratio)``. Samples pushed
    past either end are dropped (no wrap-around), so the length is preserved.
    A zero offset returns the input unchanged.
    """
    limit = int(len(audio) * max_shift_ratio)
    offset = np.random.randint(-limit, limit + 1)
    if offset == 0:
        return audio

    silence = np.zeros(abs(offset), dtype=audio.dtype)
    if offset > 0:
        # delay: silence first, tail of the signal dropped
        return np.concatenate([silence, audio[:-offset]])
    # advance: head of the signal dropped, silence appended
    return np.concatenate([audio[-offset:], silence])

def aug_pitch(audio, sr=TARGET_SR, semitone_range=(-2, 2)):
    """Pitch-shift ``audio`` by a random number of semitones.

    The step is drawn uniformly between ``semitone_range[0]`` and
    ``semitone_range[1]``; the result is padded/truncated to ``TARGET_LEN``.
    """
    lowest, highest = semitone_range[0], semitone_range[1]
    n_steps = np.random.uniform(lowest, highest)
    shifted = librosa.effects.pitch_shift(y=audio, sr=sr, n_steps=n_steps)
    return ensure_length(shifted, TARGET_LEN)

def aug_stretch(audio, rate_range=(0.8, 1.25)):
    """Time-stretch ``audio`` by a random rate, then restore the fixed length.

    The rate is drawn uniformly between ``rate_range[0]`` and
    ``rate_range[1]``; the result is padded/truncated to ``TARGET_LEN``.
    """
    slowest, fastest = rate_range[0], rate_range[1]
    rate = np.random.uniform(slowest, fastest)
    # Keyword arguments only: some librosa versions make 'rate' keyword-only
    stretched = librosa.effects.time_stretch(y=audio, rate=rate)
    return ensure_length(stretched, TARGET_LEN)

AUG_FUNCS = ['noise', 'shift', 'pitch', 'stretch']

def apply_aug(audio, method):
    """Apply the augmentation named ``method`` to ``audio``.

    Recognised names are 'noise', 'shift', 'pitch' and 'stretch'; any other
    value returns ``audio`` untouched.
    """
    if method not in ('noise', 'shift', 'pitch', 'stretch'):
        return audio
    # Built only after the guard, so the handlers are resolved only when needed
    handlers = {
        'noise': aug_noise,
        'shift': aug_shift,
        'pitch': aug_pitch,
        'stretch': aug_stretch,
    }
    return handlers[method](audio)

# ---------------------------
# Feature extraction (unchanged logic, but from audio)
# ---------------------------
def extract_features_from_audio(audio, sr=TARGET_SR):
    """Compute the stacked frame-level feature matrix for one waveform.

    The rows stacked per frame are, in order: 13 MFCCs, 128 log-mel bands,
    zero-crossing rate, 12 chroma bins and RMS energy (155 rows in total).

    Returns:
        Array of shape (T, 155), i.e. one row per frame.
    """
    feature_blocks = [
        librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13),
        librosa.power_to_db(
            librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
        ),
        librosa.feature.zero_crossing_rate(audio),
        librosa.feature.chroma_stft(y=audio, sr=sr, n_chroma=12),
        librosa.feature.rms(y=audio),
    ]
    stacked = np.vstack(feature_blocks)  # (155, T)
    return stacked.T                     # (T, 155)

# Path-based entry point kept for callers that still pass a filename
def extract_features(file_path):
    """Load ``file_path`` at the fixed rate/duration and return its feature matrix."""
    waveform, sample_rate = load_audio_fixed(
        file_path, sr=TARGET_SR, duration=DURATION_SEC
    )
    return extract_features_from_audio(waveform, sample_rate)

# ---------------------------
# Label mapping (same as yours)
# ---------------------------
# Sort the class names so the integer id of each emotion is stable.
# `unique()` returns values in order of first appearance, which follows the
# directory traversal order and could change the ids from one machine to the next.
emotions = np.array(sorted(df['emotion'].unique()), dtype=object)
label_mapping = {label: i for i, label in enumerate(emotions)}    # name -> id
reverse_mapping = {i: label for label, i in label_mapping.items()}  # id -> name
num_labels = len(emotions)

print("\nEmotion Label Mapping:")
for emotion, idx in label_mapping.items():
    print(f"{emotion}: {idx}")

# ---------------------------------------------------------------
# PREPROCESS -> SPLIT -> AUDIO AUGMENTATION (training split only)
# ---------------------------------------------------------------
# The split is done on the ORIGINAL recordings first. Augmenting before the
# split places perturbed copies of one recording on both sides of the
# train/val/test boundary, so validation and test scores would be measured
# on data the model has effectively already seen.
print("üîÅ Extracting ORIGINAL features for ALL files ...")
audio_all, X_all, y_all = [], [], []

for _, row in tqdm(df.iterrows(), total=len(df), desc="All data"):
    audio, sr = load_audio_fixed(row['file_path'], sr=TARGET_SR, duration=DURATION_SEC)
    audio_all.append(audio)                           # raw audio kept for augmentation
    X_all.append(extract_features_from_audio(audio, sr))
    y_all.append(label_mapping[row['emotion']])

# X_all / y_all hold the ORIGINAL (un-augmented) samples only
X_all = np.array(X_all)
y_all = np.array(y_all)

print(f"üìä Original samples: {len(X_all)}")

# -----------------------------------------------
# Split the originals: 80% train / 10% val / 10% test
# -----------------------------------------------
# Indices are split (not arrays) so the raw audio of the training rows can be
# looked up afterwards. Stratification needs enough samples of every class in
# each part; scikit-learn raises ValueError otherwise.
all_idx = np.arange(len(y_all))
train_idx, temp_idx = train_test_split(
    all_idx, test_size=0.2, random_state=42, stratify=y_all
)
val_idx, test_idx = train_test_split(
    temp_idx, test_size=0.5, random_state=42, stratify=y_all[temp_idx]
)

X_train, y_train = X_all[train_idx], y_all[train_idx]
X_val, y_val     = X_all[val_idx], y_all[val_idx]
X_test, y_test   = X_all[test_idx], y_all[test_idx]

# Raw TRAINING audio grouped by class id (validation/test audio is never augmented)
audios_by_class = defaultdict(list)
for i in train_idx:
    audios_by_class[int(y_all[i])].append(audio_all[i])
del audio_all  # validation/test waveforms are no longer needed

# Class-balanced augmentation of the TRAINING split (audio-domain)
target_per_class = 1000  # training samples per class after augmentation
aug_X_all, aug_y_all = [], []

print("üéõÔ∏è Augmenting (audio-domain) to balance classes ...")
for label, audios in audios_by_class.items():
    count = len(audios)
    needed = max(0, target_per_class - count)
    for i in range(needed):
        base = audios[i % count]                      # cycle through the originals
        method = AUG_FUNCS[np.random.randint(0, len(AUG_FUNCS))]
        aug_audio = ensure_length(apply_aug(base, method), TARGET_LEN)  # safety
        aug_X_all.append(extract_features_from_audio(aug_audio, TARGET_SR))
        aug_y_all.append(label)

# np.array([]) is 1-D and cannot be concatenated with the 3-D feature array,
# so only append when something was actually generated.
if aug_X_all:
    X_train = np.concatenate([X_train, np.array(aug_X_all)], axis=0)
    y_train = np.concatenate([y_train, np.array(aug_y_all)], axis=0)

print(f"‚úÖ Training samples after class-balanced augmentation: {X_train.shape[0]} (shape per sample: {X_train.shape[1:]} )")
print(f"Train set: {len(X_train)} | Val set: {len(X_val)} | Test set: {len(X_test)}")

# ---------------------------------------
# Feature scaling (fit on TRAIN only)
# ---------------------------------------
# Flatten across time so the scaler learns one mean/std per feature column
T, F = X_train.shape[1], X_train.shape[2]
scaler = StandardScaler()
scaler.fit(X_train.reshape(-1, F))  # train-only fit

def transform_with_scaler(X, scaler):
    """Apply a fitted per-feature scaler to a (N, T, F) array and keep its shape."""
    n_samples, n_frames, n_features = X.shape
    flat = scaler.transform(X.reshape(-1, n_features))
    return flat.reshape(n_samples, n_frames, n_features)

X_train = transform_with_scaler(X_train, scaler)
X_val   = transform_with_scaler(X_val, scaler)
X_test  = transform_with_scaler(X_test, scaler)

# -------------------------
# Build TensorFlow datasets
# -------------------------
batch_size = 32
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(X_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
val_ds   = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_ds  = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

print("\n‚úÖ Dataset preparation complete (SPLIT -> AUGMENT train only).")

In [None]:
import matplotlib.pyplot as plt
import os

# Pick one recording (row `idx` of the dataset table) and load its waveform.
# X_train holds standardised FEATURE matrices, not audio, so flattening one of
# its entries and running librosa on it plots features of features. The
# heatmaps have to be computed from a real waveform.
idx = 0
audio, _sr = load_audio_fixed(df['file_path'].iloc[idx], sr=TARGET_SR, duration=DURATION_SEC)

# Extract individual features
mfccs   = librosa.feature.mfcc(y=audio, sr=TARGET_SR, n_mfcc=13)
mel_s   = librosa.feature.melspectrogram(y=audio, sr=TARGET_SR, n_mels=128)
log_mel = librosa.power_to_db(mel_s)
zcr     = librosa.feature.zero_crossing_rate(audio)
chroma  = librosa.feature.chroma_stft(y=audio, sr=TARGET_SR, n_chroma=12)
rms     = librosa.feature.rms(y=audio)

# feature name -> (matrix, colormap)
features_dict = {
    "MFCCs": (mfccs, "magma"),
    "Log-Mel": (log_mel, "inferno"),
    "ZCR": (zcr, "plasma"),
    "Chroma": (chroma, "cividis"),
    "RMS": (rms, "coolwarm")
}

# Folder to save images
os.makedirs("feature_heatmaps_v2", exist_ok=True)

# Save each feature as its own heatmap
for name, (feat, cmap) in features_dict.items():
    plt.figure(figsize=(6,4))
    plt.imshow(feat, aspect='auto', origin='lower', cmap=cmap)
    plt.title(f"{name}")
    plt.xlabel("Time")
    plt.ylabel("Features")
    # Only the log-mel values are in dB
    plt.colorbar(format="%+2.1f dB" if name=="Log-Mel" else None)
    plt.tight_layout()
    plt.savefig(
        f"feature_heatmaps_v2/{name}_v2.png",
        transparent=True,
        dpi=300
    )
    plt.close()  # release the figure so the loop does not accumulate them

print("‚úÖ Saved 5 new heatmaps with different colors in 'feature_heatmaps_v2/' folder")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-class sample counts before and after augmentation.
# The "after" value is derived from target_per_class rather than a literal
# 1000, and a class that already exceeds the target keeps its own count
# (augmentation only ever adds samples).
before_counts = [len(audios) for audios in audios_by_class.values()]
after_counts  = [max(count, target_per_class) for count in before_counts]
class_labels  = [reverse_mapping[label] for label in audios_by_class]

x = np.arange(len(class_labels))
width = 0.5  # bar thickness

# Transparent figure + axes
fig, ax = plt.subplots(figsize=(10,6), facecolor='none')
ax.set_facecolor('none')

# Plot bars
rects1 = ax.bar(x - width/2, before_counts, width,
                label="Before Augmentation", color='skyblue')
rects2 = ax.bar(x + width/2, after_counts, width,
                label="After Augmentation", color='lightcoral')

# Titles and labels
ax.set_ylabel("Number of Samples", fontsize=16, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(class_labels, rotation=0, fontsize=14, fontweight='bold')  # no tilt
ax.tick_params(axis='y', labelsize=14, width=2)

# Add value labels above bars
def autolabel(rects, color):
    """Annotate every bar in ``rects`` with its height, drawn on the notebook-level ``ax``.

    The text is centred horizontally on the bar and placed 5 points above its
    top edge, in the given ``color``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width()/2
        ax.annotate(
            f'{bar_height}',
            xy=(x_center, bar_height),
            xytext=(0, 5),
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=14,
            fontweight='bold',
            color=color,
        )

# Value labels for both bar groups
for _bar_group in (rects1, rects2):
    autolabel(_bar_group, 'black')

# Legend placed outside the axes, to the right
ax.legend(
    loc='upper left',
    bbox_to_anchor=(1.02, 1),
    borderaxespad=0,
    frameon=False,
    fontsize=14,
)

# Remove the outer box (all spines)
for _spine in ax.spines.values():
    _spine.set_visible(False)

plt.tight_layout()

# Show transparent plot
plt.show()

# If saving, keep transparency.
# NOTE(review): the figure may no longer be current after plt.show(); if this
# line is enabled it probably belongs above plt.show() - confirm.
# plt.savefig("augmentation_counts.png", dpi=300, transparent=True)


In [None]:
# Per-sample input shape: every axis of X_train except the leading sample axis
input_shape = X_train.shape[1:]