In [133]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
import librosa
import librosa.display
import IPython.display as ipd
from itertools import cycle
sns.set_theme(style="white", palette=None)
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])

In [134]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report
from sklearn.preprocessing import StandardScaler,PowerTransformer
from imblearn.over_sampling import RandomOverSampler
from sklearn.svm import SVC
from sklearn.feature_selection import RFECV
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

In [135]:
# === PATH SETUP ===
ravdess_path = r"C:\Users\abdul\Downloads\audio-testing\RAVDESS-AUDIO-DATASET"
crema_path = r"C:\Users\abdul\Downloads\audio-testing\CREMA-D -AUDIO-DATASET"
tess_path = r"C:\Users\abdul\Downloads\audio-testing\TESS Toronto emotional speech set data"

In [136]:
# === Label extractors for each dataset ===
def extract_label_ravdess(file):
    parts = os.path.basename(file).split("-")
    emotion_code = parts[2]
    return {
        '01': 'neutral',
        '02': 'calm',
        '03': 'happy',
        '04': 'sad',
        '05': 'angry',
        '06': 'fearful',
        '07': 'disgust',
        '08': 'surprised'
    }.get(emotion_code, "unknown")

def extract_label_crema(file):
    parts = os.path.basename(file).split('_')
    return {
        'ANG': 'angry',
        'DIS': 'disgust',
        'FEA': 'fearful',
        'HAP': 'happy',
        'NEU': 'neutral',
        'SAD': 'sad'
    }.get(parts[2], "unknown")

def extract_label_tess(file):
    name = os.path.basename(file)
    return name.split('_')[-1].replace('.wav', '').lower()

In [137]:
import numpy as np
import random
import librosa
from scipy.signal import butter, lfilter

# 1. Add White Noise
def add_noise(y, noise_factor=0.005):
    noise = np.random.randn(len(y))
    return y + noise_factor * noise

# 2. Shift Audio (Time Shift)
def shift_audio(y, shift_max=0.2):
    shift = int(random.uniform(-shift_max, shift_max) * len(y))
    return np.roll(y, shift)

# 3. Time Stretching
def stretch_audio(y, rate=1.1):
    try:
        return librosa.effects.time_stretch(y, rate)
    except Exception as e:
        print(f"Stretch error: {e}")
        return y

# 4. Pitch Shifting
def pitch_shift_audio(y, sr, n_steps=2):
    try:
        return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    except Exception as e:
        print(f"Pitch shift error: {e}")
        return y

# 5. Dynamic Range Compression
def dynamic_range_compression(y, C=1, clip_val=0.05):
    return np.tanh(C * y) / clip_val

# 6. Volume Change (Random Gain)
def random_volume_change(y, low=0.8, high=1.2):
    return y * random.uniform(low, high)

# 7. Add Background Noise (Requires loading external background sound)
def add_background_noise(y, bg, snr_db=10):
    rms_y = np.sqrt(np.mean(y**2))
    rms_bg = np.sqrt(np.mean(bg**2))
    bg = bg[:len(y)] if len(bg) > len(y) else np.pad(bg, (0, len(y) - len(bg)), 'wrap')
    bg = bg * (rms_y / (10**(snr_db / 20)) / rms_bg)
    return y + bg

# 8. Bandpass Filter (Simulate mic/environment frequency limitations)
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    return butter(order, [low, high], btype='band')

def apply_bandpass_filter(y, sr, lowcut=300.0, highcut=3000.0):
    b, a = butter_bandpass(lowcut, highcut, sr, order=4)
    return lfilter(b, a, y)

# 9. Add Echo
def add_echo(y, sr, delay=0.2, decay=0.5):
    delay_samples = int(delay * sr)
    echo = np.zeros_like(y)
    echo[delay_samples:] = y[:-delay_samples]
    return y + decay * echo

# 10. Insert Random Silence
def insert_silence(y, sr, max_silence_sec=0.3):
    silence_duration = int(sr * random.uniform(0.1, max_silence_sec))
    silence = np.zeros(silence_duration)
    insert_point = random.randint(0, len(y) - silence_duration)
    return np.concatenate([y[:insert_point], silence, y[insert_point:]])


In [138]:
def extract_features(file, label_extractor, augment=False):
    try:
        # Load the audio file
        y, sr = librosa.load(file, sr=22050, mono=True)

        # Apply random augmentation if enabled
        if augment:
            aug_type = random.choice([
                'noise', 'shift', 'stretch', 'pitch', 'compression',
                'volume', 'echo', 'silence', 'bandpass'
            ])
            if aug_type == 'noise':
                y = add_noise(y)
            elif aug_type == 'shift':
                y = shift_audio(y)
            elif aug_type == 'stretch':
                y = stretch_audio(y, rate=random.uniform(0.8, 1.2))
            elif aug_type == 'pitch':
                y = pitch_shift_audio(y, sr=sr, n_steps=random.randint(-2, 2))
            elif aug_type == 'compression':
                y = dynamic_range_compression(y)
            elif aug_type == 'volume':
                y = random_volume_change(y)
            elif aug_type == 'echo':
                y = add_echo(y, sr)
            elif aug_type == 'silence':
                y = insert_silence(y, sr)
            elif aug_type == 'bandpass':
                y = apply_bandpass_filter(y, sr)

        features = []

        # MFCC
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features.extend(np.mean(mfccs, axis=1))
        features.extend(np.std(mfccs, axis=1))

        # Chroma
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        features.extend(np.mean(chroma, axis=1))
        features.extend(np.std(chroma, axis=1))

        # Zero Crossing Rate
        zcr = librosa.feature.zero_crossing_rate(y)
        features.append(np.mean(zcr))
        features.append(np.std(zcr))

        # Root Mean Square Energy
        rms = librosa.feature.rms(y=y)
        features.append(np.mean(rms))
        features.append(np.std(rms))

        # Spectral Contrast
        contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        features.extend(np.mean(contrast, axis=1))
        features.extend(np.std(contrast, axis=1))

        # Label
        label = label_extractor(file)

        return features, label

    except Exception as e:
        print(f"Error processing {file}: {e}")
        return None, None


In [139]:
# === Load and Extract Features from All Datasets ===
data = []

# RAVDESS
for file in glob(os.path.join(ravdess_path, '**', '*.wav'), recursive=True):
    f, l = extract_features(file, extract_label_ravdess)
    if f and l != "unknown":
        data.append((f, l))

# CREMA-D
for file in glob(os.path.join(crema_path, '*.wav')):
    f, l = extract_features(file, extract_label_crema)
    if f and l != "unknown":
        data.append((f, l))

# TESS
for file in glob(os.path.join(tess_path, '**', '*.wav')):
    f, l = extract_features(file, extract_label_tess)
    if f and l != "unknown":
        data.append((f, l))

In [140]:
# === Create DataFrame ===
features, labels = zip(*data)
df = pd.DataFrame(features)
df['labels'] = labels

In [141]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,labels
0,-697.792603,54.890041,0.663465,12.435786,7.733952,0.530750,-3.216631,-3.159394,-10.977551,-2.848711,...,16.763104,45.346246,10.712655,4.592027,5.519944,3.719688,3.160476,4.936189,4.376869,neutral
1,-692.855774,55.363895,-1.548319,16.038305,8.818810,-0.146586,-1.373392,-5.293180,-11.623182,-1.348284,...,16.843068,44.918227,10.221810,4.672584,5.678050,4.050266,3.132121,4.980014,4.449650,neutral
2,-691.587891,58.024662,0.159465,13.624650,5.374113,1.162337,-2.083360,-5.382585,-10.332824,-3.662081,...,16.694574,45.031481,10.610992,5.361405,6.009305,3.831685,2.924071,4.630189,4.303702,neutral
3,-685.105469,55.879421,2.783262,13.252023,6.989670,2.981274,-1.586029,-6.961661,-10.348489,-3.270769,...,16.067050,44.119110,9.945630,4.248116,4.827358,3.778959,2.933074,3.786002,4.105325,neutral
4,-727.104370,62.355034,3.121181,15.064669,8.132434,1.927084,-3.274656,-3.761792,-9.750299,-4.853837,...,16.215825,44.607496,10.949800,5.019456,6.459340,4.461460,3.718469,4.304718,4.069478,calm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3718,-391.547699,61.473328,34.779984,42.554768,-2.182075,8.699821,-1.409615,-11.303340,0.550166,-4.555710,...,19.516767,46.948069,5.446670,8.392481,8.957269,5.791789,4.948725,4.011643,5.042315,sad
3719,-404.340485,76.251541,25.959827,39.575932,0.379420,-0.940992,-6.228340,-14.371888,1.734153,-11.790402,...,20.260181,47.407920,7.983101,7.196562,7.234455,5.722532,4.125881,3.680128,4.202790,sad
3720,-370.484589,65.668503,38.364941,41.208469,-4.550176,4.382659,-0.712863,-9.426854,2.177757,-7.615240,...,20.655909,47.932425,7.639609,7.988176,9.272413,6.288133,4.862371,3.617182,3.027488,sad
3721,-423.077972,69.036842,29.720037,37.226368,-3.204175,13.681798,-3.380287,-12.161002,-0.653987,-4.454652,...,19.170236,46.332107,7.829129,8.525364,8.347698,5.883293,4.931897,4.054578,5.303289,sad


In [142]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, PowerTransformer, StandardScaler
from sklearn.decomposition import PCA
from sklearn.utils.class_weight import compute_sample_weight
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import pandas as pd

# === Create DataFrame ===
features, labels = zip(*data)
df = pd.DataFrame(features)
df['labels'] = labels

# === Encode Labels ===
le = LabelEncoder()
df['labels'] = le.fit_transform(df['labels'])

X = df.drop(columns='labels')
y = df['labels']

# === Train-test split (with stratification) ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# === Power Transform ===
pt = PowerTransformer()
X_train = pt.fit_transform(X_train)
X_test = pt.transform(X_test)

# === Standard Scaling ===
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === SMOTE for Balanced Sampling ===
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# === PCA (after resampling) ===
pca = PCA(n_components=0.95, random_state=42)
X_train_pca = pca.fit_transform(X_train_resampled)
X_test_pca = pca.transform(X_test_scaled)

# === Class Weight Calculation ===
sample_weights = compute_sample_weight(class_weight='balanced', y=y_train_resampled)

# === XGBoost Model with Regularization ===
model = XGBClassifier(
    n_estimators=250,
    max_depth=3,
    learning_rate=0.02,
    subsample=0.8,
    colsample_bytree=0.7,
    reg_alpha=1.0,
    reg_lambda=2.0,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42
)

# === Train Model ===
model.fit(X_train_pca, y_train_resampled, sample_weight=sample_weights)

# === Evaluate ===
from sklearn.metrics import classification_report

y_train_pred = model.predict(X_train_pca)
y_test_pred = model.predict(X_test_pca)

print("Train Classification Report:")
print(classification_report(y_train_resampled, y_train_pred))

print("Test Classification Report:")
print(classification_report(y_test, y_test_pred))


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Train Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.85      0.92       396
           1       0.74      0.99      0.84       396
           2       0.94      0.76      0.84       396
           3       0.99      1.00      0.99       396
           4       0.80      0.96      0.87       396
           5       0.94      0.75      0.83       396
           6       0.99      0.87      0.93       396
           7       0.90      0.95      0.93       396
           8       1.00      0.78      0.87       396
           9       0.76      0.97      0.85       396

    accuracy                           0.89      3960
   macro avg       0.91      0.89      0.89      3960
weighted avg       0.91      0.89      0.89      3960

Test Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.84      0.90       130
           1       0.42      0.97      0.59        32
           2       0.

In [143]:
# === Model Training ===
#rf=RandomForestClassifier(max_depth=7,n_estimators=200,random_state=42,class_weight="balanced")
#sv= SVC(kernel="linear", C=1, gamma=0.02,class_weight="balanced")
#rfecv_model=RFECV(estimator=rf,step=1,cv=5,scoring="accuracy",importance_getter="auto")
#rfecv_model.fit(X_train,y_train)
#y_pred=rfecv_model.predict(X_train)

In [144]:
#print("Classification Report:\n", classification_report(y_train, y_pred))

In [145]:
#sv= SVC(kernel="linear", C=1, gamma=0.02,class_weight="balanced")
#sv.fit(X_train,y_train)
#y_pred=sv.predict(X_test)
#classification_report(y_test,y_pred)

In [146]:
import pickle

# === Save Model and Preprocessors ===
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

with open("pt.pkl", "wb") as f:
    pickle.dump(pt, f)

with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

with open("pca.pkl", "wb") as f:
    pickle.dump(pca, f)  # ✅ Save the PCA object


In [150]:
import numpy as np
import pickle
import librosa

# === Load model and preprocessors ===
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

with open("pt.pkl", "rb") as f:
    pt = pickle.load(f)

with open("label_encoder.pkl", "rb") as f:
    le = pickle.load(f)

with open("pca.pkl", "rb") as f:
    pca = pickle.load(f)  # ✅ Load PCA transformer

# === Feature extractor (same structure as training) ===
def extract_features_for_test(file):
    y, sr = librosa.load(file, sr=22050, mono=True)
    features = []

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    features.extend(np.mean(mfccs, axis=1))
    features.extend(np.std(mfccs, axis=1))

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    features.extend(np.mean(chroma, axis=1))
    features.extend(np.std(chroma, axis=1))

    zcr = librosa.feature.zero_crossing_rate(y)
    features.append(np.mean(zcr))
    features.append(np.std(zcr))

    rms = librosa.feature.rms(y=y)
    features.append(np.mean(rms))
    features.append(np.std(rms))

    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    features.extend(np.mean(contrast, axis=1))
    features.extend(np.std(contrast, axis=1))

    return np.array(features)

# === Path to your audio file ===
file_path = r"C:\Users\abdul\Downloads\audio-testing\sad.wav"

# === Extract and preprocess ===
features = extract_features_for_test(file_path)
X = features.reshape(1, -1)
X = pt.transform(X)
X = scaler.transform(X)
X = pca.transform(X)  # ✅ Apply PCA to match model input shape

# === Predict Emotion ===
pred = model.predict(X)
emotion = le.inverse_transform([pred[0]])[0]

print("🎧 Predicted Emotion:", emotion)


🎧 Predicted Emotion: calm
