In [1]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


In [2]:
pip install librosa soundfile scikit-learn matplotlib seaborn


Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import glob
import pandas as pd

# Step 1: Set the path to your extracted RAVDESS folder
root_dir = root_dir = "C:\\Users\\Ram\\Documents\\SAMRT\\smart_dataset"
 # <- change this to your real path

# Step 2: Recursively get all .wav files in all folders
wav_files = glob.glob(os.path.join(root_dir, "**", "*.wav"), recursive=True)

print(f"Found {len(wav_files)} .wav files")

# Step 3: Define emotion label map based on RAVDESS naming convention
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# Step 4: Create a DataFrame with paths and emotion labels
data = []
for fpath in wav_files:
    filename = os.path.basename(fpath)
    parts = filename.split("-")
    emotion_code = parts[2]
    emotion_label = emotion_map.get(emotion_code, "unknown")
    data.append({"file_path": fpath, "emotion": emotion_label})

df = pd.DataFrame(data)

# Step 5: Preview the DataFrame
df.head()


Found 2880 .wav files


Unnamed: 0,file_path,emotion
0,C:\Users\Ram\Documents\SAMRT\smart_dataset\Act...,neutral
1,C:\Users\Ram\Documents\SAMRT\smart_dataset\Act...,neutral
2,C:\Users\Ram\Documents\SAMRT\smart_dataset\Act...,neutral
3,C:\Users\Ram\Documents\SAMRT\smart_dataset\Act...,neutral
4,C:\Users\Ram\Documents\SAMRT\smart_dataset\Act...,calm


In [4]:
import librosa
import numpy as np
from tqdm import tqdm

# Function to extract MFCC, Chroma, and Mel features
def extract_features(file_path):
    try:
        # Load audio file
        y, sr = librosa.load(file_path, duration=3, offset=0.5)  # Load first 3s, skip 0.5s
        # MFCCs
        mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
        # Chroma
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
        # Mel Spectrogram
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        # Combine all
        return np.hstack([mfcc, chroma, mel])
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None


In [5]:
# Create X (features) and y (labels)
X, y = [], []

# Use tqdm to show progress bar
for i, row in tqdm(df.iterrows(), total=len(df)):
    features = extract_features(row["file_path"])
    if features is not None:
        X.append(features)
        y.append(row["emotion"])


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,
100%|██████████████████████████████████████████████████████████████████████████████| 2880/2880 [02:13<00:00, 21.62it/s]


In [11]:

from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [12]:
# Convert labels to numeric format
le = LabelEncoder()
y_encoded = le.fit_transform(y)


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [14]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE()),
    ('svc', SVC())
])


In [15]:
params = {
    'svc__C': [0.1, 1, 10],
    'svc__gamma': ['scale', 0.01, 0.001],
    'svc__kernel': ['rbf']
}


In [16]:
grid = GridSearchCV(pipeline, param_grid=params, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)


Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [17]:
best_model = grid.best_estimator_
print("Best Parameters:", grid.best_params_)


Best Parameters: {'svc__C': 10, 'svc__gamma': 0.01, 'svc__kernel': 'rbf'}


In [18]:
X_test_scaled = best_model.named_steps['scaler'].transform(X_test)


In [19]:
y_pred = best_model.named_steps['svc'].predict(X_test_scaled)


In [20]:
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))


Test Accuracy: 0.9375
Classification Report:
               precision    recall  f1-score   support

       angry       0.94      0.92      0.93        79
        calm       0.91      1.00      0.95        69
     disgust       0.99      0.92      0.95        84
     fearful       0.94      0.96      0.95        80
       happy       0.93      0.94      0.93        82
     neutral       0.91      0.95      0.93        42
         sad       0.93      0.89      0.91        61
   surprised       0.95      0.92      0.94        79

    accuracy                           0.94       576
   macro avg       0.94      0.94      0.94       576
weighted avg       0.94      0.94      0.94       576



In [22]:
def predict_emotion(file_path, model, label_encoder):
    features = extract_features(file_path)
    if features is None:
        return "Error extracting features"
    features = features.reshape(1, -1)  # Reshape for sklearn
    prediction = model.predict(features)
    predicted_emotion = label_encoder.inverse_transform(prediction)
    return predicted_emotion[0]


In [25]:
test_file = r"C:\Users\Ram\Downloads\Compressed\OAF_angry\OAF_base_angry.wav"
predicted_emotion = predict_emotion(test_file,best_model, le)
print(f"Predicted emotion: {predicted_emotion}")


Predicted emotion: angry


In [31]:
test_file = r"C:\Users\Ram\Downloads\Compressed\OAF_Sad\OAF_book_sad.wav"
print("File exists:", os.path.exists(test_file))

predicted_emotion = predict_emotion(test_file, best_model, le)
print(f"Predicted emotion: {predicted_emotion}")


File exists: True
Predicted emotion: sad


In [34]:
test_file = r""
print("File exists:", os.path.exists(test_file))

predicted_emotion = predict_emotion(test_file, best_model, le)
print(f"Predicted emotion: {predicted_emotion}")


File exists: True
Predicted emotion: sad
