In [1]:
!pip install librosa  scikit-learn pandas nump

Collecting nump
  Downloading nump-5.5.5.5-py3-none-any.whl.metadata (358 bytes)
Downloading nump-5.5.5.5-py3-none-any.whl (1.1 kB)
Installing collected packages: nump
Successfully installed nump-5.5.5.5


In [2]:
!pip install soundfile




In [4]:
# STEP 1 — Upload files
from google.colab import files
import os

# Create a folder for audio files
os.makedirs("data/audio", exist_ok=True)

print("📂 Upload your labels.csv file:")
uploaded = files.upload()  # Upload labels (1).csv

print("📂 Upload your audio files (.wav):")
uploaded_audio = files.upload()  # Upload wav file(s)

# Move uploaded audio to data/audio
for fn in uploaded_audio.keys():
    os.rename(fn, os.path.join("data/audio", fn))

print("\n✅ Upload complete! Files are ready.")

# STEP 2 — Install dependencies
!pip install librosa soundfile scikit-learn pandas numpy

# STEP 3 — Import libraries
import librosa, soundfile as sf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix

# STEP 4 — Feature extraction
def extract_features(path, sr_target=16000, n_mfcc=40):
    y, sr = sf.read(path)
    if y.ndim > 1:
        y = y.mean(axis=1)  # convert to mono
    if sr != sr_target:
        y = librosa.resample(y, orig_sr=sr, target_sr=sr_target)
        sr = sr_target
    y, _ = librosa.effects.trim(y, top_db=30)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    d1 = librosa.feature.delta(mfcc)
    d2 = librosa.feature.delta(mfcc, order=2)
    feat = np.vstack([mfcc, d1, d2])
    stats = np.hstack([feat.mean(axis=1), feat.std(axis=1)])
    return stats.astype(np.float32)

# STEP 5 — Load dataset
df = pd.read_csv("labels (1).csv")  # make sure file name matches exactly
print("📄 CSV Preview:")
print(df.head())

# Encode labels if text
if df['label'].dtype == object:
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['label'])
    print("🔢 Label mapping:", dict(zip(le.classes_, le.transform(le.classes_))))

X, y = [], []
for _, row in df.iterrows():
    audio_path = os.path.join("data/audio", row['file'])
    if os.path.exists(audio_path):
        X.append(extract_features(audio_path))
        y.append(row['label'])
    else:
        print("⚠️ Missing file:", audio_path)

X = np.vstack(X)
y = np.array(y)

# STEP 6 — Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# STEP 7 — Train SVM model
clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC(kernel="rbf", class_weight="balanced", probability=True))
])
clf.fit(X_train, y_train)

# STEP 8 — Evaluate
y_pred = clf.predict(X_test)
print("\n📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\n📄 Classification Report:\n", classification_report(y_test, y_pred, digits=4))


📂 Upload your labels.csv file:


Saving labels (1).csv to labels (1).csv
📂 Upload your audio files (.wav):


Saving OAF_bar_angry.wav to OAF_bar_angry.wav

✅ Upload complete! Files are ready.
📄 CSV Preview:
   emotion_id      emotion_label
0           1            neutral
1           2              happy
2           3                sad
3           4              angry
4           5  pleasant_surprise


KeyError: 'label'

In [5]:
import pandas as pd

df = pd.read_csv("labels (1).csv")
print("📄 Column names in CSV:", df.columns.tolist())
print(df.head())


📄 Column names in CSV: ['emotion_id', 'emotion_label']
   emotion_id      emotion_label
0           1            neutral
1           2              happy
2           3                sad
3           4              angry
4           5  pleasant_surprise


In [7]:
from google.colab import files
import pandas as pd

# Upload many wav files
uploaded_wavs = files.upload()

# Upload labels.csv file
uploaded_csv = files.upload()

# Load labels
labels_df = pd.read_csv(list(uploaded_csv.keys())[0])
print("✅ Labels loaded")
print(labels_df.head())


Saving labels (1).csv to labels (1).csv


Saving labels (1).csv to labels (1) (1).csv
✅ Labels loaded
   emotion_id      emotion_label
0           1            neutral
1           2              happy
2           3                sad
3           4              angry
4           5  pleasant_surprise


In [8]:
# Create mapping dictionary
id2emotion = dict(zip(labels_df["emotion_id"], labels_df["emotion_label"]))
print("🔑 Mapping:", id2emotion)

def get_emotion_from_filename(filename):
    """
    Example: 'OAF_bar_angry.wav' -> 'angry'
    """
    emotion = filename.split("_")[-1].replace(".wav", "").lower().strip()
    return emotion


🔑 Mapping: {1: 'neutral', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'pleasant_surprise', 6: 'fear', 7: 'disgust', 8: 'yafsad', 9: 'oaf sad'}


In [9]:
import librosa
import numpy as np

def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    return np.mean(mfccs.T, axis=0)


In [11]:
X, y = [], []

for fname in uploaded_wavs.keys():
    if fname.endswith(".wav"):   # ✅ Only process wav files
        features = extract_features(fname)
        label = get_emotion_from_filename(fname)
        X.append(features)
        y.append(label)

print("✅ Features extracted:", len(X), "samples")


✅ Features extracted: 0 samples


In [13]:
# STEP 1 — Upload a single .wav file
from google.colab import files
import librosa, numpy as np

uploaded = files.upload()
file_name = list(uploaded.keys())[0]   # get uploaded filename

print("✅ Uploaded:", file_name)

# STEP 2 — Feature extraction function
def extract_features(path, sr_target=16000, n_mfcc=40):
    y, sr = librosa.load(path, sr=None)
    if y.ndim > 1:
        y = y.mean(axis=1)  # convert to mono
    if sr != sr_target:
        y = librosa.resample(y, orig_sr=sr, target_sr=sr_target)
        sr = sr_target
    y, _ = librosa.effects.trim(y, top_db=30)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    d1 = librosa.feature.delta(mfcc)
    d2 = librosa.feature.delta(mfcc, order=2)
    feat = np.vstack([mfcc, d1, d2])
    stats = np.hstack([feat.mean(axis=1), feat.std(axis=1)])
    return stats.astype(np.float32)

# STEP 3 — Extract features from your file
features = extract_features(file_name)
print("📊 Extracted feature vector shape:", features.shape)
print("🔍 First 10 features:", features[:10])


Saving OAF_bar_angry.wav to OAF_bar_angry.wav
✅ Uploaded: OAF_bar_angry.wav
📊 Extracted feature vector shape: (240,)
🔍 First 10 features: [-4.2354306e+02  6.3526436e+01 -1.3228546e+01 -2.8607525e+01
  1.9979217e+00 -2.6586151e-02 -9.2067404e+00 -8.8031731e+00
 -4.4694443e+00 -6.0611019e+00]
