<a href="https://colab.research.google.com/github/Puneeth-Abhishek-6622/Speech-Emotion-Recognition-/blob/main/Kannada.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
dataset_path = "/content/drive/MyDrive/kannada dataset"
output_path = "/content/drive/MyDrive/kannada_100 samples"

16 samples each for Anger & Sadness

17 samples each for Surprise, Happiness, Fear & Neutral


In [2]:
import os
import shutil
import random

# Define dataset path and output directory
dataset_path = "/content/drive/MyDrive/kannada dataset"
output_path = "/content/drive/MyDrive/kannada_100_samples"

# Ensure output directory exists
os.makedirs(output_path, exist_ok=True)

# Mapping emotion numbers to desired count
emotion_counts = {"01": 16, "02": 16, "03": 17, "04": 17, "05": 17, "06": 17}

# Organize files by emotion
emotion_files = {key: [] for key in emotion_counts.keys()}

# Scan dataset folder
for file in os.listdir(dataset_path):
    if file.endswith(".wav"):
        parts = file.split("-")  # Extract emotion from filename
        if len(parts) >= 3:
            emotion_code = parts[1]  # EE part of AA-EE-SS.wav
            if emotion_code in emotion_files:
                emotion_files[emotion_code].append(file)

# Select required number of files per emotion
selected_files = []
for emotion, files in emotion_files.items():
    random.shuffle(files)  # Shuffle to ensure randomness
    selected_files.extend(files[:emotion_counts[emotion]])  # Pick required count

# Copy selected files to output folder
for file in selected_files:
    src = os.path.join(dataset_path, file)
    dst = os.path.join(output_path, file)
    shutil.copy(src, dst)

print(f"✅ Extracted {len(selected_files)} samples to {output_path}")


✅ Extracted 100 samples to /content/drive/MyDrive/kannada_100_samples


Dataset Distribtuion

In [3]:
import os
from collections import Counter

# Define dataset path
dataset_path = "/content/drive/MyDrive/kannada_100_samples"

# Initialize counter
emotion_counts = Counter()

# Scan dataset folder
for file in os.listdir(dataset_path):
    if file.endswith(".wav"):
        parts = file.split("-")  # Extract emotion from filename
        if len(parts) >= 3:
            emotion_code = parts[1]  # EE part of AA-EE-SS.wav
            emotion_counts[emotion_code] += 1

# Emotion Mapping
emotion_labels = {
    "01": "Anger",
    "02": "Sadness",
    "03": "Surprise",
    "04": "Happiness",
    "05": "Fear",
    "06": "Neutral"
}

# Print distribution
print("Emotion Distribution in Kannada 100 Samples Dataset:")
for code, count in sorted(emotion_counts.items()):
    emotion_name = emotion_labels.get(code, "Unknown")
    print(f"{emotion_name}: {count} samples")


Emotion Distribution in Kannada 100 Samples Dataset:
Anger: 16 samples
Sadness: 16 samples
Surprise: 17 samples
Happiness: 17 samples
Fear: 17 samples
Neutral: 17 samples


Feature Extraction

In [13]:
import librosa
import numpy as np
import pandas as pd
import os

# Define dataset path
dataset_path = "/content/drive/MyDrive/kannada_100_samples"
output_csv = "/content/drive/MyDrive/kannada_features.csv"

# Emotion mapping from filename convention
emotion_mapping = {
    "01": 0,  # Anger
    "02": 4,  # Sadness
    "03": 2,  # Surprise (mapped to Happy in previous datasets)
    "04": 2,  # Happiness
    "05": 1,  # Fear
    "06": 3   # Neutral
}

# Feature extraction function
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=16000)  # Load audio

    # Compute features
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    rmse = np.mean(librosa.feature.rms(y=y))
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40), axis=1)
    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128), axis=1)

    # Concatenate all features into one array
    features = np.hstack([mfccs, mel, zcr, rmse])
    return features

# Initialize dataframe list
data = []

# Process each file
for file in os.listdir(dataset_path):
    if file.endswith(".wav"):
        file_path = os.path.join(dataset_path, file)
        parts = file.split("-")

        if len(parts) >= 3:
            emotion_code = parts[1]  # Extract emotion from filename
            emotion_label = emotion_mapping.get(emotion_code, -1)  # Get mapped label

            if emotion_label != -1:
                features = extract_features(file_path)
                data.append(np.append(features, emotion_label))

# Create DataFrame
columns = [f"MFCC_{i}" for i in range(40)] + [f"Mel_{i}" for i in range(128)] + ["ZCR", "RMSE", "Emotion"]
df_kannada = pd.DataFrame(data, columns=columns)

# Save to CSV
df_kannada.to_csv(output_csv, index=False)
print(f"Feature extraction complete. Saved to {output_csv}")


Feature extraction complete. Saved to /content/drive/MyDrive/kannada_features.csv


In [14]:
df_kannada.head()

Unnamed: 0,MFCC_0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,...,Mel_121,Mel_122,Mel_123,Mel_124,Mel_125,Mel_126,Mel_127,ZCR,RMSE,Emotion
0,-184.97493,109.272942,3.318561,31.888645,-9.556284,-17.089952,-22.274029,0.555418,-7.116153,-0.174785,...,0.094565,0.110699,0.098923,0.019076,0.027043,0.015498,0.001121,0.087505,0.189013,0.0
1,-380.366913,64.282349,-7.268876,24.289621,2.299709,-9.457098,-24.159002,-8.645685,-6.868415,-1.544703,...,0.005491,0.003288,0.005338,0.003725,0.002587,0.001087,0.000108,0.143466,0.017118,0.0
2,-452.157715,69.683388,14.694557,32.453037,-16.51195,-0.882377,-9.646268,-15.000087,-19.464134,-3.085853,...,0.000555,0.000327,0.00033,0.000744,0.000712,0.000433,2.7e-05,0.145203,0.013098,0.0
3,-195.14801,72.84655,10.965631,27.188097,-18.668703,0.928975,-6.489493,7.185298,-8.32596,-8.6379,...,0.022929,0.015281,0.025593,0.0207,0.017474,0.006824,0.000264,0.108162,0.098987,0.0
4,-293.174957,69.687881,-28.678059,26.807587,-14.607823,-12.047147,-26.25717,-21.017145,-9.563683,-4.031392,...,0.00488,0.009098,0.007091,0.00167,0.001496,0.000663,5.2e-05,0.140098,0.045623,0.0


In [8]:
df_kannada = df_kannada.drop(columns=['Emotion'])


In [9]:
df_kannada.shape

(100, 170)

In [10]:
df_kannada.to_csv("/content/drive/MyDrive/kannada_features_no_emotion.csv", index=False)


#Verifying the Model performance on Kannada dataset

In [16]:

from sklearn.preprocessing import StandardScaler

df = df_kannada
# Separate features (X) and target labels (y)
X = df.drop(columns=['Emotion'])  # Drop the 'Emotion' column
y = df['Emotion']  # Store the labels separately

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # Fit & transform the features

# Convert back to DataFrame (optional, for better readability)
df_scaled = pd.DataFrame(X_scaled, columns=X.columns)
df_scaled['Emotion'] = y  # Re-add the target column

# Print sample normalized features
print(f"Sample normalized features (first row): {df_scaled.iloc[0].values}")

Sample normalized features (first row): [ 1.41717772  1.21559676  0.07280123  0.57549703 -0.10198523 -0.8077392
 -0.48385643  0.44888829  0.62388689  0.6955897   0.41250656  0.19972222
  2.117203    1.66141863  0.10920666 -0.88462292  1.93057058 -1.34987419
  0.68442131 -0.81982165 -0.70697841 -0.76145259 -0.3474825  -0.76053937
 -0.15686583 -1.19431984 -0.15603213 -0.72317669 -1.20327509 -0.57366944
 -1.38911934 -0.34888023 -1.2597617  -0.60930607 -1.13880928 -0.55038735
  0.05695931 -0.60260035  1.66882032  2.53758709  0.71977223  0.25914323
 -0.17402379 -0.43512584  0.09349246  6.6247072   3.64699951 -0.12678206
 -0.54606432 -0.55494081 -0.40120677  1.31693041  2.45098313  0.41933234
  0.15813364  0.73122412  0.20149852  2.02055832  5.32933033  4.75938141
  2.49274161  2.12162109  1.74733151  1.62411244  4.74832359  7.68928212
  3.80447336  2.21268925  6.2333096   4.58447197  5.1506235   6.93181781
  1.30693735  0.58011968  5.69712932  6.14005844  2.21097901  1.97295129
  0.93764085

Loading the model

In [17]:
from tensorflow.keras.models import load_model

# Load your trained model (update path if necessary)a
model = load_model("/content/drive/MyDrive/speech_emotion_recognition_model.h5")



In [19]:
model.input_shape

(None, 170, 1)

In [20]:
df.shape

(100, 171)

In [22]:
X = df.drop(columns=['Emotion'])
X.shape

(100, 170)

In [25]:
X_kannada = np.expand_dims(X, axis=-1)  # Model expects (None, 170, 1)

# Make predictions
predictions = model.predict(X_kannada)
predicted_labels = np.argmax(predictions, axis=1)  # Get the emotion class with highest probability

# Print sample predictions
for i in range(5):  # Print first 5 predictions
    print(f"Predicted Label: {predicted_labels[i]}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Predicted Label: 0
Predicted Label: 0
Predicted Label: 0
Predicted Label: 0
Predicted Label: 0


In [26]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compare predictions with actual labels
accuracy = accuracy_score(y, predicted_labels)
print(f"Model Accuracy: {accuracy:.2%}")

# Print classification report
print("\nClassification Report:")
print(classification_report(y, predicted_labels))

# Show confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y, predicted_labels))


Model Accuracy: 20.00%

Classification Report:
              precision    recall  f1-score   support

         0.0       0.17      1.00      0.30        16
         1.0       0.50      0.18      0.26        17
         2.0       0.00      0.00      0.00        34
         3.0       0.00      0.00      0.00        17
         4.0       0.50      0.06      0.11        16

    accuracy                           0.20       100
   macro avg       0.23      0.25      0.13       100
weighted avg       0.19      0.20      0.11       100


Confusion Matrix:
[[16  0  0  0  0]
 [13  3  0  0  1]
 [31  3  0  0  0]
 [17  0  0  0  0]
 [15  0  0  0  1]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
