In [7]:
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import os

# Function to extract MFCC features
def extract_mfcc(file_path, n_mfcc=13):
    """Summarise one audio file as a vector of time-averaged MFCCs.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        The MFCC matrix averaged along axis 1 (one value per coefficient
        row), or ``None`` when loading or feature computation raised.
        Failures are reported on stdout rather than propagated, so callers
        must check for ``None``.
    """
    summary = None
    try:
        # sr=None asks librosa to keep the file's native sampling rate.
        signal, native_sr = librosa.load(file_path, sr=None)
        coefficients = librosa.feature.mfcc(y=signal, sr=native_sr, n_mfcc=n_mfcc)
        # Collapse axis 1 so every file yields a vector of the same length.
        summary = coefficients.mean(axis=1)
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        summary = None
    return summary

# Directory containing audio files (one sub-directory per label).
# Set the TESS_DATA_DIR environment variable to point at the dataset on another
# machine; the original local path is kept as the default.
audio_dir = os.environ.get(
    'TESS_DATA_DIR',
    r'C:\Users\onkar\Downloads\TESS Toronto emotional speech set data\TESS Toronto emotional speech set data',  # Raw string keeps the backslashes literal
)

# List of file paths and corresponding labels
file_paths = []
labels = []

# Load audio files and their corresponding emotion labels.
# Both listings are sorted: os.listdir() returns entries in arbitrary order, and
# the seeded train/test split below is only reproducible if sample order is stable.
for emotion in sorted(os.listdir(audio_dir)):
    emotion_dir = os.path.join(audio_dir, emotion)
    if os.path.isdir(emotion_dir):
        for file_name in sorted(os.listdir(emotion_dir)):
            # Compare in lower case so files named "*.WAV" are not skipped.
            if file_name.lower().endswith('.wav'):
                file_paths.append(os.path.join(emotion_dir, file_name))
                labels.append(emotion)  # Label is the name of the directory (e.g., "OAF_Fear", "OAF_Sad")

print(f"Found {len(file_paths)} files.")

# Feature extraction (extract_mfcc returns None for files it could not process)
features = [extract_mfcc(file_path) for file_path in file_paths]

# Drop failed extractions TOGETHER with their labels. Filtering only `features`
# would leave `labels` longer than `features`, so the two lists would no longer
# line up and train_test_split would reject them.
usable_pairs = [
    (feature, label)
    for feature, label in zip(features, labels)
    if feature is not None
]
features = [feature for feature, _ in usable_pairs]
labels = [label for _, label in usable_pairs]

# Check if features were extracted
if len(features) == 0:
    print("No features were extracted. Please check the file paths and audio files.")
else:
    print(f"Extracted features for {len(features)} files.")

    # Splitting data into training and testing sets (80% / 20%, fixed seed)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

    # Training the Random Forest model; the seed makes the reported metrics
    # repeatable across Restart & Run All.
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Making predictions
    predictions = model.predict(X_test)

    # Evaluating the model
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy:.2f}")

    # Confusion Matrix and Classification Report
    conf_matrix = confusion_matrix(y_test, predictions)
    print("Confusion Matrix:")
    print(conf_matrix)

    class_report = classification_report(y_test, predictions)
    print("Classification Report:")
    print(class_report)


Found 2800 files.
Extracted features for 2800 files.
Accuracy: 0.95
Confusion Matrix:
[[49  0  0  0  0  3  0  0  0  0  0  0  0  0]
 [ 2 44  0  0  0  1  0  0  0  0  0  0  0  0]
 [ 0  0 46  0  0  0  3  0  0  0  0  0  0  0]
 [ 0  0  0 37  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 36  1  2  0  0  0  0  0  0  0]
 [ 2  0  0  0  0 35  0  0  0  0  0  0  0  0]
 [ 0  0  2  0  1  0 27  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  3  1  0  0  0]
 [ 0  0  0  0  0  0  0  0 35  0  0  2  2  0]
 [ 0  0  0  0  0  0  0  2  0 34  1  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 39  0  1  0]
 [ 0  0  0  0  0  0  0  1  0  0  0 42  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 40  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0 33]]
Classification Report:
                        precision    recall  f1-score   support

              OAF_Fear       0.92      0.94      0.93        52
 OAF_Pleasant_surprise       1.00      0.94      0.97        47
               OAF_Sad       0.96      0.94      0.95        49
   