In [18]:
# Cell 1: Import necessary libraries
import os
import glob
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle


In [19]:
# Cell 2: Define emotions and feature extraction function

# Emotions mapping
emotions = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

observed_emotions = ["neutral", "happy", "sad", "angry"]

def extract_feature(file_name, mfcc=True, chroma=True, mel=True):
    """
    Extract features from an audio file using librosa.
    
    Parameters:
        file_name (str): Path to the audio file.
        mfcc (bool): Whether to extract MFCC features.
        chroma (bool): Whether to extract Chroma features.
        mel (bool): Whether to extract Mel-spectrogram features.

    Returns:
        features (list): List of extracted features.
    """
    audio, sr = librosa.load(file_name, sr=None)  # Load the audio file
    
    features = []

    if mfcc:
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        mfccs = np.mean(mfccs, axis=1)
        features.extend(mfccs)

    if chroma:
        chroma_features = librosa.feature.chroma_stft(y=audio, sr=sr)
        chroma_features = np.mean(chroma_features, axis=1)
        features.extend(chroma_features)

    if mel:
        mel_features = librosa.feature.melspectrogram(y=audio, sr=sr)
        mel_features = np.mean(mel_features, axis=1)
        features.extend(mel_features)

    return features


In [20]:
# Cell 3: Load dataset and extract features
def load_data(dataset_path, test_size=0.25):
    """
    Load the dataset and extract features for each audio file.
    
    Parameters:
        dataset_path (str): Path to the dataset folder.
        test_size (float): Proportion of the dataset to include in the test split.

    Returns:
        x_train, x_test, y_train, y_test: Training and testing datasets (features and labels).
    """
    x, y = [], []
    
    # Use glob to find all .wav files in the dataset
    audio_files = glob.glob(os.path.join(dataset_path, "Actor_*", "*.wav"))

    if not audio_files:
        raise FileNotFoundError(f"No audio files found in the specified path: {dataset_path}")
    
    print(f"Found {len(audio_files)} audio files.")
    
    # Process each file
    for file in audio_files:
        try:
            file_name = os.path.basename(file)
            emotion = emotions[file_name.split("-")[2]]  # Extract emotion from the filename
            
            # Skip emotions not in the observed list
            if emotion not in observed_emotions:
                continue

            # Extract features
            feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
            x.append(feature)
            y.append(emotion)
        except Exception as e:
            print(f"Error processing file {file}: {e}")

    if not x or not y:
        raise ValueError("No data was loaded. Please check the dataset and preprocessing steps.")
    
    # Convert to numpy array for ML processing
    x = np.array(x)

    # Split the data into training and testing sets
    return train_test_split(x, y, test_size=test_size, random_state=9)


In [21]:
# Cell 4: Preprocess data (Normalize features)
# Load data
dataset_path = "C:/Users/veere/Downloads/Speech Analysis project/speech-emotion-recognition-ravdess-data"
x_train, x_test, y_train, y_test = load_data(dataset_path, test_size=0.25)

# Normalize the features using StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

print(f"Training set size: {len(x_train)}, Testing set size: {len(x_test)}")
print(f"Features extracted: {x_train.shape[1]}")


Found 1440 audio files.
Training set size: 504, Testing set size: 168
Features extracted: 153


In [22]:
# Cell 5: Train the model (MLP Classifier)
# Train an MLPClassifier
mlp = MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,),
                    learning_rate='adaptive', max_iter=500)

mlp.fit(x_train, y_train)

# Evaluate the model
y_pred = mlp.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 72.62%
Classification Report:
              precision    recall  f1-score   support

       angry       0.88      0.94      0.91        49
       happy       0.67      0.64      0.66        45
     neutral       0.57      0.59      0.58        27
         sad       0.69      0.66      0.67        47

    accuracy                           0.73       168
   macro avg       0.70      0.71      0.71       168
weighted avg       0.72      0.73      0.72       168



In [29]:
# Cell 6: Save the trained model using pickle
# Save model to disk
filename = 'speech_emotion_model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(mlp, file)

# Load the model from disk
with open(filename, 'rb') as file:
    loaded_model = pickle.load(file)

# Test the loaded model
test_file = "speech-emotion-recognition-ravdess-data/Actor_01/03-01-08-02-02-02-01.wav"
feature = extract_feature(test_file, mfcc=True, chroma=True, mel=True)
feature = np.array(feature).reshape(1, -1)  # Reshape for prediction

# Normalize the feature before prediction
feature = scaler.transform(feature)
prediction = loaded_model.predict(feature)

print(f"Predicted emotion: {prediction[0]}")


Predicted emotion: happy


In [30]:
# Assuming your model is already trained, like so:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Step 1: Train the model
model = MLPClassifier(alpha=0.01, batch_size=256, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
model.fit(x_train, y_train)

# Step 2: Make predictions on the test set
y_pred = model.predict(x_test)

# Step 3: Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

print(f"Model accuracy: {accuracy * 100:.2f}%")


Model accuracy: 74.40%
