In [1]:

# Install the necessary libraries
!pip install librosa numpy scikit-learn soundfile






In [2]:

# Import the required libraries
import librosa
import soundfile
import numpy as np
import os, glob, pickle
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [3]:

# Locate the dataset.
# The default path below points at a single actor folder of what appears to be the
# RAVDESS speech data; any other corpus (e.g. Emo-DB) can be used by pointing
# DATASET_PATH at its folder of .wav files.
# Set the SER_DATASET_PATH environment variable to override the location without
# editing this cell (the hardcoded path is only a machine-specific fallback).
DATASET_PATH = os.environ.get(
    "SER_DATASET_PATH",
    "D:\\Projects\\speech-emotion-recognition-ravdess-data\\Actor_14",
)

# Example for Emo-DB (Berlin Emotional Speech Database):
# https://www.kaggle.com/datasets/ejlok1/berlin-database-of-emotional-speech-emodb

# Collect the audio files. glob returns paths in arbitrary, OS-dependent order, so
# sort them: otherwise the seeded train/test split later in the notebook would pick
# different samples on different machines.
audio_files = sorted(glob.glob(os.path.join(DATASET_PATH, "*.wav")))
print(f"Found {len(audio_files)} audio files.")


Found 60 audio files.


In [4]:

# Define the feature extraction function
def extract_feature(file_name, mfcc=True, chroma=True, mel=True):
    """Build a fixed-length feature vector for one audio file.

    Each enabled feature is computed frame by frame and then averaged over time,
    so the length of the result does not depend on the duration of the recording.

    Parameters
    ----------
    file_name : str
        Path of an audio file readable by ``soundfile``.
    mfcc : bool, default True
        Include the time-averaged MFCCs (40 coefficients).
    chroma : bool, default True
        Include the time-averaged chromagram computed from the STFT magnitude.
    mel : bool, default True
        Include the time-averaged mel spectrogram.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with the enabled features concatenated in the order
        MFCC, chroma, mel. Empty when every feature is disabled.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        signal = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns a (frames, channels) array for multi-channel files, while the
    # librosa calls below expect a mono signal. Down-mix by averaging the channels so
    # stereo recordings produce the same kind of feature vector as mono ones.
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    # Use distinct local names for the computed arrays so the boolean switches
    # `chroma` and `mel` are never rebound to feature data.
    parts = []
    if mfcc:
        mfcc_mean = np.mean(
            librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        parts.append(mfcc_mean)
    if chroma:
        # The chromagram is derived from the magnitude spectrogram.
        magnitude = np.abs(librosa.stft(signal))
        chroma_mean = np.mean(
            librosa.feature.chroma_stft(S=magnitude, sr=sample_rate).T, axis=0
        )
        parts.append(chroma_mean)
    if mel:
        mel_mean = np.mean(
            librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0
        )
        parts.append(mel_mean)

    # Leading with an empty float64 array keeps the result dtype at float64 and
    # yields an empty vector when no feature is enabled.
    return np.hstack([np.array([]), *parts])


In [5]:

# Prepare the dataset
# Map the emotion code embedded in each file name to a readable label.
# Modify the emotions dictionary based on the dataset labels

emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

# Only these classes are kept for training and evaluation.
observed_emotions = ['calm', 'happy', 'sad', 'angry']

# Extract features and labels
X, y = [], []
for file in audio_files:
    file_name = os.path.basename(file)
    # The emotion code is read from the third dash-separated field of the file name;
    # adjust the index for datasets with a different naming scheme.
    name_parts = file_name.split("-")
    emotion = emotions.get(name_parts[2]) if len(name_parts) > 2 else None
    if emotion is None:
        # A stray .wav that does not follow the naming scheme should not abort the
        # whole run with a KeyError/IndexError; report it and move on.
        print(f"Skipping {file_name}: no recognised emotion code in file name.")
        continue
    if emotion not in observed_emotions:
        continue
    feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
    X.append(feature)
    y.append(emotion)

# Fail early with an actionable message instead of an opaque error from the splitter.
if not X:
    raise ValueError(
        "No usable audio samples were found; check DATASET_PATH and the file naming scheme."
    )

# Split the dataset into training and testing sets
# NOTE(review): the split is not stratified, so with this few samples a class can end
# up missing from the test set; consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(np.array(X), y, test_size=0.25, random_state=42)
print(f"Training set size: {X_train.shape[0]}, Testing set size: {X_test.shape[0]}")


Training set size: 24, Testing set size: 8


In [6]:

# Fit a linear-kernel support vector classifier on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = SVC(C=1, kernel='linear').fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = model.predict(X_test)


In [7]:

# Evaluate the predictions against the true test labels.
# Each heading and its result are printed on separate lines via sep="\n".
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Confusion Matrix:
[[3 0 1]
 [0 3 0]
 [0 0 1]]
Classification Report:
              precision    recall  f1-score   support

       angry       1.00      0.75      0.86         4
       happy       1.00      1.00      1.00         3
         sad       0.50      1.00      0.67         1

    accuracy                           0.88         8
   macro avg       0.83      0.92      0.84         8
weighted avg       0.94      0.88      0.89         8



In [8]:
# Share of test samples whose predicted label equals the true label, shown as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:{:.2f}%".format(100 * accuracy))

Accuracy:87.50%


In [9]:

# Save the trained model to a file
model_filename = 'modelForPrediction.sav'
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the model back. The context manager closes the file handle as soon as loading
# finishes instead of leaving it open until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only load model files you created or trust.
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Sanity check: predict one test sample with the reloaded model.
# reshape(1, -1) turns the single feature vector into a one-row 2-D array for predict().
sample_feature = X_test[0].reshape(1, -1)
sample_prediction = loaded_model.predict(sample_feature)
print(f"Predicted emotion: {sample_prediction[0]}")


Predicted emotion: angry
