In [None]:
# Colab-only setup: mount Google Drive at /content/drive/ so the dataset
# folder referenced later in this cell is reachable from the runtime.
from google.colab import drive
# force_remount=True is passed so the mount is redone on every run of the cell
# (presumably to avoid a stale mount -- confirm against the Colab docs).
drive.mount('/content/drive/', force_remount=True)
# IPython shell escape (not plain Python): list the dataset directory.
!ls '/content/drive/My Drive/speech-emotion-recognition-ravdess-data'
import librosa
import soundfile
import numpy as np
import os, glob, pickle
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC 
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn import metrics
def extract_feature(file_name, mfcc, chroma, mel):
    """Build a 1-D feature vector for a single audio file.

    Each enabled feature family is computed over the whole recording,
    averaged over time frames, and appended to the result in the fixed order
    MFCC -> chroma -> mel.

    Args:
        file_name: Path of the audio file, opened with ``soundfile``.
        mfcc: If truthy, append the per-coefficient mean of 40 MFCCs.
        chroma: If truthy, append the per-bin mean of the STFT chromagram.
        mel: If truthy, append the per-band mean of the mel spectrogram.

    Returns:
        A 1-D ``numpy.ndarray`` with the selected features concatenated.
        It is empty when all three flags are falsy.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns multi-channel audio as (frames, channels); the
    # feature code below expects a mono 1-D signal, so average the channels.
    # Mono files are already 1-D and pass through unchanged.
    if X.ndim > 1:
        X = X.mean(axis=1)

    result = np.array([])

    if mfcc:
        mfcc_means = np.mean(
            librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        result = np.hstack((result, mfcc_means))

    if chroma:
        # The magnitude STFT is only needed for the chromagram, so compute it
        # here rather than in a separate conditional.
        stft = np.abs(librosa.stft(y=X))
        chroma_means = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, chroma_means))

    if mel:
        # The signal must be passed as ``y=``: recent librosa releases make
        # the audio argument of melspectrogram keyword-only.
        mel_means = np.mean(
            librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, mel_means))

    return result
# Lookup table from the two-digit emotion code found in each file name to its
# label. Codes are consecutive, so they are generated from the label order.
emotions = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

# Subset of labels kept for training; files with any other emotion are skipped.
observed_emotions = [
    "calm",
    "happy",
    "fearful",
    "disgust",
]

# Load the data and extract features for each sound file
def load_data(
    test_size=0.2,
    data_glob="/content/drive/My Drive/speech-emotion-recognition-ravdess-data/Actor_*/*.wav",
):
    """Extract features for every observed-emotion file and split train/test.

    Args:
        test_size: Forwarded to ``train_test_split`` as the test proportion.
        data_glob: Glob pattern selecting the ``.wav`` files to load. The
            default is the Google Drive dataset location used by this
            notebook.

    Returns:
        ``[x_train, x_test, y_train, y_test]`` as produced by
        ``train_test_split`` with ``random_state=9``; the ``x`` parts are 2-D
        feature arrays and the ``y`` parts are lists of emotion labels.

    Raises:
        ValueError: If no file matching ``data_glob`` has an observed emotion.
        KeyError: If a file name carries an emotion code missing from
            ``emotions``.
    """
    x, y = [], []
    # glob.glob returns paths in arbitrary order; sorting makes the sample
    # order, and therefore the seeded split, reproducible across runs.
    for file in sorted(glob.glob(data_glob)):
        file_name = os.path.basename(file)
        # The third dash-separated field of the file name is the emotion code.
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        x.append(extract_feature(file, mfcc=True, chroma=True, mel=True))
        y.append(emotion)

    if not x:
        # Fail with an actionable message instead of letting train_test_split
        # reject an empty dataset.
        raise ValueError(
            f"No audio files with an observed emotion matched {data_glob!r}"
        )

    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)
# Split the dataset: 75% of the samples for training, 25% held out for testing.
x_train, x_test, y_train, y_test = load_data(test_size=0.25)
print("Train data: ", x_train.shape[0], "\nTest data: ", x_test.shape[0])
# shape[1] is the length of each sample's feature vector, not a number of
# emotions, so the message is labelled accordingly.
print(f'Features extracted per sample: {x_train.shape[1]}')

############ first predict

# Linear-kernel SVM (C = 1): build the estimator, train it on the training
# split, then predict labels for the held-out split.
svm_model_linear = SVC(kernel='linear', C=1)
svm_model_linear.fit(x_train, y_train)
svm_predictions = svm_model_linear.predict(x_test)

# Report overall accuracy, then per-class metrics, then the confusion matrix.
print(accuracy_score(y_test, svm_predictions))
print(classification_report(y_test, svm_predictions))
print(confusion_matrix(y_test, svm_predictions))


########### second predict

# Second run with SVC's default settings. No kernel argument is passed, so
# despite the variable name this model does not request the linear kernel.
# The names are reused from the first run, so the earlier model and its
# predictions are overwritten here.
svm_model_linear = SVC()
svm_model_linear.fit(x_train, y_train)
svm_predictions = svm_model_linear.predict(x_test)

# Same three reports as for the first model.
print(accuracy_score(y_test, svm_predictions))
print(classification_report(y_test, svm_predictions))
print(confusion_matrix(y_test, svm_predictions))

Mounted at /content/drive/
Actor_01  Actor_04  Actor_07  Actor_10	Actor_13  Actor_16  Actor_19  Actor_22
Actor_02  Actor_05  Actor_08  Actor_11	Actor_14  Actor_17  Actor_20  Actor_23
Actor_03  Actor_06  Actor_09  Actor_12	Actor_15  Actor_18  Actor_21  Actor_24
Train data:  576 
Test data:  192
Features(Emotions) extracted: 180 emotions
0.609375
              precision    recall  f1-score   support

        calm       0.80      0.90      0.85        49
     disgust       0.54      0.58      0.56        43
     fearful       0.55      0.47      0.51        45
       happy       0.51      0.49      0.50        55

    accuracy                           0.61       192
   macro avg       0.60      0.61      0.60       192
weighted avg       0.60      0.61      0.60       192

[[44  2  1  2]
 [ 7 25  5  6]
 [ 2  4 21 18]
 [ 2 15 11 27]]
0.5052083333333334
              precision    recall  f1-score   support

        calm       0.60      0.86      0.71        49
     disgust       0.46      