In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive/; force_remount=True re-mounts even if
# a previous run of this cell already mounted it.
drive.mount('/content/drive/', force_remount=True)
# IPython shell escape (not plain Python): list the dataset folder so the
# cell output shows which Actor_* directories are visible after mounting.
!ls '/content/drive/My Drive/speech-emotion-recognition-ravdess-data'
import librosa
import soundfile
import numpy as np
import os, glob, pickle
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn import metrics
def extract_feature(file_name, mfcc, chroma, mel):
    """Build a 1-D feature vector describing one audio file.

    Each enabled feature family is computed per frame with librosa, averaged
    over time, and appended to the result in the fixed order
    MFCC -> chroma -> mel.

    Args:
        file_name: Path of the audio file to read with ``soundfile``.
        mfcc: If truthy, append the time-averaged 40 MFCC coefficients.
        chroma: If truthy, append the time-averaged chroma of the magnitude
            STFT.
        mel: If truthy, append the time-averaged mel spectrogram bands.

    Returns:
        A 1-D ``numpy`` array with the selected features concatenated; an
        empty array when all three flags are falsy.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns (frames, channels) for multi-channel audio, which
    # librosa would misread; average the channels down to a mono signal.
    if X.ndim > 1:
        X = X.mean(axis=1)

    result = np.array([])

    if mfcc:
        mfcc_means = np.mean(
            librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        result = np.hstack((result, mfcc_means))

    if chroma:
        # Chroma is derived from the magnitude spectrogram, so the STFT is
        # only computed when chroma features are requested.
        stft = np.abs(librosa.stft(X))
        chroma_means = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, chroma_means))

    if mel:
        # The signal must be passed as ``y=``: recent librosa releases make
        # the audio argument keyword-only and reject a positional array.
        mel_means = np.mean(
            librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, mel_means))

    return result
# Emotion labels in code order: the n-th label (1-based) is keyed by its
# zero-padded two-digit code, e.g. '01' -> 'neutral', '08' -> 'surprised'.
_emotion_labels = (
    'neutral',
    'calm',
    'happy',
    'sad',
    'angry',
    'fearful',
    'disgust',
    'surprised',
)
emotions = {
    f'{code:02d}': label
    for code, label in enumerate(_emotion_labels, start=1)
}

# Subset of emotions kept for training and evaluation; files labelled with
# any other emotion are skipped when the data is loaded.
observed_emotions = ['calm', 'happy', 'fearful', 'disgust']

# Load the data and extract features for each sound file
def load_data(
    test_size = 0.2,
    data_glob = "/content/drive/My Drive/speech-emotion-recognition-ravdess-data/Actor_*/*.wav",
):
    """Extract features for every observed-emotion file and split them.

    Args:
        test_size: Passed through to ``train_test_split``.
        data_glob: Glob pattern selecting the audio files. The emotion code
            is read from the third ``-``-separated field of each file's
            base name and looked up in ``emotions``.

    Returns:
        ``[x_train, x_test, y_train, y_test]`` as produced by
        ``train_test_split`` with ``random_state=9``.

    Raises:
        ValueError: If no file matching ``data_glob`` carries one of the
            ``observed_emotions``.
        KeyError: If a file's emotion code is not a key of ``emotions``.
    """
    x, y = [], []
    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # fixes the sample order so the seeded split below is reproducible.
    for file in sorted(glob.glob(data_glob)):
        file_name = os.path.basename(file)
        # The third dash-separated field of the file name is the emotion code.
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature = extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)

    if not x:
        # Fail with an actionable message instead of the generic error
        # train_test_split raises for an empty dataset.
        raise ValueError(
            f"No audio files with an observed emotion matched {data_glob!r}"
        )

    return train_test_split(np.array(x), y, test_size = test_size, random_state = 9)
# Split the dataset: 75% of the samples for training, 25% held out for testing.
x_train, x_test, y_train, y_test = load_data(test_size=0.25)
print("Train data: ", x_train.shape[0], "\nTest data: ", x_test.shape[0])
# shape[1] is the length of each sample's feature vector, not a number of
# emotions, so the message reports it as a feature count.
print(f'Features extracted per sample: {x_train.shape[1]}')

### k-nearest-neighbours classifiers

# Baseline: a KNeighborsClassifier with every hyperparameter left at its
# default, fitted on the training split and scored on both splits.
model = KNeighborsClassifier()
model.fit(x_train, y_train)

print("Default kNN Model's accuracy on training set is {:.2f}%".format(
    100 * model.score(x_train, y_train)
))
print("Default kNN Model's accuracy on test set is {:.2f}%\n".format(
    100 * model.score(x_test, y_test)
))

##### (hastily) tuned kNN ######
# Rebinds `model`, replacing the default classifier fitted earlier.
model = KNeighborsClassifier(
    n_neighbors=5,
    # Closer neighbours get a larger vote (inverse-distance weighting).
    weights='distance',
    algorithm='brute',
    # Must be an int: scikit-learn versions that validate parameters reject
    # the string '30' when fit() is called.
    leaf_size=30,
    n_jobs=4,
)

model.fit(x_train, y_train)

print(f'kNN Model\'s accuracy on training set is {100*model.score(x_train, y_train):.2f}%')
print(f'kNN Model\'s accuracy on test set is {100*model.score(x_test, y_test):.2f}%')

Mounted at /content/drive/
Actor_01  Actor_04  Actor_07  Actor_10	Actor_13  Actor_16  Actor_19  Actor_22
Actor_02  Actor_05  Actor_08  Actor_11	Actor_14  Actor_17  Actor_20  Actor_23
Actor_03  Actor_06  Actor_09  Actor_12	Actor_15  Actor_18  Actor_21  Actor_24
Train data:  576 
Test data:  192
Features(Emotions) extracted: 180 emotions
Default kNN Model's accuracy on training set is 74.83%
Default kNN Model's accuracy on test set is 63.54%

kNN Model's accuracy on training set is 100.00%
kNN Model's accuracy on test set is 64.58%
