In [6]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile 
from python_speech_features import mfcc , logfbank
import librosa 
import os, glob, pickle
from scipy import signal
import noisereduce as nr
from glob import glob
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
import soundfile
from tensorflow.keras.layers import Conv2D,MaxPool2D, Flatten, LSTM
from keras.layers import Dropout,Dense,TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical 
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import speech_recognition as sr
from scipy.fftpack import fft
from pydub import AudioSegment

# Lookup table from the two-digit emotion code embedded in each file name
# to its human-readable label.
emotions = dict(
    zip(
        ('01', '02', '03', '04', '05', '06', '07', '08'),
        ('neutral', 'calm', 'happy', 'sad',
         'angry', 'fearful', 'disgust', 'surprised'),
    )
)

# Subset of emotions the user wants to observe; only files carrying one of
# these codes are turned into training/test samples.
observed_emotions = {code: emotions[code] for code in ('02', '03', '06', '07')}

#cleaning the data
def envelope(y, rate, threshold):
    """Build a keep/drop mask marking the samples of a signal that are loud enough.

    The absolute amplitude is smoothed with a centred rolling mean that spans
    ``rate / 10`` samples (about a tenth of a second of audio), and a sample is
    kept when its smoothed amplitude is strictly greater than ``threshold``.

    Args:
        y: One-dimensional sequence of audio samples.
        rate: Sampling rate of ``y`` in samples per second.
        threshold: Smoothed amplitude a sample must exceed to be kept.

    Returns:
        A list of ``bool`` with one entry per sample; ``True`` means "keep".
    """
    # pandas rejects a window of 0 when min_periods=1 (rate < 10 would produce
    # one), so never let the window shrink below a single sample.
    window = max(int(rate / 10), 1)
    amplitude = pd.Series(y).abs()
    smoothed = amplitude.rolling(window=window, min_periods=1, center=True).mean()
    # One vectorised comparison instead of a per-sample Python loop; NaN
    # compares as False, exactly as the element-wise `mean > threshold` did.
    return (smoothed > threshold).tolist()


#Allfiles has the list of paths of all the files in the directory 
Allfiles = []
# Two-digit actor identifiers '01' .. '24', as they appear in folder and file names.
actors = [f"{number:02d}" for number in range(1, 25)]

# NOTE(review): every recording is overwritten IN PLACE with its
# silence-trimmed version (loaded at sr=16000), so the original dataset files
# are lost and re-running this loop trims the already-trimmed audio again.
# Consider writing to a separate output directory.
for actor in actors:
    for emotion in emotions:
        # The neutral emotion ('01') is only enumerated at intensity 01; every
        # other emotion is enumerated at intensities 01 and 02.
        intensities = range(1, 2) if emotion == '01' else range(1, 3)
        for intensity in intensities:
            for statement in range(1, 3):
                for repetition in range(1, 3):
                    file_path = fr"C:\Users\subik\MoodMeter\RAVDESS_dataset\Actor_{actor}\03-01-{emotion}-0{intensity}-0{statement}-0{repetition}-{actor}.wav"
                    Allfiles.append(file_path)
                    # Named `samples`, not `signal`, so the `scipy.signal`
                    # module imported at the top of the file is not clobbered.
                    samples, rate = librosa.load(file_path, sr=16000)
                    # Keep only the samples whose smoothed amplitude exceeds 0.0005.
                    mask = envelope(samples, rate, 0.0005)
                    wavfile.write(filename=str(file_path), rate=rate, data=samples[mask])

def calc_fft(y, rate):
    """Compute the length-normalised magnitude spectrum of a real signal.

    Args:
        y: Sequence of real-valued samples.
        rate: Sampling rate of ``y``; ``1 / rate`` is used as the sample spacing.

    Returns:
        A tuple ``(Y, freq)`` where ``Y`` holds the magnitudes of the real FFT
        divided by the number of samples and ``freq`` holds the matching
        frequency bins.
    """
    sample_count = len(y)
    # Frequency axis for a real FFT of `sample_count` points.
    bins = np.fft.rfftfreq(sample_count, d=1 / rate)
    spectrum = np.fft.rfft(y)
    magnitude = np.abs(spectrum / sample_count)
    return magnitude, bins

    
# Four independent, initially empty module-level dictionaries.
# NOTE(review): presumably intended to cache per-file signals and derived
# features, but nothing visible here fills them - confirm before relying on them.
# NOTE: `fft` rebinds the name imported by `from scipy.fftpack import fft`.
signals, fft, fbank, mfccs = {}, {}, {}, {}


#Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    """Read an audio file and return one flat feature vector for it.

    Each enabled feature family is averaged over time and the resulting
    vectors are concatenated in the fixed order MFCC, chroma, mel.

    Args:
        file_name: Path of the audio file to read.
        mfcc: When truthy, include the time-averaged 40 MFCC coefficients.
        chroma: When truthy, include the time-averaged chromagram computed
            from the magnitude STFT of the signal.
        mel: When truthy, include the time-averaged mel spectrogram.

    Returns:
        A one-dimensional ``numpy`` array holding the selected features; it is
        empty when all three flags are falsy.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Starting from an empty float64 array keeps the output dtype the same as
    # the previous incremental `np.hstack` construction.
    parts = [np.array([])]

    # Each block is appended only when its flag is set. Previously the stacking
    # ran unconditionally, which raised UnboundLocalError for mfcc=False and
    # appended the boolean flag itself (as 0.0) for chroma=False / mel=False.
    if mfcc:
        parts.append(
            np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        )
    if chroma:
        stft = np.abs(librosa.stft(X))
        parts.append(
            np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        )
    if mel:
        parts.append(
            np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        )
    return np.hstack(parts)


def load_data(test_size=0.33):
    """Extract features for every observed-emotion recording and split them.

    Walks all actors and emotion codes, skips emotions that are not in
    ``observed_emotions``, and extracts MFCC + chroma + mel features for each
    remaining recording.

    Args:
        test_size: Forwarded to ``train_test_split`` as the test share.

    Returns:
        The result of ``train_test_split``: train features, test features,
        train labels, test labels. Every label entry is a two-item list
        ``[emotion_code, file_path]``.
    """
    features = []
    labels = []
    for actor in actors:
        for emotion in emotions:
            if emotion not in observed_emotions:
                continue
            # The neutral code '01' is only enumerated at intensity 01.
            intensity_codes = (1,) if emotion == '01' else (1, 2)
            for intensity in intensity_codes:
                for statement in (1, 2):
                    for repetition in (1, 2):
                        file_path = fr"C:\Users\subik\MoodMeter\RAVDESS_dataset\Actor_{actor}\03-01-{emotion}-0{intensity}-0{statement}-0{repetition}-{actor}.wav"
                        features.append(
                            extract_feature(file_path, mfcc=True, chroma=True, mel=True)
                        )
                        labels.append([emotion, file_path])
    # Fixed random_state keeps the split reproducible between runs.
    return train_test_split(
        np.array(features), labels, test_size=test_size, random_state=9
    )


# Half of the samples go to the test split.
x_train, x_test, y_trai, y_tes = load_data(test_size=0.5)

# Every label entry is [emotion_code, file_path]; transposing puts all
# emotion codes in row 0 and all file paths in row 1.
y_train_map = np.array(y_trai).T
y_test_map = np.array(y_tes).T

y_train, train_filename = y_train_map[0], y_train_map[1]
y_test, test_filename = y_test_map[0], y_test_map[1]


# Initialize the Multi Layer Perceptron Classifier
model = MLPClassifier(
    alpha=0.01,
    batch_size=256,
    epsilon=1e-08,
    hidden_layer_sizes=(300,),
    learning_rate='adaptive',
    max_iter=500,
)
# Fit the classifier on the training split.
model.fit(x_train, y_train)

# Persist the fitted model to disk, then read it back so that the
# predictions below come from the serialized copy (`pickle` is imported at
# the top of the file).
Pkl_Filename = "Emotion_Voice_Detection_Model.pkl"

with open(Pkl_Filename, 'wb') as model_file:
    pickle.dump(model, model_file)

with open(Pkl_Filename, 'rb') as model_file:
    Emotion_Voice_Detection_Model = pickle.load(model_file)

# Predict emotion codes for the held-out split and report the accuracy.
y_pred = Emotion_Voice_Detection_Model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Store the predicted labels next to their source file names in a CSV file.
y_pred1 = pd.DataFrame(y_pred, columns=['predictions']).assign(
    file_names=test_filename
)
y_pred1.to_csv('predictionfinal.csv')

Accuracy: 77.08%
