# Libraries

# Helper Functions

In [38]:
def read_wav_file(path, file):
    """Read an audio file with ``soundfile`` and return its samples and rate.

    Parameters
    ----------
    path : str
        Directory prefix. It is concatenated with ``file`` as-is, so it must
        already end with a path separator.
    file : str
        File name appended to ``path``.

    Returns
    -------
    tuple
        ``(data, samplerate)`` as produced by ``sf.read``.
    """
    full_name = path + file
    samples, rate = sf.read(full_name)
    return samples, rate

def display_waveplot(data, sr):
    """Plot the time-domain waveform of an audio signal and show the figure.

    Parameters
    ----------
    data : array-like
        Audio samples to draw.
    sr : int
        Sampling rate of ``data``; forwarded to librosa for the time axis.
    """
    # Newer librosa releases dropped ``waveplot`` in favour of ``waveshow``.
    # Prefer the new function and fall back so older installations keep working.
    draw_wave = getattr(librosa.display, 'waveshow', None)
    if draw_wave is None:
        draw_wave = librosa.display.waveplot

    plt.figure(figsize=(14, 5))
    draw_wave(data, sr=sr)
    plt.grid()
    plt.show()
    
def plot_spectrogram(data, samplerate):
    """Draw the log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    data : array-like
        Audio samples.
    samplerate : int
        Sampling rate of ``data``.

    The plot is drawn on the current matplotlib axes; this function does not
    create a figure or call ``plt.show()`` itself.
    """
    # Pass the signal by keyword: newer librosa versions only accept the audio
    # as ``y=...``, and the keyword form works on older versions as well.
    mel_power = librosa.feature.melspectrogram(y=data, sr=samplerate)
    # Convert power to decibels relative to the loudest bin.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    librosa.display.specshow(mel_db, sr=samplerate, x_axis='time', y_axis='mel')
    

class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads audio files on demand as FFT features.

    Each batch is a pair ``(X, y)``:

    * ``X`` has shape ``(n, 100, data_lenght // 2 // 100)`` and holds the
      standardised magnitude spectrum of each file, zero-padded (or
      truncated) to ``data_lenght // 2`` bins.
    * ``y`` has shape ``(n, num_labels)`` and holds the label row of each file.

    ``n`` equals ``batch_size`` except for a final partial batch.

    Parameters
    ----------
    path : str
        Dataset root; actor folders are addressed as ``path + 'Actor_XX/'``.
    list_IDs : sequence
        Index labels of ``labels`` that belong to this generator.
    labels : pandas.DataFrame
        One row per ID. Must contain a ``'File'`` column as its LAST column;
        all columns before it are used as the target vector.
    batch_size : int
        Number of samples per batch.
    """

    def __init__(self, path, list_IDs, labels, batch_size):
        super().__init__()
        self.path = path
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.indexes = np.arange(len(self.list_IDs))
        # Attribute name (including its spelling) is kept for compatibility.
        self.data_lenght = 254000
        self.num_labels = 8

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division without floating-point arithmetic.
        return (len(self.list_IDs) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        start = index * self.batch_size
        batch_positions = self.indexes[start:start + self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in batch_positions]
        X, y = self.__data_generation(list_IDs_temp)
        # Size the leading axis from the actual batch so that the final
        # partial batch is not padded with all-zero samples and labels.
        X = X.reshape((len(list_IDs_temp), 100, self.data_lenght // 2 // 100))
        return X, y

    def __data_generation(self, list_IDs_temp):
        """Load the files for ``list_IDs_temp`` and build feature/label arrays."""
        n_features = self.data_lenght // 2
        X = np.zeros((len(list_IDs_temp), n_features))
        y = np.zeros((len(list_IDs_temp), self.num_labels))
        # Every column except the trailing 'File' column is a label column.
        label_columns = self.labels.columns[:-1]
        for i, ID in enumerate(list_IDs_temp):
            # Look the file up by its ID, not by its position in the batch;
            # otherwise features and labels come from different rows.
            file = self.labels.loc[ID, 'File']
            # The last dash-separated field of the file stem names the actor folder.
            actor = file.split('.')[0].split('-')[-1]
            path_file = self.path + 'Actor_' + actor + '/'
            audio_file, _ = read_wav_file(path_file, file)
            # Multi-channel files are read as 2-D (frames, channels) arrays;
            # average the channels so the FFT runs over time.
            if audio_file.ndim > 1:
                audio_file = audio_file.mean(axis=1)
            lenght = len(audio_file)
            # Keep the first half of the magnitude spectrum.
            spectrum = np.abs(np.fft.fft(audio_file)[:lenght // 2])
            # Standardise; skip the division for empty or constant spectra
            # to avoid NaN/inf features.
            if spectrum.size:
                spread = spectrum.std()
                spectrum = spectrum - spectrum.mean()
                if spread > 0:
                    spectrum = spectrum / spread
            # Clips longer than the buffer are truncated instead of raising.
            n_used = min(len(spectrum), n_features)
            X[i, :n_used] = spectrum[:n_used]
            y[i] = self.labels.loc[ID, label_columns].values
        return X, y

# Libraries


In [39]:
import os
import random
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import soundfile as sf
import librosa
import librosa.display
import IPython.display as display

from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPool1D, BatchNormalization
from keras.optimizers import RMSprop,Adam

import warnings
warnings.filterwarnings("ignore")

# Path

In [40]:
# Root directory of the dataset. It must end with '/' because sub-paths are
# built from it by plain string concatenation.
path = '/kaggle/input/ravdess-emotional-speech-audio/'
# Debugging aid: uncomment to list the first two entries of the dataset root.
#os.listdir(path)[0:2]

# Labels

In [41]:
# Emotion code -> emotion name. Codes start at 1 and follow the list order.
emotions = dict(enumerate(
    ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised'],
    start=1,
))

# Overview
There are 24 actors, and for each actor there are 60 wave files.
We create a data frame containing the metadata of all files:

In [42]:
# Folder names 'Actor_01' ... 'Actor_24' (zero-padded, two digits).
actors = [f'Actor_{number:02d}' for number in range(1, 25)]

In [43]:
# Build the metadata frame: one row per wave file. The file stem consists of
# seven dash-separated integer codes, which map onto the columns after 'File'.
columns = ['File', 'Modality', 'Vocal', 'Emotion', 'Intensity', 'Statement', 'Repetition', 'Actor']
files = []
for actor in actors:
    # os.listdir returns entries in arbitrary order; sort them so that row
    # indices (and therefore the seeded train/validation split) are
    # reproducible. Non-wav entries are skipped because their names cannot
    # be parsed into integer codes.
    files.extend(sorted(
        name for name in os.listdir(path+actor) if name.lower().endswith('.wav')
    ))
# Parse all file names first and create the frame in one step instead of
# writing it row by row.
records = [
    [name] + [int(code) for code in name.split('.')[0].split('-')]
    for name in files
]
df = pd.DataFrame(records, columns=columns)

df.head()

# A Sample File
We focus on the first sample in the data frame to show how to load and interpret a file.




In [44]:
# Use the first row of the metadata frame as the running example.
row = 0
file = df.loc[row, 'File']
# Actor folders are named 'Actor_XX' with a zero-padded two-digit number.
path_file = f"{path}Actor_{str(df.loc[row, 'Actor']).zfill(2)}/"
file

In [45]:
# Embed an audio player for the sample file in the notebook output.
display.Audio(path_file+file)

In [46]:
# Load the sample and report its array length, sampling rate and duration
# (array length divided by sampling rate).
data, sr = read_wav_file(path_file, file)
print(f'Lenght Data Array: {len(data)}')
print(f'Samplerate: {sr}')
print(f'Lenght Audio: {len(data)/sr}')

# Waveplot

In [47]:
# Plot the waveform of the sample loaded above.
display_waveplot(data, sr)

## Spectrogram

In [48]:
# Plot the log-mel spectrogram of the sample loaded above.
plot_spectrogram(data, sr)

# Feature Engineering

In [49]:
# Extend df with per-file audio metadata:
#   Lenght_Data_Array - length of the array read from the file
#   Samplerate        - sampling rate reported for the file
#   Seconds           - Lenght_Data_Array divided by Samplerate
for row in df.index:
    file = df.loc[row, 'File']
    path_file = f"{path}Actor_{str(df.loc[row, 'Actor']).zfill(2)}/"
    data, sr = read_wav_file(path_file, file)
    df.loc[row, 'Lenght_Data_Array'] = len(data)
    df.loc[row, 'Samplerate'] = sr
df['Seconds'] = df['Lenght_Data_Array'].div(df['Samplerate'])

# Data Analysis
The features emotion and intensity are not evenly distributed:

In [50]:
# Number of files per emotion code, ordered by code.
df['Emotion'].value_counts().sort_index()

In [51]:
# Number of files per intensity code, ordered by code.
df['Intensity'].value_counts().sort_index()

In [52]:
# Number of files per statement code, most frequent first.
df['Statement'].value_counts()

In [53]:
# Distribution of clip durations (the 'Seconds' column) in 20 bins.
df['Seconds'].hist(bins=20)
plt.show()

# Encoding Target Labels

In [54]:
# One-hot encode the emotion of every file: one 0/1 column per emotion name,
# followed by the file name as the last column.
# Each column is built by comparing against the emotion code itself, so the
# encoding does not rely on `code - 1` matching a column position (which would
# silently wrap around for a code of 0), and no row-by-row writes are needed.
labels = pd.DataFrame(
    {name: (df['Emotion'] == code).astype('int64') for code, name in emotions.items()},
    index=df.index,
    columns=list(emotions.values()),
)
labels['File'] = df['File']

In [55]:
# Preview the first ten rows of the encoded labels.
labels.head(10)

# Train - Test Split

In [56]:
# Split the row indices of df into training and validation IDs
# (33% validation, fixed seed for a repeatable shuffle).
list_IDs_train, list_IDs_val = train_test_split(list(df.index), test_size=0.33, random_state=2021)

In [57]:
# Create batch loaders that read the audio files on demand, one for the
# training IDs and one for the validation IDs.
batch_size = 32
train_generator = DataGenerator(
    path=path, list_IDs=list_IDs_train, labels=labels, batch_size=batch_size,
)
val_generator = DataGenerator(
    path=path, list_IDs=list_IDs_val, labels=labels, batch_size=batch_size,
)

# Model Hyperparameters

In [67]:
# Training hyperparameters: optimizer learning rate and number of epochs.
lernrate = 0.001
epochs = 2

In [68]:
# 1-D CNN over inputs of shape (100, 1270): three Conv1D + BatchNormalization
# stages (with pooling after the first and dropout after the second), followed
# by a dense head with 8 sigmoid outputs.
model = Sequential([
    Conv1D(64, input_shape=(100, 1270), kernel_size=5, strides=4, activation='relu'),
    BatchNormalization(),
    MaxPool1D(pool_size=4),
    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(8, activation='sigmoid'),
])

In [76]:
# Configure training. The optimizer is given `learning_rate`, the supported
# keyword; the legacy `lr` alias is deprecated in Keras optimizers.
# NOTE(review): the loss and metric treat the 8 outputs as independent binary
# targets. If each file has exactly one emotion, a softmax/categorical setup
# may be more appropriate - confirm before changing, since the metric name is
# also used when plotting the training history.
model.compile(optimizer=Adam(learning_rate=lernrate),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])

In [77]:
# Print the layer-by-layer summary of the model.
model.summary()

In [78]:
# Train on the batch generators. `fit` accepts Sequence objects directly (the
# same way `evaluate` is called with a generator later in this notebook), so
# the deprecated `fit_generator` entry point is not needed.
history = model.fit(train_generator, validation_data=val_generator, epochs=epochs, workers=4)

# Saving Model & Weights

In [63]:
# Write the full model and, separately, only its weights to .h5 files in the
# current working directory.
model.save('[CNN]M.h5')
model.save_weights('[CNN]W.h5')

In [64]:
# Save a second copy of the model under ./saved_models and export the
# architecture as JSON.
model_name = 'Model.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')

# exist_ok avoids the check-then-create race of a separate isdir() test.
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Save model and weights at %s ' % model_path)

model_json = model.to_json()
# Explicit encoding so the output does not depend on the platform default.
with open("model_json.json", "w", encoding="utf-8") as json_file:
    json_file.write(model_json)

# Training Analysis

In [80]:
# Plot loss (left) and accuracy (right) per epoch for training and validation.
fig, axs = plt.subplots(1, 2, figsize=(16, 4))
fig.subplots_adjust(hspace=.2, wspace=.2)
axs = axs.ravel()

loss = history.history['loss']
loss_val = history.history['val_loss']
acc = history.history['binary_accuracy']
acc_val = history.history['val_binary_accuracy']
# Use a dedicated name for the x-axis values so the `epochs` hyperparameter is
# not overwritten; otherwise re-running the training cell would receive a
# range object instead of an integer.
epoch_axis = range(1, len(loss)+1)

axs[0].plot(epoch_axis, loss, 'bo', label='loss_train')
axs[0].plot(epoch_axis, loss_val, 'ro', label='loss_val')
axs[0].set_title('Value of the loss function')
axs[0].set_xlabel('epochs')
axs[0].set_ylabel('value of the loss function')
axs[0].legend()
axs[0].grid()

axs[1].plot(epoch_axis, acc, 'bo', label='accuracy_train')
axs[1].plot(epoch_axis, acc_val, 'ro', label='accuracy_val')
axs[1].set_title('Accuracy')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Value of accuracy')
axs[1].legend()
axs[1].grid()
plt.show()

# Test Accuracy


In [82]:
# Evaluate the trained model on the validation generator and print the second
# entry of the result as a percentage.
# NOTE(review): the data evaluated here is the validation split, not a
# separate test set, and scores[1] is presumably the 'binary_accuracy' metric
# passed to compile - confirm before reporting this as test accuracy.
print("[INFO] Calculating model accuracy")
scores = model.evaluate(val_generator)
print(f"Test Accuracy: {scores[1]*100}")