In [None]:
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import librosa

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

In [None]:
# Root folders of the four speech-emotion corpora. Every value keeps its
# trailing slash because later cells build file paths by string concatenation.
Ravdess, Crema, Tess, Savee = (
    "./datasets/Ravdess/",
    "./datasets/Crema/",
    "./datasets/Tess/",
    "./datasets/Savee/",
)

### Ravdess Dataframe

In [None]:
# Build an (Emotions, Path) table for the Ravdess corpus.
# Each sub-folder of `Ravdess` is listed and every file name is split on "-";
# the third field (index 2) is read as the integer emotion code.
ravdess_directory_list = os.listdir(Ravdess)
file_path = []
file_emotion = []
# `actor_dir` rather than `dir`, so the builtin `dir` is not shadowed for the
# rest of the notebook.
for actor_dir in ravdess_directory_list:
    for file_name in os.listdir(Ravdess + actor_dir):
        fields = file_name.split(".")[0].split("-")
        file_emotion.append(int(fields[2]))
        file_path.append(Ravdess + actor_dir + "/" + file_name)

emotion_df = pd.DataFrame(file_emotion, columns=["Emotions"])
path_df = pd.DataFrame(file_path, columns=["Path"])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)

# Assign the mapped column back to the frame. An in-place `replace` on a column
# reached through the frame is chained assignment: pandas warns about it and,
# under copy-on-write, the frame is left unchanged (integer codes instead of labels).
Ravdess_df["Emotions"] = Ravdess_df["Emotions"].replace(
    {1: "neutral", 2: "calm", 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}
)
Ravdess_df.head()

### Crema Dataframe

In [None]:
# Build an (Emotions, Path) table for the Crema corpus.
# The third "_"-separated field of each file name is the emotion code.
crema_directory_list = os.listdir(Crema)

crema_codes = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

# one path per listed file, in listing order
file_path = [Crema + name for name in crema_directory_list]
# codes that are not in the table are labelled 'Unknown'
file_emotion = [crema_codes.get(name.split('_')[2], 'Unknown') for name in crema_directory_list]

# single-column frames, placed side by side
emotion_df = pd.DataFrame({'Emotions': file_emotion})
path_df = pd.DataFrame({'Path': file_path})
Crema_df = emotion_df.join(path_df)
Crema_df.head()

### Tess Dataframe

In [None]:
# Build an (Emotions, Path) table for the Tess corpus.
# Files live one folder deep; the third "_"-separated field of the file stem is
# used as the label, with the code 'ps' rewritten to 'surprise'.
tess_directory_list = os.listdir(Tess)

file_emotion = []
file_path = []

for folder in tess_directory_list:
    for name in os.listdir(Tess + folder):
        label = name.split('.')[0].split('_')[2]
        file_emotion.append('surprise' if label == 'ps' else label)
        file_path.append(Tess + folder + '/' + name)

# single-column frames, placed side by side
emotion_df = pd.DataFrame({'Emotions': file_emotion})
path_df = pd.DataFrame({'Path': file_path})
Tess_df = emotion_df.join(path_df)
Tess_df.head()

### Savee Dataframe

In [None]:
# Build an (Emotions, Path) table for the Savee corpus.
# The second "_"-separated field of the file name, minus its last six
# characters, is the emotion prefix.
savee_directory_list = os.listdir(Savee)

savee_codes = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
}

file_emotion = []
file_path = []

for name in savee_directory_list:
    file_path.append(Savee + name)
    prefix = name.split('_')[1][:-6]
    # any prefix that is not in the table falls back to 'surprise'
    file_emotion.append(savee_codes.get(prefix, 'surprise'))

# single-column frames, placed side by side
emotion_df = pd.DataFrame({'Emotions': file_emotion})
path_df = pd.DataFrame({'Path': file_path})
Savee_df = emotion_df.join(path_df)
Savee_df.head()

### Create Dataframe from all four dataframes

In [None]:
# Stack the four per-corpus tables into one (Emotions, Path) frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it every
# corpus restarts at 0 and the combined frame carries duplicate index labels.
data_path = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis=0, ignore_index=True)
data_path.head()

### Data Visualization and Exploration

In [None]:
# Bar chart of how many files carry each emotion label.
fig, ax = plt.subplots()
# Pass the column as `x=`: seaborn no longer treats a positional Series as the
# x variable, so the original positional call does not plot per-label counts.
sns.countplot(x=data_path.Emotions, palette="pastel", ax=ax)
ax.set_title('Count of Emotions', size=16)
ax.set_ylabel('Count', size=12)
ax.set_xlabel('Emotions', size=12)
sns.despine(ax=ax, top=True, right=True, left=False, bottom=False)
plt.show()

### Data Preparation

In [None]:
# Plain Python lists of file paths (X) and their emotion labels (Y), in row order.
X = list(data_path.Path)
Y = list(data_path.Emotions)

# both lengths should equal the number of rows in data_path
len(X), len(Y), data_path.Path.shape

In [None]:
# encoder = OneHotEncoder()
# Y = encoder.fit_transform(np.array(Y).reshape(-1,1)).toarray()
# Y

In [None]:
# encoder.categories_

In [None]:
# type(encoder.categories_[0])

In [None]:
# Split the file list into train and test parts. The split size is left at
# scikit-learn's default; random_state pins the shuffle so the split repeats.
(x_train, x_test, y_train, y_test) = train_test_split(X, Y, shuffle=True, random_state=0)
tuple(len(part) for part in (x_train, y_train, x_test, y_test))

In [None]:
# Preview the first few training paths and labels instead of dumping the full
# lists into the notebook output.
x_train[:5], y_train[:5]

### Save Test set data

In [None]:
# x_test_df = pd.DataFrame(x_test, columns=["Path"])
# y_test_df = pd.DataFrame(y_test, columns=encoder.categories_[0])
# test_df = pd.concat([x_test_df, y_test_df], axis=1)
# test_df.to_csv("./datasets/test_dataset.csv")

In [None]:
# Persist the held-out file paths with their labels (two columns: Path, Emotion).
x_test_df = pd.DataFrame({"Path": x_test})
y_test_df = pd.DataFrame({"Emotion": y_test})
# both frames share the default 0..n-1 index, so join lines them up row by row
test_df = x_test_df.join(y_test_df)
test_df.to_csv("./datasets/test_dataset.csv", index=False)


### Data Augmentation

In [None]:
def noise(data):
    """Return a copy of `data` with Gaussian noise added.

    The noise amplitude is a random fraction (at most 3.5%) of the largest
    sample value in `data`. The input array is not modified.
    """
    # draw the uniform factor first, then the normal samples (same RNG order)
    scale = np.random.uniform()
    amplitude = 0.035 * scale * np.amax(data)
    jitter = np.random.normal(size=data.shape[0])
    return data + amplitude * jitter

def stretch(data, rate=0.8):
    """Time-stretch `data` by `rate` using librosa and return the result."""
    stretched = librosa.effects.time_stretch(data, rate=rate)
    return stretched

def shift(data):
    """Circularly roll `data` by a random whole number of samples.

    The offset is drawn uniformly from [-5, 5), scaled by 1000 and truncated
    to an integer; samples pushed off one end wrap around to the other.
    """
    offset = int(1000 * np.random.uniform(low=-5, high=5))
    rolled = np.roll(data, offset)
    return rolled

def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift `data` with librosa.

    `sampling_rate` is forwarded as `sr` and `pitch_factor` as `n_steps`.
    """
    shifted = librosa.effects.pitch_shift(
        data,
        sr=sampling_rate,
        n_steps=pitch_factor,
    )
    return shifted

# Load one file from the combined table (the second row) as a sample signal
# for trying out the augmentation helpers.
path = data_path.Path.iloc[1]
data, sample_rate = librosa.load(path)

Only two augmentations are applied below: added noise, and time-stretching followed by pitch-shifting. This follows the steps of the reference Kaggle notebook.

In [None]:
def extract_features(data, sample_rate=22050):
    """Summarise an audio signal as one flat feature vector.

    Five librosa feature matrices are averaged over their frame axis and
    concatenated in this order: zero-crossing rate, chroma STFT, MFCC,
    RMS energy, mel spectrogram.

    Parameters
    ----------
    data : np.ndarray
        Audio samples, as returned by ``librosa.load``.
    sample_rate : int, optional
        Sampling rate of ``data``. It used to be read from a notebook-level
        global set by an unrelated example cell; it is now an explicit
        argument. The default of 22050 is librosa's default load rate, so
        existing ``extract_features(data)`` calls keep working.

    Returns
    -------
    np.ndarray
        1-D float64 vector of the concatenated per-feature means.
    """
    # magnitude spectrogram, used only by the chroma feature
    stft = np.abs(librosa.stft(data))

    per_frame = [
        librosa.feature.zero_crossing_rate(y=data),              # ZCR
        librosa.feature.chroma_stft(S=stft, sr=sample_rate),     # Chroma_stft
        librosa.feature.mfcc(y=data, sr=sample_rate),            # MFCC
        librosa.feature.rms(y=data),                             # Root Mean Square value
        librosa.feature.melspectrogram(y=data, sr=sample_rate),  # Mel spectrogram
    ]

    # average each matrix over frames, then stack horizontally; the cast keeps
    # the float64 dtype that the previous incremental stacking produced
    return np.hstack([np.mean(matrix.T, axis=0) for matrix in per_frame]).astype(np.float64)


def get_features(path):
    """Load one audio file and return features for it and two augmented copies.

    Returns a 2-D array with three rows: the original signal, the signal with
    added noise, and the signal time-stretched then pitch-shifted.
    """
    # duration and offset trim the silent start and end of each recording
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

    variants = [
        data,                                # without augmentation
        noise(data),                         # data with noise
        pitch(stretch(data), sample_rate),   # data with stretching and pitching
    ]

    # one feature row per variant, stacked vertically
    return np.vstack([extract_features(variant, sample_rate) for variant in variants])

In [None]:
# Extract features for every training file. get_features returns one row per
# variant (the original plus two augmented copies), so each label is repeated
# once per returned row.
X, Y = [], []
for path, emotion in zip(x_train, y_train):
    rows = get_features(path)
    X.extend(rows)
    Y.extend([emotion] * len(rows))

In [None]:
# Sanity check: number of feature rows and labels collected above, next to the
# total number of files in data_path.
len(X), len(Y), data_path.Path.shape

In [None]:
# One row per feature vector, with the emotion label appended as the last column.
Features = pd.DataFrame(X).assign(labels=Y)
Features.head(2)

In [None]:
# Persist the training features and labels; index=False keeps the row index out
# of the CSV so it is not read back as an extra feature column.
Features.to_csv("./datasets/train_dataset.csv", index=False)
# Features.to_excel("./datasets/train_dataset.xlsx")

### Data Preparation

In [None]:
# test_dataset = pd.read_csv("./datasets/test_dataset.csv")
# y = test_dataset[["Emotion"]]
# x = test_dataset.drop("Emotion", axis=1)
# x.shape, y.shape

In [None]:
def get_features_from_test(path):
    """Load one audio file and return its feature vector with no augmentation.

    The same 2.5 s window starting at 0.6 s is used as for the training files.
    """
    # duration and offset trim the silent start and end of each recording
    data, _ = librosa.load(path, duration=2.5, offset=0.6)
    return np.array(extract_features(data))

In [None]:
# Extract un-augmented features for every held-out file.
# The file list is read back from the saved CSV here: the `x` / `y` frames this
# loop used to iterate over are only defined in a commented-out cell, so the
# loop raised NameError on a fresh kernel.
test_files = pd.read_csv("./datasets/test_dataset.csv")

x_test, y_test = [], []
for path, emotion in zip(test_files.Path, test_files.Emotion):
    x_test.append(get_features_from_test(path))
    y_test.append(emotion)


In [None]:
# Preview the first couple of test feature vectors and labels instead of
# dumping the full lists into the notebook output.
x_test[:2], y_test[:2]

In [None]:
train_dataset = pd.read_csv("./datasets/train_dataset.csv")

# Features (X) are every column except 'labels'; labels (Y) stay a one-column frame.
X = train_dataset.drop(columns='labels')
Y = train_dataset.loc[:, ['labels']].copy()
X.shape, Y.shape

In [None]:
test_dataset = pd.read_csv("./datasets/test_dataset.csv")

y_test = test_dataset[["Emotion"]].copy()
# The CSV holds only file paths and labels, so dropping "Emotion" would leave a
# frame of path strings that cannot be scaled or fed to the network. Build the
# numeric feature matrix from the paths instead, one un-augmented vector per file.
# Columns are renamed to strings to match the column names of the training
# features as read back from CSV.
x_test = pd.DataFrame(
    [get_features_from_test(path) for path in test_dataset["Path"]]
).rename(columns=str)
x_test.shape, y_test.shape


One Hot Encoding

In [None]:
# Fit the one-hot encoder on the training labels and replace Y with the dense
# one-hot matrix (one column per label value found in the training set).
encoder = OneHotEncoder()
train_labels = np.array(Y).reshape(-1, 1)
Y = encoder.fit_transform(train_labels).toarray()

In [None]:
# Encode the test labels with the encoder already fitted on the training labels.
# Re-fitting on the test labels could change the set or order of categories, so
# the test columns would stop lining up with the training columns.
y_test = encoder.transform(np.array(y_test).reshape(-1, 1)).toarray()

In [None]:
# One-hot training labels next to the category order the encoder uses for its columns.
Y, encoder.categories_

In [None]:
# One-hot test labels next to the encoder's current category order.
y_test, encoder.categories_

Scaler

In [None]:
# Learn the per-feature mean and variance from the training features, then
# standardise the training features with them.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
X

In [None]:
# Standardise the test features with the statistics learned from the training
# set. Re-fitting the scaler here would put train and test on different scales
# and leak test-set statistics into preprocessing.
x_test = scaler.transform(x_test)
x_test

In [None]:
# Shape of the scaled training features before the channel axis is added.
X.shape

In [None]:
# The model's first Conv1D layer is declared with input_shape=(features, 1), so
# add a trailing channel axis to the training AND the test features; x_test is
# later passed as validation data and must have the same rank as X.
# The ndim guards keep the cell idempotent: re-running it does not add a
# second extra axis.
X = np.expand_dims(X, axis=2) if np.ndim(X) == 2 else X
x_test = np.expand_dims(np.asarray(x_test), axis=2) if np.ndim(x_test) == 2 else x_test
X.shape, x_test.shape

### Modelling

In [None]:
# 1-D CNN classifier over the per-file feature vector.
# Four Conv1D + max-pooling stages (256, 256, 128, 64 filters), then a small
# dense head with dropout and a softmax output.
# The output width is taken from the one-hot label matrix rather than being
# hard-coded to 8, so the network always matches the encoded classes.
n_classes = Y.shape[1]

model = Sequential([
    Conv1D(256, kernel_size=5, strides=1, padding="same", activation="relu", input_shape=(X.shape[1], 1)),
    MaxPooling1D(pool_size=5, strides=2, padding="same"),

    Conv1D(256, kernel_size=5, strides=1, padding="same", activation="relu"),
    MaxPooling1D(pool_size=5, strides=2, padding="same"),

    Conv1D(128, kernel_size=5, strides=1, padding="same", activation="relu"),
    MaxPooling1D(pool_size=5, strides=2, padding="same"),
    Dropout(0.2),

    Conv1D(64, kernel_size=5, strides=1, padding="same", activation="relu"),
    MaxPooling1D(pool_size=5, strides=2, padding="same"),

    Flatten(),
    Dense(units=32, activation="relu"),
    Dropout(0.3),

    # one softmax unit per one-hot label column
    Dense(units=n_classes, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not improved
# for 2 epochs; min_lr=10e-8 (i.e. 1e-7) is the floor.
rlrp = ReduceLROnPlateau(
    monitor="loss",
    factor=0.4,
    patience=2,
    min_lr=10e-8,
    verbose=0,
)

# Train for 50 epochs; the test split is used as validation data.
history = model.fit(
    X,
    Y,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[rlrp],
)