In [1]:
import numpy as np
import pandas as pd
import os
import re
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from sklearn.preprocessing import minmax_scale
import IPython.display as ipd

# Global plotting defaults: wide figures and a larger font for readability.
plt.rcParams['figure.figsize'] = (20,8)
plt.rcParams['font.size'] = 16
sns.set_style('darkgrid')
# NOTE: this silences *every* warning for the rest of the notebook, including
# deprecation and convergence warnings from the libraries used below.
warnings.filterwarnings("ignore")

# Seed NumPy's global RNG so that the unseeded scikit-learn splits further down
# give the same partitions on every "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)

ModuleNotFoundError: No module named 'librosa'

# Data Preparation
- Create a dataframe containing the metadata
- Create a list of all combinations given in the specification
- Filter out the data using these combinations
- Assign classes to each record (happy or sad)
- Assign gender class to each record so that it can be leveraged during analysis

In [None]:
# Walk the dataset directory and record every file name together with its full path.
records = [
    [filename, os.path.join(dirname, filename)]
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
]

data = pd.DataFrame(records, columns=['filename','path'])
# The first "<word>_<digits>" token found in the path is used as the actor id.
# Raw strings keep the regex escapes (\w, \d) from being treated as (invalid)
# Python string escapes.
data['actor'] = data['path'].apply(lambda x: re.findall(r"\w+_\d+",x)[0])
# Drop rows whose extracted token is the "audio_speech_actors_01" folder name
# (presumably a duplicated copy of the recordings - verify against the dataset layout).
data = data[data['actor']!="audio_speech_actors_01"].reset_index(drop=True)
# Recording type = file name with the trailing "-<digits>.wav" removed.
data['type'] = data['filename'].apply(lambda x: re.split(r"-\d+\.wav",x)[0])
data

## Number of Unique Speakers/Actors

In [3]:
# Count the distinct actor ids (missing values are not counted).
data.loc[:, 'actor'].nunique(dropna=True)

NameError: name 'data' is not defined

## Number of Audio files for each of the Speakers/Actors

In [4]:
# Number of audio files recorded for each actor id.
data.loc[:, 'actor'].value_counts()

NameError: name 'data' is not defined

In [5]:
# Enumerate all 16 type prefixes of the form "03-01-{a}-{b}-{c}-{d}",
# with a in {03, 04} and b, c, d each in {01, 02}.
filter_list = [
    f"03-01-{a}-{b}-{c}-{d}"
    for a in ("03", "04")
    for b in ("01", "02")
    for c in ("01", "02")
    for d in ("01", "02")
]
                
# Keep only the recordings whose type prefix is one of the selected combinations.
# Building a fresh, re-indexed frame (instead of mutating a filtered slice in
# place) avoids pandas' chained-assignment pitfalls and keeps the cell re-runnable.
data = data[data['type'].isin(filter_list)].reset_index(drop=True)
# Types starting with '03-01-03' are labelled happy; every other kept type is sad.
data['class'] = data['type'].apply(lambda x: 'happy' if x.startswith('03-01-03') else 'sad')
# The number after the underscore in the actor id decides gender: even -> female, odd -> male.
data['gender'] = data['actor'].apply(lambda x: 'female' if int(x.split('_')[1])%2==0 else 'male')

NameError: name 'data' is not defined

In [6]:
# Display the metadata table after filtering and labelling.
data

NameError: name 'data' is not defined

## Number of Audio Files for Gender and Emotion Class

In [7]:
# Number of audio files per gender label.
data.loc[:, 'gender'].value_counts()

NameError: name 'data' is not defined

In [8]:
# Number of audio files per emotion class.
data.loc[:, 'class'].value_counts()

NameError: name 'data' is not defined

# Feature Extraction 
- Acoustic features can be extracted from audio in several ways; in this kernel only two methods are used:
1. MFCCs
2. Mel Spectrograms

<img src='https://images.deepai.org/converted-papers/2005.12779/x3.png'>

In [9]:
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix

In [10]:
def feature_extraction(df, mfcc=True):
    """Build one fixed-length feature vector per audio file.

    Every file listed in ``df['path']`` is loaded with ``librosa.load`` and
    converted to either 128 MFCCs or a 128-band mel spectrogram. The result is
    averaged along axis 1, so each recording yields a vector of 128 values.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``'path'`` column (audio file location) and a
        ``'class'`` column (label of the recording).
    mfcc : bool, default True
        ``True`` selects MFCC features, ``False`` selects mel-spectrogram features.

    Returns
    -------
    pandas.DataFrame
        One row per record of ``df`` (in the same order): the feature columns
        followed by a final ``'class'`` column.
    """
    features = []
    for _, record in tqdm(df.iterrows(), total=df.shape[0]):
        x, sr = librosa.load(record['path'])
        # Compute only the representation that was requested; the other one
        # would be thrown away and roughly doubles the per-file cost.
        if mfcc:
            representation = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=128)
        else:
            representation = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128)
        features.append(np.mean(representation, axis=1))

    dataf = pd.DataFrame(features)
    # Assign labels by position: `dataf` carries a fresh 0..n-1 index, so an
    # index-aligned assignment would silently mis-label rows whenever `df`
    # does not have that same default index.
    dataf['class'] = df['class'].to_numpy()
    return dataf

In [11]:
from sklearn.preprocessing import LabelEncoder

# Shared label encoder; it is re-fitted on the class column of each feature table below.
encoder = LabelEncoder()

## MFCC Features

In [12]:
# MFCC feature table: one row per recording, label in the last column.
dataf = feature_extraction(data, mfcc=True)
dataf

NameError: name 'data' is not defined

In [None]:
# Split the MFCC table into the feature matrix (all but the last column)
# and integer-encoded labels (last column).
X1 = dataf.iloc[:, :-1].to_numpy()
y1 = encoder.fit_transform(dataf.iloc[:, -1].to_numpy())

## Mel Spectrogram Features

In [None]:
# Mel-spectrogram feature table: one row per recording, label in the last column.
dataf = feature_extraction(df=data, mfcc=False)
dataf

In [None]:
# Split the mel-spectrogram table into the feature matrix (all but the last
# column) and integer-encoded labels (last column).
X2 = dataf.iloc[:, :-1].to_numpy()
y2 = encoder.fit_transform(dataf.iloc[:, -1].to_numpy())

# Modelling
- The following approaches will be used to build the emotion-detection models:
1. Logistic Regression on MFCC and Mel Spec Features
2. CNNs on MFCC and Mel Spec Features
3. Autoencoders and Variational Autoencoders on whichever features are giving better performance in the above models

## Logistic Regression

<img src='https://miro.medium.com/max/1400/1*Ba7LqnrsRnhjJyJl5LPW6Q.gif'>

In [None]:
def LogisticRegressionPipeline(X, y, random_state=None):
    """Fit a standardised logistic-regression classifier and report its performance.

    The data is split 80/20 (stratified on ``y``); a ``StandardScaler`` +
    ``LogisticRegression`` pipeline is fitted on the training part, and
    classification reports for the training and test parts are printed.
    The same (unfitted-clone) pipeline is then scored with 5-fold
    cross-validation on the full ``X``/``y``.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Class labels, one per row of ``X``.
    random_state : int or None, default None
        Forwarded to ``train_test_split`` so the hold-out split can be made
        reproducible. ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    tuple
        ``(cv_score, cmatrix)``: the array of 5 cross-validation scores and
        the confusion matrix of the held-out test part.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=random_state
    )
    pipeline = Pipeline([('scaler',StandardScaler()),('LogisticRegression',LogisticRegression())])
    pipeline.fit(X_train, y_train)
    y_train_pred = pipeline.predict(X_train)
    y_pred = pipeline.predict(X_test)

    cmatrix = confusion_matrix(y_test, y_pred)

    print("Training Performance")
    print(classification_report(y_train, y_train_pred))
    print("-----------------------------------------")
    print("Test Performance")
    print(classification_report(y_test, y_pred))
    print("-----------------------------------------")

    # cross_val_score clones the pipeline, so the fit above does not leak into the folds.
    cv_score = cross_val_score(pipeline, X, y, cv=5)
    print("5-Folds Scores: ", cv_score)
    print("-----------------------------------------")
    print("5-Folds Average Score: ", cv_score.mean())

    return cv_score, cmatrix

## Logistic Regression: MFCCs

In [None]:
# Logistic regression on the MFCC features.
scores, cmatrix = LogisticRegressionPipeline(X=X1, y=y1)

In [13]:
# Confusion matrix of the held-out split: rows are true labels, columns are predictions.
class_names = ['happy','sad']
sns.heatmap(
    cmatrix,
    annot=True,
    fmt='.3g',
    cmap='viridis',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.show()

NameError: name 'cmatrix' is not defined

## Logistic Regression: Mel Spectrogram

In [14]:
# Logistic regression on the mel-spectrogram features.
scores, cmatrix = LogisticRegressionPipeline(X=X2, y=y2)

NameError: name 'LogisticRegressionPipeline' is not defined

In [15]:
# Confusion matrix of the held-out split: rows are true labels, columns are predictions.
class_names = ['happy','sad']
sns.heatmap(
    cmatrix,
    annot=True,
    fmt='.3g',
    cmap='viridis',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.show()

NameError: name 'cmatrix' is not defined

## CNNs (Convolutional Neural Networks)

<img src='https://miro.medium.com/max/1400/1*ciDgQEjViWLnCbmX-EeSrA.gif'>

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, InputLayer, UpSampling2D, Layer, Reshape
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

ModuleNotFoundError: No module named 'tensorflow'

## CNNs: MFCCs

In [17]:
# Stratified 80/20 split of the MFCC features, then view every 128-value
# vector as a 16x8 single-channel image for the CNN.
X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size=0.2, stratify=y1)
X_train, X_test = (part.reshape(-1, 16, 8, 1) for part in (X_train, X_test))

NameError: name 'X1' is not defined

In [18]:
# Binary CNN classifier on 16x8x1 inputs: two conv + 2x2 max-pool stages,
# then a 32-unit dense layer and a single sigmoid output.
# `Input(shape=...)` replaces the deprecated `InputLayer(input_shape=...)`
# and matches how the autoencoder further down declares its input.
model = Sequential([
    Input(shape=(16, 8, 1)),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = "same"),
    MaxPooling2D(2, 2),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = "same"),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.summary()

NameError: name 'Sequential' is not defined

In [19]:
# Binary cross-entropy matches the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

NameError: name 'model' is not defined

In [20]:
# Save the model with the lowest validation loss seen so far.
model_name = "model.h5"
checkpoint = ModelCheckpoint(
    filepath=model_name,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without a validation-loss improvement and roll back to the best weights.
earlystopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

NameError: name 'ModelCheckpoint' is not defined

In [None]:
# Validate on a held-back 20% of the *training* data rather than on the test
# split: early stopping and checkpointing select the model on the validation
# loss, so using the test split here would leak it into model selection and
# inflate the test metrics reported afterwards.
history = model.fit(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[checkpoint,earlystopping])

In [21]:
# Training vs. validation loss per epoch.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

NameError: name 'history' is not defined

In [22]:
# Turn the sigmoid outputs into hard 0/1 predictions at a 0.5 threshold.
y_pred = model.predict(X_test)
is_positive = y_pred >= 0.5
is_negative = y_pred < 0.5
y_pred[is_positive] = 1
y_pred[is_negative] = 0

NameError: name 'model' is not defined

In [23]:
# Per-class metrics, then the confusion matrix (rows: true labels, columns: predictions).
report = classification_report(y_test, y_pred)
print(report)
cmatrix = confusion_matrix(y_test, y_pred)
class_names = ['happy','sad']
sns.heatmap(
    cmatrix,
    annot=True,
    fmt='.3g',
    cmap='viridis',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.show()

NameError: name 'y_test' is not defined

## CNNs: Mel Spectrogram

In [24]:
# Stratified 80/20 split of the mel-spectrogram features, then view every
# 128-value vector as a 16x8 single-channel image for the CNN.
X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2, stratify=y2)
X_train, X_test = (part.reshape(-1, 16, 8, 1) for part in (X_train, X_test))

NameError: name 'X2' is not defined

In [25]:
# Binary CNN classifier on 16x8x1 inputs: two conv + 2x2 max-pool stages,
# then a 32-unit dense layer and a single sigmoid output.
# `Input(shape=...)` replaces the deprecated `InputLayer(input_shape=...)`
# and matches how the autoencoder further down declares its input.
model = Sequential([
    Input(shape=(16, 8, 1)),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = "same"),
    MaxPooling2D(2, 2),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = "same"),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.summary()

NameError: name 'Sequential' is not defined

In [None]:
# Binary cross-entropy matches the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Save the model with the lowest validation loss seen so far.
model_name = "model_mel.h5"
checkpoint = ModelCheckpoint(
    filepath=model_name,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without a validation-loss improvement and roll back to the best weights.
earlystopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [26]:
# Validate on a held-back 20% of the *training* data rather than on the test
# split: early stopping and checkpointing select the model on the validation
# loss, so using the test split here would leak it into model selection and
# inflate the test metrics reported afterwards.
history = model.fit(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[checkpoint,earlystopping])

NameError: name 'model' is not defined

In [27]:
# Training vs. validation loss per epoch; the y-axis is clipped to [0, 2].
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.ylim([0,2])
plt.legend(['train', 'val'], loc='upper left')
plt.show()

NameError: name 'history' is not defined

In [28]:
# Predict with the model trained in this section on the current test split.
# Without this, `y_pred` would still hold the predictions of the earlier MFCC
# model for a *different* split, and the report below would be meaningless.
y_pred = (model.predict(X_test) >= 0.5).astype(int).ravel()

# Per-class metrics, then the confusion matrix (rows: true labels, columns: predictions).
print(classification_report(y_test,y_pred))
cmatrix = confusion_matrix(y_test,y_pred)
sns.heatmap(cmatrix,annot=True,fmt='.3g',cmap='viridis',xticklabels=['happy','sad'],yticklabels=['happy','sad'])
plt.show()

NameError: name 'y_test' is not defined

**Inference:** MFCC Features give better performance

## Autoencoders
<img src='https://miro.medium.com/max/1400/1*44eDEuZBEsmG_TCAKRI3Kw@2x.png'>

In [29]:
# Rebuild the MFCC feature table (`dataf` was overwritten with mel features earlier).
dataf = feature_extraction(data, mfcc=True)
dataf

NameError: name 'data' is not defined

In [30]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaler used to rescale the autoencoder inputs in the next cell.
scaler = MinMaxScaler()

In [31]:
# Separate the feature rows by class (the label is the last column).
is_happy = dataf['class'] == 'happy'
is_sad = dataf['class'] == 'sad'

# The scaler is fitted on the happy samples only; the sad samples are
# transformed with those same parameters.
X_happy = scaler.fit_transform(dataf[is_happy].iloc[:, :-1].values)
X_sad = scaler.transform(dataf[is_sad].iloc[:, :-1].values)

# View each 128-value vector as a 16x8 single-channel image.
image_shape = (-1, 16, 8, 1)
X_happy = X_happy.reshape(image_shape)
X_sad = X_sad.reshape(image_shape)

NameError: name 'dataf' is not defined

In [None]:
# Convolutional autoencoder: two conv + 2x2 max-pool stages compress the
# 16x8x1 input, two conv + 2x upsampling stages restore the spatial size, and a
# final sigmoid convolution produces the single-channel reconstruction.
encoding_stack = [
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same'),
    MaxPooling2D(2,2),
    Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'),
    MaxPooling2D(2,2),
]
decoding_stack = [
    Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same'),
    UpSampling2D((2,2)),
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same'),
    UpSampling2D((2,2)),
    Conv2D(filters=1, kernel_size=(3,3), activation='sigmoid', padding='same'),
]
model = Sequential([Input(shape=(16,8,1)), *encoding_stack, *decoding_stack])

# Reconstruction quality is measured with mean squared error.
model.compile(optimizer='adam', loss='mean_squared_error')

In [32]:
# Print the layer-by-layer architecture of the autoencoder.
model.summary()

NameError: name 'model' is not defined

In [33]:
# Train on happy samples only: the input is also the reconstruction target.
history = model.fit(x=X_happy, y=X_happy, epochs=100)

NameError: name 'model' is not defined

In [None]:
# Reconstruction loss on the training data per epoch.
training_loss = history.history['loss']
plt.plot(training_loss)
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

In [34]:
# Reconstruction loss on the happy samples (the data the model was trained on).
model.evaluate(x=X_happy, y=X_happy)

NameError: name 'model' is not defined

In [35]:
# Reconstruction loss on the sad samples, which were not used for training.
model.evaluate(x=X_sad, y=X_sad)

NameError: name 'model' is not defined

**Inference:** There is not a significant difference between the loss for happy audios and sad audios

## Variational Autoencoders
<img src='https://www.jeremyjordan.me/content/images/2018/03/Screen-Shot-2018-03-18-at-12.24.19-AM.png'>

In [36]:
class Sampling(Layer):
    """Draw a latent vector from the pair ``(z_mean, z_log_var)``.

    Returns ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is
    random normal noise with the same ``(batch, dim)`` shape as ``z_mean``.
    """

    def call(self, inputs):
        mean, log_var = inputs
        latent_shape = tf.shape(mean)
        n_samples, n_dims = latent_shape[0], latent_shape[1]
        noise = tf.keras.backend.random_normal(shape=(n_samples, n_dims))
        # exp(0.5 * log-variance) is the standard deviation.
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise

NameError: name 'Layer' is not defined

In [37]:
# Encoder: two conv + 2x2 max-pool stages take the 16x8x1 input down to
# 4x2 with 64 channels, which is then flattened.
input_layer = Input(shape=(16,8,1))
x = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(input_layer)
x = MaxPooling2D(2,2)(x)
x = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(x)
x = MaxPooling2D(2,2)(x)
x = Flatten()(x)
# Two parallel 128-unit heads feed the Sampling layer, which draws the latent vector z.
z_mean = Dense(128, name="z_mean")(x)
z_log_var = Dense(128, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
# Decoder: project z back to a 4x2x64 volume, then two upsample + conv stages
# followed by a sigmoid convolution producing the single-channel output.
x = Dense(4*2*64, activation="relu")(z)
x = Reshape((4, 2, 64))(x)
x = UpSampling2D()(x)
x = Conv2D(filters=64, kernel_size=(3,3), activation='relu', padding='same')(x)
x = UpSampling2D()(x)
x = Conv2D(filters=32, kernel_size=(3,3), activation='relu', padding='same')(x)
output = Conv2D(filters=1, kernel_size=(3,3), activation='sigmoid', padding='same')(x)

# NOTE(review): the model is compiled with the MSE reconstruction loss only; no
# KL-divergence term is added in this cell, so the latent distribution is not
# regularised the way a standard VAE objective would do - confirm this is intended.
model = Model(inputs=input_layer,outputs=output)
model.compile(loss='mean_squared_error',optimizer='adam')
model.summary()

NameError: name 'Input' is not defined

In [38]:
# Train on happy samples only: the input is also the reconstruction target.
history = model.fit(x=X_happy, y=X_happy, epochs=100)

NameError: name 'model' is not defined

In [None]:
# Reconstruction loss on the training data per epoch.
training_loss = history.history['loss']
plt.plot(training_loss)
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

In [39]:
# Reconstruction loss on the happy samples (the data the model was trained on).
model.evaluate(x=X_happy, y=X_happy)

NameError: name 'model' is not defined

In [40]:
# Reconstruction loss on the sad samples, which were not used for training.
model.evaluate(x=X_sad, y=X_sad)

NameError: name 'model' is not defined

**Inference:** There is a significant difference between the loss of happy and sad audios

In [41]:
# Stack happy and sad samples and build matching labels (1 = happy, 0 = sad).
# Label counts are taken from the arrays themselves instead of a hard-coded
# 192, so they stay aligned with X_sc if the class sizes ever change.
X_sc = np.vstack([X_happy, X_sad])
y = np.hstack([np.ones(len(X_happy)), np.zeros(len(X_sad))])

NameError: name 'X_happy' is not defined

In [42]:
# Sub-model mapping an input to the latent mean. The layer is looked up by the
# name it was given ("z_mean") rather than by its position in `model.layers`,
# so the extraction keeps working if layers are added or reordered.
var_encoder = Model(inputs=model.input, outputs=model.get_layer("z_mean").output)
var_encoder.summary()

NameError: name 'Model' is not defined

In [None]:
# Encode all samples with var_encoder. Note that this rebinds `z_mean`, which
# previously referred to the Dense layer output used to build the VAE graph.
z_mean = var_encoder.predict(X_sc)

In [43]:
# Scatter the first two latent dimensions, coloured by class label.
# x and y are passed as keywords: recent seaborn releases no longer accept
# them positionally and raise a TypeError.
sns.scatterplot(x=z_mean[:, 0], y=z_mean[:, 1], hue=y, s=80)
plt.title("Variational Autoencoder Latent Space Representation in 2D")
plt.xlabel("z[0]")
plt.ylabel("z[1]")
plt.show()

NameError: name 'z_mean' is not defined

<div class='alert alert-success'><strong>Conclusion:</strong>
    <li><strong>Logistic Regression</strong> model performs well with the MFCC Features with a decent classification accuracy and F1-score</li>
    <li><strong>CNNs</strong> perform well on the MFCC Features as well, therefore for autoencoders only MFCCs are taken as features</li>
    <li><strong>Variational Autoencoders</strong> do a better job in terms of reconstruction loss than the regular autoencoders as they are able to model the distribution of happy audios in <strong>128-Dimensional latent space</strong></li>
</div>

In [None]:
pip install -r.\requirements.txt