# <center> Speech Recognition </center>

# Importing Libraries

In [None]:
# Refresh the apt package index, then install the libsndfile1 system package.
# NOTE(review): presumably needed by the audio-loading stack used by librosa — confirm.
!apt update
!apt-get install -y libsndfile1

In [None]:
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files we will see it later.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio

import keras
from keras.models import Sequential, Model
from keras.layers import GRU, LSTM, TimeDistributed
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Dense, Conv1D, MaxPooling1D, Activation, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

import tensorflow as tf

import warnings
# Silence every warning unless the interpreter was started with explicit -W options,
# and additionally always ignore DeprecationWarning.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 

## Data Preparation

## <center> 1. NonSpeech7k dataset </center>

In [None]:
# Metadata tables (read into DataFrames) and audio directories of the NonSpeech7k dataset.
# The cells below look files up through the `Filename` and `Classname` metadata columns.
Nonspeech7k_test_data = pd.read_csv("/kaggle/input/nonspeech7k/metadata of test set.csv")
Nonspeech7k_train_data = pd.read_csv("/kaggle/input/nonspeech7k/metadata of train set .csv")
# Later cells pair `Nonspeech7k_audio_data1` with the test metadata and
# `Nonspeech7k_audio_data2` with the train metadata.
Nonspeech7k_audio_data1 = "/kaggle/input/nonspeech7k-audio"
Nonspeech7k_audio_data2 = "/kaggle/input/nonspeech7k-audio-train/train - Copy/train - Copy/train"

In [None]:
Nonspeech7k_train_data

In [None]:
Nonspeech7k_test_data

In [None]:
# Class name stored in the metadata row labelled 3 of the test set.
element = Nonspeech7k_test_data.at[3, "Classname"]
element

In [None]:
# Look up the class of a single file through its metadata row.
filename_mask = Nonspeech7k_test_data['Filename'] == "112557-2_0_0.wav"
data = Nonspeech7k_test_data[filename_mask]
data["Classname"].to_numpy()[0]

In [None]:
# Build the (label, path) table for the audio files in `Nonspeech7k_audio_data2`,
# labelled through the train metadata.
# Sorted so that row order (and everything derived from it, e.g. the seeded
# train/test split) does not depend on the arbitrary order of os.listdir.
nonspeech7k_directory_list = sorted(os.listdir(Nonspeech7k_audio_data2))

# Filename -> Classname lookup built once; the first metadata row wins for a
# duplicated filename, matching the previous `.values[0]` behaviour.
label_by_file = (
    Nonspeech7k_train_data
    .drop_duplicates(subset='Filename')
    .set_index('Filename')['Classname']
    .to_dict()
)

file_emotion = []
file_path = []
skipped_files = []

for file in nonspeech7k_directory_list:
    # A file without a metadata row used to raise IndexError; skip and report it instead.
    if file not in label_by_file:
        skipped_files.append(file)
        continue
    file_path.append(Nonspeech7k_audio_data2 + '/' + file)
    file_emotion.append(label_by_file[file])

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) without a metadata row, e.g. {skipped_files[:5]}")

# dataframe with the class label of each file (column name kept as 'Emotions' for downstream cells)
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe with the path of each file
path_df = pd.DataFrame(file_path, columns=['Path'])
Nonspeech7k_df1 = pd.concat([emotion_df, path_df], axis=1)

In [None]:
# Build the (label, path) table for the audio files in `Nonspeech7k_audio_data1`,
# labelled through the test metadata.
# Sorted so that row order does not depend on the arbitrary order of os.listdir.
nonspeech7k_directory_list = sorted(os.listdir(Nonspeech7k_audio_data1))

# Filename -> Classname lookup built once; the first metadata row wins for a
# duplicated filename, matching the previous `.values[0]` behaviour.
label_by_file = (
    Nonspeech7k_test_data
    .drop_duplicates(subset='Filename')
    .set_index('Filename')['Classname']
    .to_dict()
)

file_emotion = []
file_path = []
skipped_files = []

for file in nonspeech7k_directory_list:
    # A file without a metadata row used to raise IndexError; skip and report it instead.
    if file not in label_by_file:
        skipped_files.append(file)
        continue
    file_path.append(Nonspeech7k_audio_data1 + '/' + file)
    file_emotion.append(label_by_file[file])

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) without a metadata row, e.g. {skipped_files[:5]}")

# dataframe with the class label of each file (column name kept as 'Emotions' for downstream cells)
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# dataframe with the path of each file
path_df = pd.DataFrame(file_path, columns=['Path'])
Nonspeech7k_df2 = pd.concat([emotion_df, path_df], axis=1)

In [None]:
Nonspeech7k_df1

In [None]:
Nonspeech7k_df2

In [None]:
# Only Nonspeech7k_df1 is used from here on. The two lines below, which would
# concatenate it with Nonspeech7k_df2 and export the result to CSV, are disabled.
# Nonspeech7k_df = pd.concat([Nonspeech7k_df1,Nonspeech7k_df2],axis = 0)
# Nonspeech7k_df.to_csv("Nonspeech7k_df.csv",index=False)
Nonspeech7k_df = Nonspeech7k_df1
Nonspeech7k_df

## Data Visualisation and Exploration

First, let's plot how many samples each class (stored in the `Emotions` column) has in our dataset.

In [None]:
# Bar chart of the number of samples per class.
plt.title('Count of Emotions', size=16)
# The column is passed by keyword: current seaborn no longer accepts the data
# vector as a positional argument.
sns.countplot(x='Emotions', data=Nonspeech7k_df)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

In [None]:
def create_waveplot(data, sr, e):
    """Plot the waveform of an audio signal.

    Parameters
    ----------
    data : audio samples to plot.
    sr : sampling rate of `data`.
    e : class name shown in the figure title.
    """
    # `librosa.display.waveplot` was removed in librosa 0.10 in favour of
    # `waveshow`; use whichever one the installed version provides.
    draw_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
    plt.figure(figsize=(10, 3))
    plt.title('Waveplot for audio with {} emotion'.format(e), size=15)
    draw_wave(data, sr=sr)
    plt.show()

def create_spectrogram(data, sr, e):
    """Draw the dB-scaled STFT magnitude spectrogram of `data` on a new figure.

    `sr` is the sampling rate used for the axes and `e` is the class name
    shown in the title.
    """
    # short-time Fourier transform -> magnitude -> decibels
    magnitude = abs(librosa.stft(data))
    Xdb = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(12, 3))
    title = 'Spectrogram for audio with {} emotion'.format(e)
    plt.title(title, size=15)
    # linear frequency axis; pass y_axis='log' for a logarithmic one
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()

In [None]:
# Show one example (waveform, spectrogram and audio player) for every class.
# This replaces seven copy-pasted cells that differed only in the class name.
from IPython.display import display

EXAMPLE_CLASSES = ['laugh', 'cough', 'sneeze', 'yawn', 'crying', 'breath', 'screaming']
EXAMPLE_INDEX = 1  # which file of each class to show

for emotion in EXAMPLE_CLASSES:
    class_paths = Nonspeech7k_df.loc[Nonspeech7k_df.Emotions == emotion, 'Path'].to_numpy()
    if len(class_paths) == 0:
        print("No audio file found for class '{}'".format(emotion))
        continue
    # fall back to the last available file when the class has too few files
    path = class_paths[min(EXAMPLE_INDEX, len(class_paths) - 1)]
    data, sampling_rate = librosa.load(path)
    create_waveplot(data, sampling_rate, emotion)
    create_spectrogram(data, sampling_rate, emotion)
    plt.show()  # flush the spectrogram figure before the player is rendered
    display(Audio(path))

## Data Augmentation

In [None]:
def noise(data):
    """Return `data` plus Gaussian noise.

    The noise amplitude is a random fraction (at most 3.5%) of the largest
    sample value in `data`.
    """
    peak = np.amax(data)
    amplitude = 0.035 * np.random.uniform() * peak
    jitter = np.random.normal(size=data.shape[0])
    return data + amplitude * jitter

def stretch(data, rate=0.8):
    """Time-stretch `data` by `rate` with librosa and return the result."""
    # `rate` is passed by keyword: it is keyword-only in librosa >= 0.10 and
    # accepted by keyword in older versions as well.
    return librosa.effects.time_stretch(data, rate=rate)

def shift(data):
    """Circularly shift `data` by a random offset of up to +/- 5000 samples."""
    offset = int(1000 * np.random.uniform(low=-5, high=5))
    return np.roll(data, offset)

def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift `data` (sampled at `sampling_rate`) by `pitch_factor` steps."""
    # `sr` and `n_steps` are passed by keyword: they are keyword-only in
    # librosa >= 0.10 and accepted by keyword in older versions as well.
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

# Load one recording (the second row of the table) to demonstrate the
# augmentation techniques on.
path = Nonspeech7k_df.Path.to_numpy()[1]
data, sample_rate = librosa.load(path)

#### 1. Simple Audio

In [None]:
# `librosa.display.waveplot` was removed in librosa 0.10 (renamed `waveshow`);
# use whichever one the installed version provides.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plt.figure(figsize=(14,4))
plot_wave(y=data, sr=sample_rate)
Audio(path)

#### 2. Noise Injection

In [None]:
# Original signal with random Gaussian noise added.
x = noise(data)
# `librosa.display.waveplot` was removed in librosa 0.10 (renamed `waveshow`);
# use whichever one the installed version provides.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plt.figure(figsize=(14,4))
plot_wave(y=x, sr=sample_rate)
Audio(x, rate=sample_rate)

#### 3. Stretching

In [None]:
# Original signal time-stretched with the default rate.
x = stretch(data)
# `librosa.display.waveplot` was removed in librosa 0.10 (renamed `waveshow`);
# use whichever one the installed version provides.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plt.figure(figsize=(14,4))
plot_wave(y=x, sr=sample_rate)
Audio(x, rate=sample_rate)

#### 4. Shifting

In [None]:
# Original signal circularly shifted by a random offset.
x = shift(data)
# `librosa.display.waveplot` was removed in librosa 0.10 (renamed `waveshow`);
# use whichever one the installed version provides.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plt.figure(figsize=(14,4))
plot_wave(y=x, sr=sample_rate)
Audio(x, rate=sample_rate)

#### 5. Pitch

In [None]:
# Original signal pitch-shifted with the default factor.
x = pitch(data, sample_rate)
# `librosa.display.waveplot` was removed in librosa 0.10 (renamed `waveshow`);
# use whichever one the installed version provides.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plt.figure(figsize=(14,4))
plot_wave(y=x, sr=sample_rate)
Audio(x, rate=sample_rate)

In [None]:
def extract_features(data, sr=22050):
    """Summarise an audio signal as one flat feature vector.

    Each librosa feature is averaged over time and the results are
    concatenated in this order: zero-crossing rate, spectral centroid,
    spectral rolloff, chroma STFT, MFCC, RMS energy, mel spectrogram.

    Parameters
    ----------
    data : 1-D array of audio samples.
    sr : sampling rate of `data`. It used to be read from a notebook-level
        global, which did not match audio loaded at its native rate; the
        default of 22050 is librosa.load's default rate.

    Returns
    -------
    1-D float array with the concatenated time-averaged features.
    """
    stft_magnitude = np.abs(librosa.stft(data))
    feature_groups = [
        librosa.feature.zero_crossing_rate(y=data),
        librosa.feature.spectral_centroid(y=data, sr=sr),
        librosa.feature.spectral_rolloff(y=data, sr=sr),
        librosa.feature.chroma_stft(S=stft_magnitude, sr=sr),
        librosa.feature.mfcc(y=data, sr=sr),
        librosa.feature.rms(y=data),
        librosa.feature.melspectrogram(y=data, sr=sr),
    ]
    # mean over the time axis of every feature matrix, then one flat vector
    return np.hstack([np.mean(group.T, axis=0) for group in feature_groups]).astype(np.float64)

def get_features(path):
    """Load an audio file and return features for it and two augmented copies.

    Returns a 2-D array with three rows (original, noise-injected,
    stretched + pitch-shifted), or an empty array when no samples could be
    loaded.
    """
    # duration and offset skip the silence at the start and end of the recordings;
    # sr=None keeps the file's native sampling rate.
    data, sample_rate = librosa.load(path, sr=None, duration=2.5, offset=0.6)
    if data is None or len(data) == 0:
        return np.array([])

    # The native rate is passed on explicitly so every feature is computed
    # with the rate the audio was actually loaded at.
    original = extract_features(data, sample_rate)
    noisy = extract_features(noise(data), sample_rate)
    stretched_pitched = extract_features(pitch(stretch(data), sample_rate), sample_rate)

    return np.vstack((original, noisy, stretched_pitched))

In [None]:
# Collect one feature row per (file, augmentation) and the matching label.
X, Y = [], []
for path, emotion in zip(Nonspeech7k_df.Path, Nonspeech7k_df.Emotions):
    feature = get_features(path)
    if len(feature) > 0:
        X.extend(feature)
        # the label is repeated once per feature row returned for this file
        Y.extend([emotion] * len(feature))

In [None]:
len(X), len(Y), Nonspeech7k_df.Path.shape

In [None]:
# One row per sample: the feature columns followed by a 'labels' column.
Features = pd.DataFrame(X).assign(labels=Y)
Features.to_csv('features.csv', index=False)
Features

## Data Preparation

In [None]:
# Feature matrix (every column except the trailing 'labels') and label vector.
X = Features.drop(columns='labels').to_numpy()
Y = Features['labels'].to_numpy()

In [None]:
# Multiclass problem: one-hot encode the labels.
encoder = OneHotEncoder()
label_column = np.array(Y).reshape(-1, 1)
Y = encoder.fit_transform(label_column).toarray()

In [None]:
# Split the rows into 80% train / 20% test with a fixed seed.
# NOTE(review): get_features produces three rows per recording (original plus
# two augmented copies) and this split shuffles individual rows, so copies of
# the same recording can end up in both train and test. A group-aware split
# keyed on the source file would avoid that — confirm whether it is intended.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0, shuffle=True)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# Standardise the features; the scaler is fitted on the training rows only
# and then applied to both sets.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
x_train

# **CNN Model**

In [None]:
# Give the data the (samples, features, 1) shape the Conv1D / LSTM models expect.
# reshape (rather than expand_dims) keeps this cell idempotent: re-running it on
# already 3-D arrays leaves them unchanged instead of adding another axis.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# 1-D CNN: four Conv1D + MaxPooling1D stages (dropout after the second one),
# then a small dense head with a 7-way softmax output.
model = Sequential([
    Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu',
           input_shape=(x_train.shape[1], 1)),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),

    Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),
    Dropout(0.2),

    Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),

    Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=5, strides=2, padding='same'),

    Flatten(),
    Dense(units=32, activation='relu'),
    Dense(units=7, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not improved for 2 epochs.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=0.0000001,
    verbose=0,
)
hist_cnn = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[rlrp],
)

In [None]:
# Decode the one-hot ground truth and the predicted class probabilities
# back to class names.
y_test_cnn = encoder.inverse_transform(y_test)
y_pred = model.predict(x_test)
y_pred_cnn = encoder.inverse_transform(y_pred)

In [None]:
# Predicted and actual class names side by side.
df_cnn = pd.DataFrame({
    'Predicted Labels': y_pred_cnn.flatten(),
    'Actual Labels': y_test_cnn.flatten(),
})

df_cnn.head(10)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print("Accuracy of our model on test data : " , model.evaluate(x_test,y_test)[1]*100 , "%")

# Learning curves. The x axis is derived from the recorded history so it always
# matches the number of epochs that were actually trained.
train_acc = hist_cnn.history['accuracy']
train_loss = hist_cnn.history['loss']
test_acc = hist_cnn.history['val_accuracy']
test_loss = hist_cnn.history['val_loss']
epochs = list(range(len(train_acc)))

# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])

fig , ax = plt.subplots(1,3)
fig.set_size_inches(20,4)
plt.rcParams['font.weight'] = "bold"

ax[0].plot(epochs , train_acc , label = 'Training Accuracy')
ax[0].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[0].set_title('Model Accuracy',fontweight="bold",fontsize = 15)
ax[0].legend()
ax[0].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

ax[1].plot(epochs , train_loss , label = 'Training Loss')
ax[1].plot(epochs , test_loss , label = 'Testing Loss')
ax[1].set_title('Model Loss',fontweight="bold",fontsize = 15)
ax[1].legend()
ax[1].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

cf_matrix = confusion_matrix(np.ravel(y_test_cnn), np.ravel(y_pred_cnn), labels=labels)
# display_labels takes the class names (it previously received the epoch list)
disp = ConfusionMatrixDisplay(cf_matrix, display_labels=labels)
disp.plot(ax=ax[2],xticks_rotation='vertical',cmap="afmhot")
disp.ax_.set_title("Confusion Matrix", fontweight="bold",fontsize = 15)
disp.ax_.tick_params(labelsize=11)
disp.im_.colorbar.remove()
disp.ax_.set_ylabel('True label',fontweight="bold", fontsize = 15)
disp.ax_.set_xlabel('Predicted label', fontweight="bold", fontsize = 15)

fig.text(0.45, -0.14, 'CNN Model', fontweight="bold", fontsize = 20)
plt.show()

In [None]:
# Per-class precision / recall / F1 of the CNN on the test set.
cnn_report = classification_report(y_test_cnn, y_pred_cnn)
print(cnn_report)

# **LSTM Model**

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Conv1D, MaxPooling1D, Flatten
import numpy as np

# Shapes taken from the actual arrays (they used to be hard-coded, which only
# works for one specific dataset size).
x_train_shape = x_train.shape[:2]
y_train_lstm = y_train.shape
x_test_shape = x_test.shape[:2]
y_test_lstm = y_test.shape

# Reshape the data to 3D: (samples, features, 1)
x_train_lstm = np.reshape(x_train, (x_train_shape[0], x_train_shape[1], 1))
# x_test_lstm is needed by the training / evaluation cells below and was never defined
x_test_lstm = np.reshape(x_test, (x_test_shape[0], x_test_shape[1], 1))

x_train_lstm.shape, x_test_lstm.shape, x_train.shape, y_train.shape

In [None]:
# LSTM classifier: one LSTM layer followed by a stack of dense layers, each
# followed by dropout, ending in a 7-way softmax.
modelLSTM = Sequential()
modelLSTM.add(LSTM(256, return_sequences=False, input_shape=(164, 1)))
modelLSTM.add(Dropout(0.2))
for dense_units in (128, 64, 64, 32):
    modelLSTM.add(Dense(dense_units, activation='relu'))
    modelLSTM.add(Dropout(0.2))
modelLSTM.add(Dense(7, activation='softmax'))

In [None]:
# Plain SGD with categorical cross-entropy, tracking accuracy.
modelLSTM.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

modelLSTM.summary()

In [None]:
# Train for 100 epochs, validating on the held-out split after every epoch.
hist_lstm = modelLSTM.fit(
    x_train_lstm,
    y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_test_lstm, y_test),
)

In [None]:
# Decode the one-hot ground truth and the predicted class probabilities
# back to class names.
y_test_lstm = encoder.inverse_transform(y_test)
y_pred_lstm = modelLSTM.predict(x_test_lstm)
y_pred = encoder.inverse_transform(y_pred_lstm)

In [None]:
# Predicted and actual class names side by side.
df_lstm = pd.DataFrame({
    'Predicted Labels': y_pred.flatten(),
    'Actual Labels': y_test_lstm.flatten(),
})

df_lstm

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print("Accuracy of our model on test data : " ,modelLSTM.evaluate(x_test_lstm,y_test)[1]*100 , "%")

# Learning curves. The x axis is derived from the recorded history; it was
# hard-coded to 30 points while the model is trained for 100 epochs, which
# makes the plot calls fail on mismatched lengths.
train_acc = hist_lstm.history['accuracy']
train_loss = hist_lstm.history['loss']
test_acc = hist_lstm.history['val_accuracy']
test_loss = hist_lstm.history['val_loss']
epochs = list(range(len(train_acc)))

# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])

fig , ax = plt.subplots(1,3)
fig.set_size_inches(20,4)
plt.rcParams['font.weight'] = "bold"

ax[0].plot(epochs , train_acc , label = 'Training Accuracy')
ax[0].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[0].set_title('Model Accuracy',fontweight="bold",fontsize = 15)
ax[0].legend()
ax[0].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

ax[1].plot(epochs , train_loss , label = 'Training Loss')
ax[1].plot(epochs , test_loss , label = 'Testing Loss')
ax[1].set_title('Model Loss',fontweight="bold",fontsize = 15)
ax[1].legend()
ax[1].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

# Predictions are decoded from this model's own output instead of relying on
# the shared `y_pred` name that every model section overwrites.
lstm_true = np.ravel(y_test_lstm)
lstm_pred = np.ravel(encoder.inverse_transform(y_pred_lstm))
cf_matrix = confusion_matrix(lstm_true, lstm_pred, labels=labels)
# display_labels takes the class names (it previously received the epoch list)
disp = ConfusionMatrixDisplay(cf_matrix, display_labels=labels)
disp.plot(ax=ax[2],xticks_rotation='vertical',cmap="Greens")
disp.ax_.set_title("Confusion Matrix", fontweight="bold",fontsize = 15)
disp.ax_.tick_params(labelsize=11)
disp.im_.colorbar.remove()
disp.ax_.set_ylabel('True label',fontweight="bold", fontsize = 15)
disp.ax_.set_xlabel('Predicted label', fontweight="bold", fontsize = 15)

fig.text(0.45, -0.14, 'LSTM Model', fontweight="bold", fontsize = 20)
plt.show()

In [None]:
# classification_report needs class labels on both sides; `y_pred_lstm` holds
# the raw softmax probabilities, so decode it to class names first.
print(classification_report(np.ravel(y_test_lstm), np.ravel(encoder.inverse_transform(y_pred_lstm))))

# **BiLSTM Model**

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed, Conv1D, MaxPooling1D, Flatten
import numpy as np

# Shapes taken from the actual arrays (they used to be hard-coded, which only
# works for one specific dataset size).
x_train_shape = x_train.shape[:2]
y_train_bilstm = y_train.shape
x_test_shape = x_test.shape[:2]
y_test_bilstm = y_test.shape

# Reshape the data to 3D: (samples, features, 1)
x_train_bilstm = np.reshape(x_train, (x_train_shape[0], x_train_shape[1], 1))
x_test_bilstm = np.reshape(x_test, (x_test_shape[0], x_test_shape[1], 1))

x_train_bilstm.shape, x_train.shape, y_train.shape

In [None]:
from keras.layers import Bidirectional
# Bidirectional LSTM followed by two dense layers (dropout after each stage)
# and a 7-way softmax.
modelBiLSTM = Sequential()
modelBiLSTM.add(Bidirectional(LSTM(128, return_sequences=False, input_shape=(164, 1))))
modelBiLSTM.add(Dropout(0.3))
for dense_units in (64, 32):
    modelBiLSTM.add(Dense(dense_units, activation='relu'))
    modelBiLSTM.add(Dropout(0.3))
modelBiLSTM.add(Dense(7, activation='softmax'))

In [None]:
# Adam with categorical cross-entropy; the model is built explicitly so that
# summary() can report layer shapes.
modelBiLSTM.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
modelBiLSTM.build((None, 164, 1))
modelBiLSTM.summary()

In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not improved for 2 epochs.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=0.0000001,
    verbose=0,
)
hist_bilstm = modelBiLSTM.fit(
    x_train_bilstm,
    y_train,
    batch_size=32,
    epochs=120,
    validation_data=(x_test_bilstm, y_test),
    callbacks=[rlrp],
)

In [None]:
# Decode the one-hot ground truth and the predicted class probabilities
# back to class names.
y_test_bilstm = encoder.inverse_transform(y_test)
y_pred_bilstm = modelBiLSTM.predict(x_test_bilstm)
y_pred = encoder.inverse_transform(y_pred_bilstm)

In [None]:
# Predicted and actual class names side by side.
df_bilstm = pd.DataFrame({
    'Predicted Labels': y_pred.flatten(),
    'Actual Labels': y_test_bilstm.flatten(),
})

df_bilstm

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print("Accuracy of our model on test data : " ,modelBiLSTM.evaluate(x_test_bilstm,y_test)[1]*100 , "%")

# Learning curves without the first `skip` epochs. The x axis carries the real
# epoch numbers and is derived from the recorded history instead of a
# hard-coded length.
skip = 20
train_acc = hist_bilstm.history['accuracy'][skip:]
train_loss = hist_bilstm.history['loss'][skip:]
test_acc = hist_bilstm.history['val_accuracy'][skip:]
test_loss = hist_bilstm.history['val_loss'][skip:]
epochs = list(range(skip, skip + len(train_acc)))

# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])

fig , ax = plt.subplots(1,3)
fig.set_size_inches(20,4)
plt.rcParams['font.weight'] = "bold"

ax[0].plot(epochs , train_acc , label = 'Training Accuracy')
ax[0].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[0].set_title('Model Accuracy',fontweight="bold",fontsize = 15)
ax[0].legend()
ax[0].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

ax[1].plot(epochs , train_loss , label = 'Training Loss')
ax[1].plot(epochs , test_loss , label = 'Testing Loss')
ax[1].set_title('Model Loss',fontweight="bold",fontsize = 15)
ax[1].legend()
ax[1].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

# Predictions are decoded from this model's own output instead of relying on
# the shared `y_pred` name that every model section overwrites.
bilstm_true = np.ravel(y_test_bilstm)
bilstm_pred = np.ravel(encoder.inverse_transform(y_pred_bilstm))
cf_matrix = confusion_matrix(bilstm_true, bilstm_pred, labels=labels)
# display_labels takes the class names (it previously received the epoch list)
disp = ConfusionMatrixDisplay(cf_matrix, display_labels=labels)
disp.plot(ax=ax[2],xticks_rotation='vertical',cmap="Greens")
disp.ax_.set_title("Confusion Matrix", fontweight="bold",fontsize = 15)
disp.ax_.tick_params(labelsize=11)
disp.im_.colorbar.remove()
disp.ax_.set_ylabel('True label',fontweight="bold", fontsize = 15)
disp.ax_.set_xlabel('Predicted label', fontweight="bold", fontsize = 15)

fig.text(0.45, -0.14, 'BiLSTM Model', fontweight="bold", fontsize = 20)
plt.show()

# **1D Resnet**

In [None]:
import numpy as np

# Shapes taken from the actual arrays (they used to be hard-coded, which only
# works for one specific dataset size).
x_train_shape = x_train.shape[:2]
y_train_1d_resnet = y_train.shape
x_test_shape = x_test.shape[:2]
y_test_1d_resnet = y_test.shape

# Reshape the data to 3D: (samples, features, 1)
x_train_1d_resnet = np.reshape(x_train, (x_train_shape[0], x_train_shape[1], 1))
x_test_1d_resnet = np.reshape(x_test, (x_test_shape[0], x_test_shape[1], 1))

x_train_1d_resnet.shape, x_train.shape, y_train.shape

In [None]:
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Activation, Add

def residual_block(x, filters, kernel_size, dilation_rate):
    """Apply a two-convolution 1-D residual block to `x`.

    Main path: Conv1D -> BatchNorm -> ReLU -> Conv1D -> BatchNorm. The result
    is added to the shortcut and passed through a final ReLU.

    Parameters
    ----------
    x : input tensor.
    filters : number of filters of both convolutions.
    kernel_size : kernel size of both convolutions.
    dilation_rate : dilation rate of both convolutions.

    Returns
    -------
    Output tensor of the block, with `filters` channels.
    """
    # Shortcut connection
    shortcut = x
    # Add() needs matching shapes: when the input channel count differs from
    # `filters`, project the shortcut with a 1x1 convolution.
    if shortcut.shape[-1] != filters:
        shortcut = Conv1D(filters, 1, padding='same')(shortcut)

    # Main path
    x = Conv1D(filters, kernel_size, dilation_rate=dilation_rate, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv1D(filters, kernel_size, dilation_rate=dilation_rate, padding='same')(x)
    x = BatchNormalization()(x)

    # Add shortcut value to main path
    x = Add()([x, shortcut])
    x = Activation('relu')(x)

    return x

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten
def build_1d_resnet(input_shape, num_classes):
    """Build the functional Keras model used in the "1D Resnet" section.

    The model is Conv1D(64, 7) -> BatchNorm -> ReLU -> Flatten -> three dense
    layers with dropout -> softmax over `num_classes`.

    NOTE(review): the four residual_block calls below are commented out, so
    the model as written contains no residual connections — confirm whether
    they were disabled on purpose.

    Parameters
    ----------
    input_shape : shape of one input sample, passed to `Input`.
    num_classes : number of units of the softmax output layer.

    Returns
    -------
    The (uncompiled) Keras `Model`.
    """
    inputs = Input(shape=input_shape)
    
    x = Conv1D(64, 7, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    # 4 Residual blocks (currently disabled)
#     x = residual_block(x, filters=64, kernel_size=3, dilation_rate=1)
#     x = residual_block(x, filters=64, kernel_size=3, dilation_rate=2)
#     x = residual_block(x, filters=64, kernel_size=3, dilation_rate=4)
#     x = residual_block(x, filters=64, kernel_size=3, dilation_rate=8)
    
    x = Flatten()(x)
    
    # Dense classification head with dropout after every hidden layer
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.3)(x)
    
    outputs = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Instantiate and compile the model for 164-long single-channel inputs and 7 classes.
num_classes = 7
input_shape = (164, 1)

model_1d_resnet = build_1d_resnet(input_shape=input_shape, num_classes=num_classes)
model_1d_resnet.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_1d_resnet.summary()

In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not improved for 2 epochs.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=0.0000001,
    verbose=0,
)
hist_1d_resnet = model_1d_resnet.fit(
    x_train_1d_resnet,
    y_train,
    batch_size=32,
    epochs=120,
    validation_data=(x_test_1d_resnet, y_test),
    callbacks=[rlrp],
)

In [None]:
# Decode the one-hot ground truth and the predicted class probabilities
# back to class names.
y_test_1d_resnet = encoder.inverse_transform(y_test)
y_pred_1d_resnet = model_1d_resnet.predict(x_test_1d_resnet)
y_pred = encoder.inverse_transform(y_pred_1d_resnet)

In [None]:
# Predicted and actual class names side by side.
df_1d_resnet = pd.DataFrame({
    'Predicted Labels': y_pred.flatten(),
    'Actual Labels': y_test_1d_resnet.flatten(),
})

df_1d_resnet

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print("Accuracy of our model on test data : " ,model_1d_resnet.evaluate(x_test_1d_resnet,y_test)[1]*100 , "%")

# Learning curves without the first `skip` epochs. The x axis carries the real
# epoch numbers and is derived from the recorded history instead of a
# hard-coded length.
skip = 20
train_acc = hist_1d_resnet.history['accuracy'][skip:]
train_loss = hist_1d_resnet.history['loss'][skip:]
test_acc = hist_1d_resnet.history['val_accuracy'][skip:]
test_loss = hist_1d_resnet.history['val_loss'][skip:]
epochs = list(range(skip, skip + len(train_acc)))

# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])

fig , ax = plt.subplots(1,3)
fig.set_size_inches(20,4)
plt.rcParams['font.weight'] = "bold"

ax[0].plot(epochs , train_acc , label = 'Training Accuracy')
ax[0].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[0].set_title('Model Accuracy',fontweight="bold",fontsize = 15)
ax[0].legend()
ax[0].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

ax[1].plot(epochs , train_loss , label = 'Training Loss')
ax[1].plot(epochs , test_loss , label = 'Testing Loss')
ax[1].set_title('Model Loss',fontweight="bold",fontsize = 15)
ax[1].legend()
ax[1].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

# Predictions are decoded from this model's own output instead of relying on
# the shared `y_pred` name that every model section overwrites.
resnet_true = np.ravel(y_test_1d_resnet)
resnet_pred = np.ravel(encoder.inverse_transform(y_pred_1d_resnet))
cf_matrix = confusion_matrix(resnet_true, resnet_pred, labels=labels)
# display_labels takes the class names (it previously received the epoch list)
disp = ConfusionMatrixDisplay(cf_matrix, display_labels=labels)
disp.plot(ax=ax[2],xticks_rotation='vertical',cmap="Greens")
disp.ax_.set_title("Confusion Matrix", fontweight="bold",fontsize = 15)
disp.ax_.tick_params(labelsize=11)
disp.im_.colorbar.remove()
disp.ax_.set_ylabel('True label',fontweight="bold", fontsize = 15)
disp.ax_.set_xlabel('Predicted label', fontweight="bold", fontsize = 15)

fig.text(0.45, -0.14, '1D Resnet Model', fontweight="bold", fontsize = 20)
plt.show()

# **TDNN**

In [None]:
import numpy as np

# Shapes taken from the actual arrays (they used to be hard-coded, which only
# works for one specific dataset size).
x_train_shape = x_train.shape[:2]
y_train_tdnn = y_train.shape
x_test_shape = x_test.shape[:2]
y_test_tdnn = y_test.shape

# Reshape the data to 3D: (samples, features, 1)
x_train_tdnn = np.reshape(x_train, (x_train_shape[0], x_train_shape[1], 1))
x_test_tdnn = np.reshape(x_test, (x_test_shape[0], x_test_shape[1], 1))

x_train_tdnn.shape, x_test_tdnn.shape, x_train.shape, y_train.shape

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

def build_tdnn_model(input_shape, num_classes):
    """Return the uncompiled convolutional model used in the TDNN section.

    Four Conv1D(kernel 3, 'same', ReLU) + MaxPooling1D(2) stages with 64, 128,
    256 and 512 filters, followed by Flatten, Dense(512) and a softmax layer
    with `num_classes` units.
    """
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=input_shape, padding='same'))
    model.add(MaxPooling1D(2))
    for n_filters in (128, 256, 512):
        model.add(Conv1D(n_filters, 3, activation='relu', padding='same'))
        model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model

# Instantiate and compile the model for 164-long single-channel inputs and 7 classes.
num_classes = 7
input_shape = (164, 1)

model_tdnn = build_tdnn_model(input_shape=input_shape, num_classes=num_classes)
model_tdnn.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model_tdnn.summary()

In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not improved for 2 epochs.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=0.0000001,
    verbose=0,
)
hist_tdnn = model_tdnn.fit(
    x_train_tdnn,
    y_train,
    batch_size=32,
    epochs=120,
    validation_data=(x_test_tdnn, y_test),
    callbacks=[rlrp],
)

In [None]:
# Decode the one-hot ground truth and the predicted class probabilities
# back to class names.
y_test_tdnn = encoder.inverse_transform(y_test)
y_pred_tdnn = model_tdnn.predict(x_test_tdnn)
y_pred = encoder.inverse_transform(y_pred_tdnn)

In [None]:
# Predicted and actual class names side by side.
df_1d_tdnn = pd.DataFrame({
    'Predicted Labels': y_pred.flatten(),
    'Actual Labels': y_test_tdnn.flatten(),
})

df_1d_tdnn

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print("Accuracy of our model on test data : " ,model_tdnn.evaluate(x_test_tdnn,y_test)[1]*100 , "%")

# Learning curves without the first `skip` epochs. The x axis carries the real
# epoch numbers and is derived from the recorded history instead of a
# hard-coded length.
skip = 10
train_acc = hist_tdnn.history['accuracy'][skip:]
train_loss = hist_tdnn.history['loss'][skip:]
test_acc = hist_tdnn.history['val_accuracy'][skip:]
test_loss = hist_tdnn.history['val_loss'][skip:]
epochs = list(range(skip, skip + len(train_acc)))

# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])

fig , ax = plt.subplots(1,3)
fig.set_size_inches(20,4)
plt.rcParams['font.weight'] = "bold"

ax[0].plot(epochs , train_acc , label = 'Training Accuracy')
ax[0].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[0].set_title('Model Accuracy',fontweight="bold",fontsize = 15)
ax[0].legend()
ax[0].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

ax[1].plot(epochs , train_loss , label = 'Training Loss')
ax[1].plot(epochs , test_loss , label = 'Testing Loss')
ax[1].set_title('Model Loss',fontweight="bold",fontsize = 15)
ax[1].legend()
ax[1].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

# Predictions are decoded from this model's own output instead of relying on
# the shared `y_pred` name that every model section overwrites.
tdnn_true = np.ravel(y_test_tdnn)
tdnn_pred = np.ravel(encoder.inverse_transform(y_pred_tdnn))
cf_matrix = confusion_matrix(tdnn_true, tdnn_pred, labels=labels)
# display_labels takes the class names (it previously received the epoch list)
disp = ConfusionMatrixDisplay(cf_matrix, display_labels=labels)
disp.plot(ax=ax[2],xticks_rotation='vertical',cmap="Greens")
disp.ax_.set_title("Confusion Matrix", fontweight="bold",fontsize = 15)
disp.ax_.tick_params(labelsize=11)
disp.im_.colorbar.remove()
disp.ax_.set_ylabel('True label',fontweight="bold", fontsize = 15)
disp.ax_.set_xlabel('Predicted label', fontweight="bold", fontsize = 15)

fig.text(0.45, -0.14, 'TDNN Model', fontweight="bold", fontsize = 20)
plt.show()

# **CNN-BiLSTM**

In [None]:
import numpy as np

# Shapes taken from the actual arrays (they used to be hard-coded, which only
# works for one specific dataset size).
x_train_shape = x_train.shape[:2]
y_train_CNN_BiLSTM = y_train.shape
x_test_shape = x_test.shape[:2]
y_test_CNN_BiLSTM = y_test.shape

# Reshape the data to 3D: (samples, features, 1)
x_train_CNN_BiLSTM = np.reshape(x_train, (x_train_shape[0], x_train_shape[1], 1))
x_test_CNN_BiLSTM = np.reshape(x_test, (x_test_shape[0], x_test_shape[1], 1))

x_train_CNN_BiLSTM.shape, x_test_CNN_BiLSTM.shape, x_train.shape, y_train.shape

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Dense, Bidirectional, LSTM

# Two Conv1D + MaxPooling1D stages feeding two stacked bidirectional LSTMs and
# a dense head with a 7-way softmax.
model_CNN_BiLSTM = Sequential([
    # input_shape belongs on the first layer; it used to sit on the second
    # Conv1D, where it does not define the model's input.
    Conv1D(128, 3, activation='relu', input_shape=(164, 1)),
    MaxPooling1D(2),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(7, activation='softmax')  # Adjust the number of units for your classification task
])

In [None]:
# Adam with categorical cross-entropy; the model is built explicitly so that
# summary() can report layer shapes.
model_CNN_BiLSTM.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_CNN_BiLSTM.build((None, 164, 1))
model_CNN_BiLSTM.summary()

In [None]:
# Multiply the learning rate by 0.4 whenever the training loss has not improved for 2 epochs.
rlrp = ReduceLROnPlateau(
    monitor='loss',
    factor=0.4,
    patience=2,
    min_lr=0.0000001,
    verbose=0,
)
hist_CNN_BiLSTM = model_CNN_BiLSTM.fit(
    x_train_CNN_BiLSTM,
    y_train,
    batch_size=32,
    epochs=120,
    validation_data=(x_test_CNN_BiLSTM, y_test),
    callbacks=[rlrp],
)

In [None]:
# Decode the one-hot ground truth and the predicted class probabilities
# back to class names.
y_test_CNN_BiLSTM = encoder.inverse_transform(y_test)
y_pred_CNN_BiLSTM = model_CNN_BiLSTM.predict(x_test_CNN_BiLSTM)
y_pred = encoder.inverse_transform(y_pred_CNN_BiLSTM)

In [None]:
# Predicted and actual class names side by side.
df_CNN_BiLSTM = pd.DataFrame({
    'Predicted Labels': y_pred.flatten(),
    'Actual Labels': y_test_CNN_BiLSTM.flatten(),
})

df_CNN_BiLSTM

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print("Accuracy of our model on test data : " ,model_CNN_BiLSTM.evaluate(x_test_CNN_BiLSTM,y_test)[1]*100 , "%")

# Learning curves without the first `skip` epochs. The x axis carries the real
# epoch numbers and is derived from the recorded history instead of a
# hard-coded length.
skip = 10
train_acc = hist_CNN_BiLSTM.history['accuracy'][skip:]
train_loss = hist_CNN_BiLSTM.history['loss'][skip:]
test_acc = hist_CNN_BiLSTM.history['val_accuracy'][skip:]
test_loss = hist_CNN_BiLSTM.history['val_loss'][skip:]
epochs = list(range(skip, skip + len(train_acc)))

# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])

fig , ax = plt.subplots(1,3)
fig.set_size_inches(20,4)
plt.rcParams['font.weight'] = "bold"

ax[0].plot(epochs , train_acc , label = 'Training Accuracy')
ax[0].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[0].set_title('Model Accuracy',fontweight="bold",fontsize = 15)
ax[0].legend()
ax[0].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

ax[1].plot(epochs , train_loss , label = 'Training Loss')
ax[1].plot(epochs , test_loss , label = 'Testing Loss')
ax[1].set_title('Model Loss',fontweight="bold",fontsize = 15)
ax[1].legend()
ax[1].set_xlabel("Number of Epochs", fontweight="bold", fontsize = 15)

# Predictions are decoded from this model's own output instead of relying on
# the shared `y_pred` name that every model section overwrites.
cnn_bilstm_true = np.ravel(y_test_CNN_BiLSTM)
cnn_bilstm_pred = np.ravel(encoder.inverse_transform(y_pred_CNN_BiLSTM))
cf_matrix = confusion_matrix(cnn_bilstm_true, cnn_bilstm_pred, labels=labels)
# display_labels takes the class names (it previously received the epoch list)
disp = ConfusionMatrixDisplay(cf_matrix, display_labels=labels)
disp.plot(ax=ax[2],xticks_rotation='vertical',cmap="Greens")
disp.ax_.set_title("Confusion Matrix", fontweight="bold",fontsize = 15)
disp.ax_.tick_params(labelsize=11)
disp.im_.colorbar.remove()
disp.ax_.set_ylabel('True label',fontweight="bold", fontsize = 15)
disp.ax_.set_xlabel('Predicted label', fontweight="bold", fontsize = 15)

fig.text(0.45, -0.14, 'CNN-BiLSTM Model', fontweight="bold", fontsize = 20)
plt.show()

# **MLP Model**

In [None]:
import numpy as np

# Shapes taken from the actual arrays (they used to be hard-coded, which only
# works for one specific dataset size).
x_train_shape = x_train.shape[:2]
y_train_MLP = y_train.shape
x_test_shape = x_test.shape[:2]
y_test_MLP = y_test.shape

# scikit-learn's MLPClassifier needs 2-D input (samples, features), so the
# trailing channel axis added for the Keras models is flattened away here.
x_train_MLP = np.reshape(x_train, (x_train_shape[0], -1))
x_test_MLP = np.reshape(x_test, (x_test_shape[0], -1))

x_train_MLP.shape, x_test_MLP.shape, x_train.shape, y_train.shape

In [None]:
from sklearn.neural_network import MLPClassifier
# Single hidden layer of 300 units; written as a one-element tuple because
# `(300)` is just the integer 300.
model_MLP = MLPClassifier(batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500, solver='adam')
# MLPClassifier only accepts 2-D input, while x_train carries a trailing channel
# axis by this point; flatten it to (samples, features) before fitting.
# Note: fit() returns the fitted estimator itself, not a training history.
hist_MLP = model_MLP.fit(x_train.reshape(len(x_train), -1), y_train)

In [None]:
# The classifier is fitted on one-hot targets, so predict() returns a 0/1
# indicator matrix that may contain all-zero rows, which the encoder cannot
# decode. Per-class probabilities are used instead, so the encoder always
# picks the most likely class (as it does for the Keras models).
# The input is flattened to 2-D because MLPClassifier rejects 3-D arrays.
y_pred_MLP = model_MLP.predict_proba(x_test.reshape(len(x_test), -1))
y_pred = encoder.inverse_transform(y_pred_MLP)
y_test_MLP = encoder.inverse_transform(y_test)

In [None]:
# Predicted and actual class names side by side.
df_MLP = pd.DataFrame({
    'Predicted Labels': y_pred.flatten(),
    'Actual Labels': y_test_MLP.flatten(),
})

df_MLP

In [None]:
from sklearn.metrics import accuracy_score
# Share of test samples whose decoded prediction equals the true class.
accuracy = accuracy_score(y_test_MLP, y_pred)
accuracy_message = 'Achieved Accuracy: {:.2f}%'.format(accuracy*100)
print(accuracy_message)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
# Class names in the encoder's order; the same list fixes the row/column order
# of the confusion matrix, so ticks and counts cannot get out of sync.
labels = list(encoder.categories_[0])
conf_matrix = confusion_matrix(np.ravel(y_test_MLP), np.ravel(y_pred), labels=labels)
# class_names makes mlxtend place one tick per class; setting tick labels
# afterwards without fixing tick positions can shift the labels.
fig, ax = plot_confusion_matrix(conf_mat=conf_matrix, figsize=(6,6), cmap=plt.cm.Blues, class_names=labels)
ax.set_xlabel('Predicted labels', fontsize=14, weight = 'bold')
ax.set_ylabel('True labels', fontsize=14, weight = 'bold')
ax.set_title('Confusion Matrix Using MLP', fontsize=16, weight = 'bold')
ax.tick_params(labelsize=11)
plt.setp(ax.get_xticklabels(), rotation=35, horizontalalignment='right')
plt.show()