In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input/speech-dataset/BESD/ENGLISH'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import the libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import librosa
import librosa.display
from IPython.display import Audio
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

In [None]:
# path='/kaggle/input/speech-dataset/BESD/ENGLISH'
# audio=[]
# labels=[]
# for classnames in os.listdir(path):
#     classdir=os.path.join(path,classnames)
#     for filename in os.listdir(classdir):
#         audio.append(os.path.join(classdir,filename))
#         a=filename.split('_')[1]
#         b=a.split(' ')[1]
#         labels.append(b)
# print(labels.shape)

## Loading the dataset

In [None]:
# Walk the dataset root: every sub-directory is treated as one class, and each
# file inside it becomes one row labelled with that directory's name.
path='/kaggle/input/speech-dataset/BESD/ENGLISH'
audio=[]
labels=[]
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and everything derived from it) reproducible between runs.
for classnames in sorted(os.listdir(path)):
    classdir=os.path.join(path,classnames)
    # A stray file at the top level is not a class folder; listing it would raise.
    if not os.path.isdir(classdir):
        continue
    for filename in sorted(os.listdir(classdir)):
        audio.append(os.path.join(classdir,filename))
        labels.append(classnames)

In [None]:
# Preview the first five collected file paths.
audio[0:5]

In [None]:
# Preview the first five labels (one per collected path).
labels[0:5]

In [None]:
# Build the table in one step: one row per file, holding its path and label.
df=pd.DataFrame({'speech':audio,'labels':labels})

In [None]:
# Display the assembled table of file paths ('speech') and their labels.
df

In [None]:
# Number of files per label, to check how balanced the classes are.
df.loc[:,'labels'].value_counts()

## EXPLORATORY DATA ANALYSIS

In [None]:
def waveplot(data,sr,emotion):
    """Plot the time-domain waveform of one audio clip.

    Parameters
    ----------
    data : audio samples, passed straight to librosa's waveform plotter.
    sr : sampling rate, forwarded as ``sr``.
    emotion : text used as the figure title.
    """
    plt.figure(figsize=(10,4))
    plt.title(emotion,size=20)
    # Newer librosa releases removed display.waveplot in favour of
    # display.waveshow; fall back to the old name so older installs still work.
    draw_wave=getattr(librosa.display,'waveshow',None) or librosa.display.waveplot
    draw_wave(data,sr=sr)
    plt.show()
    
def spectrogram(data,sr,emotion):
    """Plot a dB-scaled spectrogram of one audio clip.

    Parameters
    ----------
    data : audio samples, passed to ``librosa.stft``.
    sr : sampling rate, forwarded to ``librosa.display.specshow``.
    emotion : text used as the figure title.
    """
    x=librosa.stft(data)
    # Magnitude of the STFT, converted to decibels for display.
    xdb=librosa.amplitude_to_db(np.abs(x))
    plt.figure(figsize=(10,4))
    plt.title(emotion,size=20)
    librosa.display.specshow(xdb,sr=sr,x_axis='time',y_axis='hz')
    plt.colorbar()
    # Render immediately, matching waveplot, so each call produces its own
    # figure even when called several times in one cell.
    plt.show()

In [None]:
# Path stored in the first row of the table.
df.loc[0,'speech']

In [None]:
# y: This variable represents the audio time series, 
# which is a one-dimensional array containing the amplitude of the 
# audio signal sampled at regular intervals over time. 
# Each element of the array represents the amplitude of the audio signal at a specific time point.

## FEATURE EXTRACTION

In [None]:
# Mel-Frequency Cepstral Coefficients
def extract_mfcc(filename,duration=3,offset=0.5,n_mfcc=40):
    """Return one time-averaged MFCC vector for an audio file.

    Parameters
    ----------
    filename : path handed to ``librosa.load``.
    duration : seconds of audio to read (default 3, the value that was
        previously hard-coded).
    offset : seconds to skip at the start of the file (default 0.5).
    n_mfcc : number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray with one value per coefficient: the mean of that
    coefficient over all frames of the loaded excerpt.
    """
    y,sr=librosa.load(filename,duration=duration,offset=offset)
    coeffs=librosa.feature.mfcc(y=y,sr=sr,n_mfcc=n_mfcc)
    # Transpose to (frames, coefficients) and average over the frame axis.
    return np.mean(coeffs.T,axis=0)

In [None]:
# y: This variable represents the audio time series, 
# which is a one-dimensional array containing the amplitude of the 
# audio signal sampled at regular intervals over time. 
# Each element of the array represents the amplitude of the audio signal at a specific time point.

In [None]:
# Sanity check: feature vector for the file in the first row.
extract_mfcc(df.loc[0,'speech'])

In [None]:
# Compute the MFCC vector for every file; the function is passed directly,
# no wrapper lambda needed.
X_mfcc=df['speech'].apply(extract_mfcc)

In [None]:
# Display the Series of per-file MFCC vectors.
X_mfcc

In [None]:
# Stack the per-file MFCC vectors into a single array (one row per file).
X=np.array(list(X_mfcc))
X.shape

In [None]:
# Add a trailing axis so each sample is (steps, 1), the 3-D layout fed to the
# LSTM below. Guarded on ndim so re-running this cell does not keep appending
# axes.
if X.ndim==2:
    X=np.expand_dims(X,-1)
X.shape

In [None]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the label column: learn the categories, then transform the
# same column into an indicator matrix.
enc=OneHotEncoder()
y=enc.fit(df[['labels']]).transform(df[['labels']])

In [None]:
# Convert the encoder output to a dense array. Guarded so re-running the cell
# after y is already dense does not raise AttributeError.
if hasattr(y,'toarray'):
    y=y.toarray()

In [None]:
# (number of samples, number of one-hot columns)
np.shape(y)

## Create LSTM model

In [None]:
from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout

# Take the layer sizes from the prepared arrays instead of hard-coding them,
# so the network still matches the data if the number of MFCCs or label
# classes changes.
input_shape=X.shape[1:]   # per-sample shape, e.g. (40, 1)
n_classes=y.shape[1]      # one output unit per one-hot column

model=Sequential([
    LSTM(123,return_sequences=False,input_shape=input_shape),
    Dense(64,activation='relu'),
    Dropout(0.2),
    Dense(32,activation='relu'),
    Dropout(0.2),
    Dense(n_classes,activation='softmax')
])

model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

In [None]:
# Keras' validation_split takes the LAST 20% of the arrays before any
# shuffling. The rows are ordered by class folder, so without this permutation
# the validation set would hold only the last classes and training would
# never see them. Shuffle X and y together with a fixed seed first.
shuffle_rng=np.random.default_rng(42)
shuffled_idx=shuffle_rng.permutation(len(X))
history=model.fit(X[shuffled_idx],y[shuffled_idx],validation_split=0.2,epochs=100,batch_size=512,shuffle=True)

## PLOT THE GRAPHS

In [None]:
acc=history.history['accuracy']
val_acc=history.history['val_accuracy']
# One x position per recorded epoch, derived from the history so the plot
# stays correct if the number of training epochs changes.
epochs=list(range(len(acc)))

# plt.plot takes x first, then y: epochs on the x axis, accuracy on the y
# axis, matching the axis labels below.
plt.plot(epochs,acc,label='train accuracy')
plt.plot(epochs,val_acc,label='validation accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()