In [None]:
from glob import glob
from tensorflow import keras
from IPython.display import Audio
from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd 
import librosa as lr
import seaborn as sns
import matplotlib.pyplot as plt
import os
import warnings
import librosa.display
warnings.filterwarnings('ignore')

In [None]:
# Root folder of the labelled training recordings.
data_directory = r'/content/drive/MyDrive/Colab Notebooks/Projects/Speech Emotion Recognition/Dataset/Emotions'
# Search sub-folders as well ('**' with recursive=True also matches the top level),
# so this count agrees with the recursive os.walk used to build the dataset below.
audio_files = glob(data_directory + '/**/*.wav', recursive = True)

In [None]:
len(audio_files)

In [None]:
# Parallel accumulators: paths[i] is a recording and labels[i] is its emotion.
paths, labels = [], []

In [None]:
# Start from empty lists so re-running this cell never appends the dataset twice.
paths = []
labels = []
for dirname,dirnames,filenames in os.walk(data_directory):
  # os.walk order depends on the filesystem; sorting in place makes the row
  # order (and therefore the later train/test split) reproducible.
  dirnames.sort()
  for filename in sorted(filenames):
    # Skip stray non-audio files that would break feature extraction later.
    if not filename.lower().endswith('.wav'):
      continue
    paths.append(os.path.join(dirname,filename))
    # The label is the text after the last underscore and before the first dot,
    # lower-cased (per-file debug prints were dropped to keep the output short).
    label = filename.split('_')[-1].split('.')[0]
    labels.append(label.lower())

In [None]:
len(paths)

In [None]:
paths[:7]

In [None]:
labels[:7]

In [None]:
# Empty frame; its columns are populated in the next cell.
df = pd.DataFrame()

In [None]:
# One row per recording: the file path and the emotion parsed from its name.
df = pd.DataFrame({'speech': paths, 'label': labels})

In [None]:
df.head()

In [None]:
df['label'].value_counts()

In [None]:
# Class balance. The column is passed by keyword: seaborn's first positional
# parameter is `data`, so a bare Series is not reliably interpreted as x.
sns.countplot(data = df,x = 'label')

In [None]:
def waveplot(data,sr,emotion):
  """Show the time-domain waveform of one clip, titled with its emotion.

  data: audio samples; sr: sampling rate of `data`; emotion: title text.
  """
  plt.figure(figsize=(10, 4))
  librosa.display.waveshow(data, sr=sr)
  plt.title(emotion, size=20)
  plt.show()

In [None]:
def spectogram(data,sr,emotion):
  """Draw a dB-scaled STFT spectrogram of one clip, titled with its emotion.

  data: audio samples; sr: sampling rate of `data`; emotion: title text.
  The figure is left open (no plt.show()), so the caller decides when it renders.
  """
  magnitude = np.abs(librosa.stft(data))
  decibels = librosa.amplitude_to_db(magnitude)
  plt.figure(figsize=(11, 4))
  plt.title(emotion, size=20)
  # Time on the x-axis, frequency in Hz on the y-axis.
  librosa.display.specshow(decibels, sr=sr, x_axis='time', y_axis='hz')
  plt.colorbar()

In [None]:
# First 'angry' clip in the dataset: waveform, spectrogram, then an inline player.
emotion = 'angry'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First 'disgust' clip in the dataset: waveform, spectrogram, then an inline player.
emotion = 'disgust'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First 'fear' clip in the dataset: waveform, spectrogram, then an inline player.
emotion = 'fear'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First 'happy' clip in the dataset: waveform, spectrogram, then an inline player.
emotion = 'happy'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First 'neutral' clip in the dataset: waveform, spectrogram, then an inline player.
emotion = 'neutral'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First clip labelled 'ps' in the dataset: waveform, spectrogram, then an inline player.
# NOTE(review): 'ps' presumably stands for "pleasant surprise" - confirm against the dataset docs.
emotion = 'ps'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# First 'sad' clip in the dataset: waveform, spectrogram, then an inline player.
emotion = 'sad'
path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
def extract_mfcc(filename):
  """Return a 40-value MFCC summary of one audio file.

  Loads up to 4 seconds of audio starting 0.5 s into the file, computes 40
  MFCCs per frame and averages each coefficient over all frames.
  """
  signal, rate = librosa.load(filename, offset=0.5, duration=4)
  coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
  # Transpose so frames run along axis 0, then average them away.
  return np.mean(coefficients.T, axis=0)

In [None]:
extract_mfcc(df['speech'][0])

In [None]:
# One MFCC vector per recording; every file is loaded, so this is the slow cell.
X_mfcc = df['speech'].apply(extract_mfcc)

In [None]:
X_mfcc

In [None]:
# Stack the per-file MFCC vectors into a single 2-D array (one row per recording).
X = np.array(X_mfcc.tolist())
X.shape

In [None]:
# Add the trailing feature axis the models' (40, 1) input shape expects.
# X is rebuilt from X_mfcc instead of from itself, so re-running this cell
# cannot stack an extra axis on each run.
X = np.expand_dims(np.array(X_mfcc.tolist()),-1)
X.shape

In [None]:
# Learn the emotion categories, then one-hot encode the label column.
# The result is converted to a dense array in a later cell.
label_column = df[['label']]
enc = OneHotEncoder().fit(label_column)
y = enc.transform(label_column)

In [None]:
y[0]

In [None]:
# Dense one-hot targets. Re-encoding from df (instead of calling y.toarray() on
# y itself) keeps this cell safe to re-run: a second run of the old form failed
# because y was already a dense array without a toarray() method.
y = enc.transform(df[['label']]).toarray()

In [None]:
y

In [None]:
y.shape

In [None]:
# Shuffled train/test split of features and one-hot labels; random_state = 0
# makes the split reproducible. No test_size is passed, so the library default
# ratio applies. NOTE(review): the split is not stratified by label.
x_train,x_test,y_train,y_test = train_test_split(X,y,random_state = 0,shuffle = True)

In [None]:
x_train.shape,y_train.shape,x_test.shape,y_test.shape

In [None]:
# LSTM classifier: the 40 MFCC values are treated as a 40-step sequence with one
# feature per step, followed by two dense layers with dropout and a 7-way
# softmax (one unit per emotion previewed earlier in the notebook).
model_1 = Sequential()
model_1.add(LSTM(123, return_sequences=False, input_shape=(40, 1)))
model_1.add(Dense(64, activation='relu'))
model_1.add(Dropout(0.2))
model_1.add(Dense(32, activation='relu'))
model_1.add(Dropout(0.2))
model_1.add(Dense(7, activation='softmax'))

In [None]:
# Categorical cross-entropy pairs with the one-hot targets and softmax output;
# accuracy is tracked so it appears in the training history and in evaluate().
model_1.compile(loss = 'categorical_crossentropy',optimizer = 'adam',metrics = ['accuracy'])

In [None]:
model_1.summary()

In [None]:
# Train model_1 and keep the per-epoch metrics for the curves plotted below.
# NOTE(review): the validation data is the same (x_test, y_test) split later
# reported as test accuracy, so there is no separate held-out set.
history_1 = model_1.fit(x_train,y_train,validation_data = (x_test,y_test),epochs = 80,batch_size = 512,shuffle = True)

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is accuracy.
test_scores_1 = model_1.evaluate(x_test, y_test)
print("Accuracy of the model on test data :", test_scores_1[1] * 100, "%")

In [None]:
# Same metric on the training split, for comparison with the test figure.
train_scores_1 = model_1.evaluate(x_train, y_train)
print("Accuracy of the model on train data :", train_scores_1[1] * 100, "%")

In [None]:
# Accuracy curves for model_1.
acc = history_1.history['accuracy']
val_acc = history_1.history['val_accuracy']
# Take the x-axis from the recorded history rather than a hard-coded 80, so the
# plot stays valid if the epoch count in fit() changes or training stops early.
epochs = list(range(len(acc)))

plt.plot(epochs,acc,label = 'Train Accuracy')
plt.plot(epochs,val_acc,label = 'Val Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Loss curves for model_1.
loss = history_1.history['loss']
val_loss = history_1.history['val_loss']
# Build the x-axis here instead of relying on `epochs` left over from another
# cell, so this cell is correct whichever plot cell ran last.
epochs = list(range(len(loss)))
plt.plot(epochs,loss,label = 'Train Loss')
plt.plot(epochs,val_loss,label = 'Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Dense baseline: the (40, 1) input is flattened to 40 values, passed through two
# 128-unit ReLU layers and classified with the same 7-way softmax as model_1.
model_2 = keras.Sequential()
model_2.add(keras.layers.Flatten(input_shape=(40, 1)))
model_2.add(keras.layers.Dense(128, activation='relu'))
model_2.add(keras.layers.Dense(128, activation='relu'))
model_2.add(keras.layers.Dense(7, activation='softmax'))

In [None]:
# Same loss, optimizer and metric as model_1, so the two models are comparable.
model_2.compile(loss = 'categorical_crossentropy',optimizer = 'adam',metrics = ['accuracy'])

In [None]:
model_2.summary()

In [None]:
# Train model_2 with the same settings used for model_1 and keep its history.
# NOTE(review): validation again uses the (x_test, y_test) split that is later
# reported as test accuracy.
history_2 = model_2.fit(x_train,y_train,validation_data = (x_test,y_test),epochs = 80,batch_size = 512,shuffle = True)

In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is accuracy.
test_scores_2 = model_2.evaluate(x_test, y_test)
print("Accuracy of the model on test data :", test_scores_2[1] * 100, "%")

In [None]:
# Same metric on the training split, for comparison with the test figure.
train_scores_2 = model_2.evaluate(x_train, y_train)
print("Accuracy of the model on train data :", train_scores_2[1] * 100, "%")

In [None]:
# Accuracy curves for model_2.
acc = history_2.history['accuracy']
val_acc = history_2.history['val_accuracy']
# Take the x-axis from the recorded history rather than a hard-coded 80, so the
# plot stays valid if the epoch count in fit() changes or training stops early.
epochs = list(range(len(acc)))

plt.plot(epochs,acc,label = 'Train Accuracy')
plt.plot(epochs,val_acc,label = 'Val Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Loss curves for model_2.
loss = history_2.history['loss']
val_loss = history_2.history['val_loss']
# Build the x-axis here instead of relying on `epochs` left over from another
# cell, so this cell is correct whichever plot cell ran last.
epochs = list(range(len(loss)))
plt.plot(epochs,loss,label = 'Train Loss')
plt.plot(epochs,val_loss,label = 'Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
def func_pred_1(pred):
  """Predict the emotion name for one MFCC feature vector using model_1.

  pred: array of 40 values; it is reshaped to (-1, 40, 1) before prediction.
  Returns the class name looked up in the fitted encoder's categories.
  """
  prediction = model_1.predict(pred.reshape(-1,40,1))
  # The softmax columns follow the column order of the one-hot targets, i.e.
  # enc.categories_[0]. Indexing `labels` here was a bug: that list has one
  # entry per training file, so labels[k] is the label of the k-th file rather
  # than the name of class k.
  class_names = enc.categories_[0]
  return class_names[np.argmax(prediction,axis = -1)[0]]

In [None]:
def get_test_data(test_directory = r'/content/drive/MyDrive/Colab Notebooks/Projects/Speech Emotion Recognition/Test'):
  """Collect the .wav files under the test folder into a one-column DataFrame.

  test_directory: folder scanned recursively; defaults to the project's Test folder.
  Returns a DataFrame with a single 'val_speech' column of file paths, sorted so
  the order does not depend on the filesystem.
  """
  val_paths = []
  for dirname,_,filenames in os.walk(test_directory):
    for filename in filenames:
      # Only audio files can be passed to extract_mfcc; skip anything else.
      if filename.lower().endswith('.wav'):
        val_paths.append(os.path.join(dirname,filename))
  return pd.DataFrame({'val_speech': sorted(val_paths)})

temp = get_test_data()
for path in temp['val_speech']:
    pred = extract_mfcc(path)
    # Print the file next to its prediction so each result can be attributed.
    print(os.path.basename(path),'->',func_pred_1(pred))

In [None]:
def func_pred_2(pred):
  """Predict the emotion name for one MFCC feature vector using model_2.

  pred: array of 40 values; it is reshaped to (-1, 40, 1) before prediction.
  Returns the class name looked up in the fitted encoder's categories.
  """
  prediction = model_2.predict(pred.reshape(-1,40,1))
  # The softmax columns follow the column order of the one-hot targets, i.e.
  # enc.categories_[0]. Indexing `labels` here was a bug: that list has one
  # entry per training file, so labels[k] is the label of the k-th file rather
  # than the name of class k.
  class_names = enc.categories_[0]
  return class_names[np.argmax(prediction,axis = -1)[0]]

In [None]:
# get_test_data() is already defined in the model_1 prediction cell above; it is
# reused here instead of being redefined, so the two copies cannot drift apart.
temp = get_test_data()
for path in temp['val_speech']:
    pred = extract_mfcc(path)
    # Print the file next to its prediction so each result can be attributed.
    print(os.path.basename(path),'->',func_pred_2(pred))