In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import Module

In [None]:
import pandas as pd
import numpy as np
import os # File Manipulation
# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
# Audio Libraries
import librosa
import librosa.display

from IPython.display import Audio # Import Audio
import warnings
warnings.filterwarnings('ignore') # Will ignore all the warnings

## Load The Dataset

In [None]:
# Collect one (path, label) pair per audio clip found under /kaggle/input.
# The label is the last underscore-separated token of the file name, with
# everything from the first '.' onwards removed, lower-cased.
# NOTE(review): this walks *every* attached input, so unrelated audio such as
# the /kaggle/input/hellow clip used for inference later in this notebook is
# also ingested as a training sample -- consider walking only the dataset folder.
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac', '.ogg')

paths = []
labels = []
for dirname, dirnames, filenames in os.walk('/kaggle/input'):
    # os.walk yields entries in filesystem order; sorting in place makes the
    # resulting row order (and everything derived from it) reproducible.
    dirnames.sort()
    for filename in sorted(filenames):
        # Skip metadata / non-audio files so they do not become bogus classes.
        if not filename.lower().endswith(AUDIO_EXTENSIONS):
            continue
        paths.append(os.path.join(dirname, filename))
        label = filename.split('_')[-1].split('.')[0]
        labels.append(label.lower())
print('Dataset is loaded')

In [None]:
labels[:5]

In [None]:
paths[:5]

## Creating Dataframe

In [None]:
# Two-column frame: the clip's file path and the emotion label parsed from its name.
df = pd.DataFrame({'speech': paths, 'labels': labels})
df.head()

In [None]:
df['labels'].value_counts()

## Exploratory Data Analysis

In [None]:
# Samples per emotion class; if the classes turn out to be imbalanced,
# rebalance them before training.
# The column is passed by keyword because newer seaborn releases interpret the
# first positional argument as `data` rather than `x`.
sns.countplot(data=df, x='labels')

In [None]:
def waveplot(data, sr, emotion):
    """Draw the time-domain waveform of one audio clip.

    data    -- audio samples handed to librosa.display.waveshow
    sr      -- sampling rate that belongs to `data`
    emotion -- text used as the figure title
    """
    _, ax = plt.subplots(figsize=(10, 4))
    ax.set_title(emotion, size=20)
    librosa.display.waveshow(data, sr=sr, ax=ax)
    plt.show()
    
def spectogram(data, sr, emotion):
    """Draw the dB-scaled STFT magnitude spectrogram of one audio clip.

    data    -- audio samples handed to librosa.stft
    sr      -- sampling rate that belongs to `data`
    emotion -- text used as the figure title
    """
    # STFT is complex-valued; only its magnitude is converted to decibels.
    magnitude = np.abs(librosa.stft(data))
    xdb = librosa.amplitude_to_db(magnitude)
    plt.figure(figsize=(10, 4))
    plt.title(emotion, size=20)
    librosa.display.specshow(xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
    # Render explicitly, like waveplot does, so the figure does not depend on
    # the backend flushing open figures at the end of the cell.
    plt.show()

In [None]:
# Show the first clip labelled 'fear': waveform, spectrogram and an audio player.
emotion = 'fear'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)


In [None]:
# Show the first clip labelled 'angry': waveform, spectrogram and an audio player.
emotion = 'angry'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)

In [None]:
# Show the first clip labelled 'disgust': waveform, spectrogram and an audio player.
emotion = 'disgust'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)

In [None]:
# Show the first clip labelled 'neutral': waveform, spectrogram and an audio player.
emotion = 'neutral'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)

In [None]:
# Show the first clip labelled 'sad': waveform, spectrogram and an audio player.
emotion = 'sad'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)

In [None]:
# Show the first clip labelled 'ps': waveform, spectrogram and an audio player.
# NOTE(review): 'ps' is presumably the dataset's "pleasant surprise" class -- confirm.
emotion = 'ps'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)

In [None]:
# Show the first clip labelled 'happy': waveform, spectrogram and an audio player.
emotion = 'happy'
path = df.loc[df['labels'] == emotion, 'speech'].iloc[0]
data, sampling_rate = librosa.load(path)  # decoded with librosa's default settings

waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)

Audio(path)

## Feature Extraction

In [None]:
def extract_mfcc(filename, n_mfcc=40, duration=3, offset=0.5):
    """Return one time-averaged MFCC feature vector for an audio file.

    Parameters
    ----------
    filename : path of the audio file, passed to librosa.load
    n_mfcc   : number of MFCC coefficients to compute (default 40)
    duration : maximum number of seconds of audio to load (default 3)
    offset   : seconds to skip at the start of the file (default 0.5)

    Returns
    -------
    1-D numpy array with `n_mfcc` values: each coefficient averaged over all
    analysis frames of the loaded segment.
    """
    y, sr = librosa.load(filename, duration=duration, offset=offset)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Rows are coefficients and columns are frames, so averaging along axis 1
    # collapses the time dimension and leaves one value per coefficient.
    return np.mean(mfcc, axis=1)

In [None]:
extract_mfcc(df['speech'][0]) # Test for 1 file

In [None]:
# Run the feature extractor on every clip path; the result is a Series holding
# one MFCC vector per row.
X_mfcc = df['speech'].apply(extract_mfcc)

In [None]:
X_mfcc

In [None]:
# Stack the per-clip feature vectors into a single 2-dimensional numpy array
# (one row per clip).
X = np.array(X_mfcc.tolist())
X.shape

In [None]:
# Append a trailing axis of length 1 so the array has the 3-D layout the LSTM
# layer is given below (samples, features, 1).
x = X[..., np.newaxis]
x.shape  # number of samples, number of features, 1

In [None]:
X[1:]

In [None]:
# One-hot encode the emotion labels to build the training target.
from sklearn.preprocessing import OneHotEncoder

label_frame = df[['labels']]  # double brackets keep it 2-D, as the encoder requires
enc = OneHotEncoder()
enc.fit(label_frame)
y = enc.transform(label_frame)

In [None]:
labels[600:605]

In [None]:
y[600:605]

In [None]:
# Turn the sparse one-hot matrix into a dense array (a 1 marks the row's label).
# Guarded so that re-running this cell once `y` is already dense is a no-op
# instead of an AttributeError.
if hasattr(y, 'toarray'):
    y = y.toarray()

In [None]:
y.shape # Gives us the number of samples and number of categories

We don't need a separate train/test split here.
The validation split is done inside `model.fit` (via `validation_split`).

## Create the LSTM Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Layer sizes that depend on the data are read from the prepared arrays rather
# than hard-coded, so the network cannot drift out of sync with the feature
# extraction (x) or with the number of classes the encoder produced (y).
n_steps, n_channels = x.shape[1], x.shape[2]
n_classes = y.shape[1]

# Single recurrent (LSTM) layer followed by a small dense classifier head;
# dropout after every hidden layer for regularisation.
model = Sequential([
    LSTM(256, return_sequences=False, input_shape=(n_steps, n_channels)),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(n_classes, activation='softmax')  # one probability per emotion class
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
X.shape, y.shape

## Train the model

In [None]:
# x is the (samples, features, 1) input, y the one-hot target.
# Keras carves the validation set out of the *last* rows before any shuffling,
# and the rows are still in directory-walk order, so the samples are permuted
# once (with a fixed seed) to make the 20% hold-out representative.
order = np.random.default_rng(42).permutation(len(x))

history = model.fit(
    x[order],
    y[order],
    validation_split=0.2,
    epochs=80,
    batch_size=200,
    shuffle=True
)

## Plot the Results

In [None]:
accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']
# The x-axis is derived from the recorded history: a hard-coded epoch count
# that differs from the number of epochs actually trained makes plt.plot fail
# with a length mismatch.
epochs = list(range(len(accuracy)))

plt.plot(epochs, accuracy, label='train_accuracy')
plt.plot(epochs, validation_accuracy, label='validation_accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
loss = history.history['loss']
validation_loss = history.history['val_loss']
# Build the x-axis from this history so the cell does not rely on an `epochs`
# list whose length may not match the number of epochs actually trained.
epochs = list(range(len(loss)))

plt.plot(epochs, loss, label='train_loss')
plt.plot(epochs, validation_loss, label='validation_loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Make sure the output folder exists before writing the HDF5 checkpoint
# (harmless if the saver would have created it itself).
os.makedirs('/kaggle/working/trained_models', exist_ok=True)
model.save('/kaggle/working/trained_models/ser.h5')

In [None]:
import tensorflow as tf

# Convert the in-memory Keras model (not a SavedModel directory) to TFLite.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model. open() does not create missing parent folders,
# so the output directory is created first.
os.makedirs('/kaggle/working/trained_models', exist_ok=True)
with open('/kaggle/working/trained_models/model300epochs.tflite', 'wb') as f:
    f.write(tflite_model)

In [None]:
# Sanity-check the trained model on one external clip.
# The network consumes MFCC feature vectors, not file paths, so the clip goes
# through the same extract_mfcc step that produced the training data.
test_aud = extract_mfcc('/kaggle/input/hellow/harshitsad.wav')
# Reshape the single feature vector to (batch=1, features, 1) for the LSTM.
test_aud = np.asarray(test_aud).reshape(1, -1, 1)

ls = model.predict(test_aud)
print(ls)

# The probability columns follow the encoder's category order; map the most
# likely column back to its emotion label.
predicted_label = enc.categories_[0][np.argmax(ls, axis=1)[0]]
print('Predicted emotion:', predicted_label)

In [None]:
labels