# *Recognition of spoken digits using Deep learning frameworks*

In [1]:
import utils # We import the utility library that we will use throughout this Notebook

### 1. Reading the data

In [None]:
# Data parser reads the waveforms of the .wav files located in the "digits" directory
# (per the original note; the path is resolved inside utils.data_parser -- confirm there).
# The three returned collections are indexed in parallel by later cells, i.e. the
# same position in wavs, speakers and digits refers to the same recording.
wavs, speakers, digits = utils.data_parser()

### 2. Visualizing & Displaying some samples of the dataset

In [None]:
# Hand the first four recordings, together with their speaker and digit entries,
# to the plotting helper (presumably one waveform plot per sample -- see utils.vis_wave_form).
utils.vis_wave_form(wavs[:4], speakers[:4], digits[:4])

In [None]:
# Sample 1: audio player for the first recording.
# The file name is the digit label followed by the speaker label, inside "digits".
import os
import IPython.display as ipd

ipd.Audio(os.path.join("digits", f"{digits[0]}{speakers[0]}.wav"))

In [None]:
# Sample 2: audio player for the second recording (digit label + speaker label + ".wav")
ipd.Audio(os.path.join("digits", f"{digits[1]}{speakers[1]}.wav"))

In [None]:
# Sample 3: audio player for the third recording (digit label + speaker label + ".wav")
ipd.Audio(os.path.join("digits", f"{digits[2]}{speakers[2]}.wav"))

In [None]:
# Sample 4: audio player for the fourth recording (digit label + speaker label + ".wav")
ipd.Audio(os.path.join("digits", f"{digits[3]}{speakers[3]}.wav"))

### 3. Extracting the MFCCs, deltas and delta-deltas for each audio sample.

In [None]:
# Compute the per-recording features from the raw waveforms. The helper returns three
# parallel collections (MFCCs, deltas and delta-deltas, per the section title).
# The following cell concatenates matching entries along axis 1, so each entry is
# presumably a 2-D (frames, coefficients) array -- confirm in utils.extract_mfcss.
mfccs, deltas, ddeltas = utils.extract_mfcss(wavs)

### 4. Combining MFCCs, Deltas and Delta-Deltas to create a feature vector (the input to the classic ML classifiers tested in Part 1)

In [None]:
import numpy as np

# Build one fixed-length feature vector per recording: the per-column mean and
# standard deviation (taken over axis 0) of the side-by-side
# [MFCC | delta | delta-delta] matrix of that recording.

# The three streams are consumed in lockstep, so they must describe the same recordings.
assert len(mfccs) == len(deltas) == len(ddeltas), \
    "mfccs, deltas and ddeltas must have one entry per recording"

feature_rows = []
for mfcc, delta, ddelta in zip(mfccs, deltas, ddeltas):
    # Join the three streams column-wise; rows (axis 0) are kept as they are.
    frames = np.concatenate((mfcc, delta, ddelta), axis=1)
    # Collapse axis 0 into [means..., stds...] so every recording yields the same
    # length regardless of how many rows it has.
    feature_rows.append(
        np.concatenate((frames.mean(axis=0), frames.std(axis=0)), axis=0)
    )

# Stack once at the end: calling np.append inside the loop copies the whole matrix
# on every iteration. The width is now derived from the data instead of being
# hard-coded.
if feature_rows:
    features = np.vstack(feature_rows)
else:
    # np.vstack rejects an empty list; keep the empty (0, 78) float32 matrix the
    # original cell produced when there were no recordings.
    features = np.empty((0, 78), dtype=np.float32)


In [None]:
# 2D scatter plot of the first two feature dimensions, coloured by spoken digit

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Spoken word -> integer label. `nums` is reused by later cells (plots and labels).
correspondence = {
    "one": 1, "two": 2, "three": 3,
    "four": 4, "five": 5, "six": 6,
    "seven": 7, "eight": 8, "nine": 9,
}
nums = [correspondence[word] for word in digits]

# One row per recording; sorting by the numeric label orders the rows by digit value.
data = pd.DataFrame(
    {"x": features[:, 0], "y": features[:, 1], "labels": digits, "nums": nums}
)
data = data.sort_values(by="nums")

sns.set_style("dark")
fig, ax = plt.subplots(figsize=(9, 5))

sns.scatterplot(data=data, x="x", y="y", hue="labels", ax=ax)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Visualizing the first two dimensions of features", fontsize=15)
plt.show()

In [None]:
# Project the feature vectors onto their first two principal components and plot them
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_new = pca.fit_transform(features)

# One row per recording; sorting by the numeric label orders the rows by digit value.
data = (
    pd.DataFrame(
        {"x": X_new[:, 0], "y": X_new[:, 1], "labels": digits, "nums": nums}
    )
    .sort_values(by="nums")
)

sns.set_style("dark")
fig, ax = plt.subplots(figsize=(9, 5))

sns.scatterplot(data=data, x="x", y="y", hue="labels", ax=ax)
ax.set_xlabel("Principal component 1")
ax.set_ylabel("Principal component 2")
ax.set_title("Visualizing the data samples using 2D PCA", fontsize=15)
plt.show()

## Part 1. Testing some classic ML Classifiers

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Inputs are the per-recording summary features; targets are the integer digit labels.
X, labels = features, nums

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.30,
    random_state=42,
)

#### 5.1 Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Standardise the features, then classify with Gaussian Naive Bayes.
gnb = Pipeline(steps=[("scaler", StandardScaler()), ("gnb", GaussianNB())])

# Fit on the training split, then predict the held-out split.
gnb.fit(X_train, y_train)
y_preds = gnb.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

# Print scikit-learn's text report (per-class precision, recall, F1 and support)
# comparing the held-out labels with the predictions currently stored in y_preds.
print(classification_report(y_test,y_preds))

#### 5.2 SVM

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# Standardise the features, then classify with an SVC using gamma="auto".
clf = make_pipeline(StandardScaler(), SVC(gamma="auto"))

# Fit on the training split, then predict the held-out split.
clf.fit(X_train, y_train)
y_preds = clf.predict(X_test)

print(classification_report(y_test, y_preds))

## Part 2. Deep Learning Models

In [None]:
# Name of the dataset handed to utils.parser (presumably a directory of recordings
# -- confirm in utils).
dataset = "recordings"

# NOTE: this rebinds X_train, X_test, y_train and y_test, replacing the split created
# earlier for the classic classifiers; re-running those cells after this one would
# operate on the new data. spk_train / spk_test presumably hold the speaker of each
# sample -- confirm in utils.parser.
X_train, X_test, y_train, y_test, spk_train, spk_test = utils.parser(dataset)

#### 5.1 LSTM

In [None]:
from torch.utils.data import DataLoader
from utils import collate_pad_zeros

In [None]:
# Number of samples per mini-batch; passed as batch_size to the DataLoader built below.
BATCH_SIZE = 16

In [None]:
# Wrap the training split in the project's dataset class so DataLoader can consume it.
train_dset = utils.custom_dataset(X_train, y_train)
# Batches are assembled by collate_pad_zeros (presumably zero-pads variable-length
# sequences to a common length -- confirm in utils).
# NOTE(review): shuffle is not passed, so DataLoader keeps its default (no shuffling);
# confirm the data is already shuffled or that ordered batches are intended for training.
train_dloader = DataLoader(train_dset, batch_size = BATCH_SIZE,
                            collate_fn=collate_pad_zeros)