### Preliminaries

In [None]:
import pandas as pd
import numpy as np
from numpy import save

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

import keras
from keras.utils import to_categorical
from keras.models import Sequential,Input,Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Conv1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU

In [None]:
## load processed data
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load .npy files that come from a trusted source.
training_data = np.load("training_data.npy", allow_pickle=True)
labels = np.load("labels.npy", allow_pickle=True)
# Display both shapes so a mismatch between samples and labels is visible at once.
training_data.shape, labels.shape

### 1D Convolutional Neural Network

Although we can reach good accuracy with 2D layers, 1D layers turned out to give better accuracy for this data. This is not surprising: 1D CNNs are widely used for speech recognition. So, let's get started.

Load the data, reshape it, and transform the targets.

In [None]:
# Arrange the features as (samples, 99, 13), the layout the Conv1D input expects
train_X = training_data.reshape(-1, 99, 13)
train_Y = labels

# One-hot encode the class labels
train_Y_one_hot = to_categorical(train_Y)

train_X.shape

Create validation set.

In [None]:
## Create the validation set
# 20% of the samples are held out; the fixed random_state makes the split repeatable.
(train_X, valid_X,
 train_label, valid_label) = train_test_split(
    train_X,
    train_Y_one_hot,
    test_size=0.2,
    random_state=13,
)

Hyperparameter tuning.

In [None]:
# set up hyperparameters
batch_size = 256
epochs = 1
# Take the class count from the one-hot targets rather than hard-coding it, so the
# softmax layer always has the same width as the label vectors it is trained against.
num_classes = train_Y_one_hot.shape[1]
# NOTE(review): this seeds NumPy's global RNG only; the deep-learning backend may
# keep its own RNG that needs a separate seed for reproducible training -- confirm.
np.random.seed(222)

As you can see, this is a very deep model. My group and I spent a lot of time tuning this model because we knew we could reach a very high accuracy. Eventually we were able to reach 95%, but I know that it is also possible to get 98%. I leave this challenge to you.

In [None]:
def _add_conv_stage(model, filters, pool=True, **conv_kwargs):
    """Append one Conv1D -> BatchNormalization -> LeakyReLU stage to `model`.

    Parameters
    ----------
    model : Sequential
        Model that receives the layers; it is modified in place.
    filters : int
        Number of filters of the Conv1D layer.
    pool : bool, default True
        When True the stage ends with MaxPooling1D(pool_size=2) and Dropout(0.2).
    **conv_kwargs
        Extra keyword arguments forwarded to Conv1D (used to pass `input_shape`
        to the first stage).
    """
    # NOTE(review): the convolution already applies ReLU before the batch
    # normalisation and LeakyReLU; kept as-is to preserve the original layer stack.
    model.add(Conv1D(filters, kernel_size=6, activation='relu', padding='same', **conv_kwargs))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.1))
    if pool:
        model.add(MaxPooling1D(pool_size=2, padding='same'))
        model.add(Dropout(0.2))


# 1D CNN: eleven convolutional stages followed by a dense classifier head.
fashion_model = Sequential()

# The first stage declares the input shape of one sample: (99, 13).
_add_conv_stage(fashion_model, 64, input_shape=(99, 13))

# (filters, pool) for the remaining ten stages, in order. Only the first
# 128-filter stage skips the pooling/dropout pair.
# NOTE(review): with padding='same' each pooling should halve the time axis
# rounding up (99 -> 50 -> 25 -> 13 -> 7 -> 4 -> 2 -> 1), so the last three
# poolings would see a length of 1 -- confirm with fashion_model.summary().
for filters, pool in [
    (128, False),
    (128, True), (128, True), (128, True),
    (256, True), (256, True), (256, True),
    (512, True),
    (1024, True), (1024, True),
]:
    _add_conv_stage(fashion_model, filters, pool=pool)

# Classifier head.
fashion_model.add(Flatten())
fashion_model.add(Dense(1024, activation='relu'))
# NOTE(review): ReLU output is never negative, so this LeakyReLU leaves the values
# unchanged; it is kept only so the layer stack matches the original model.
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model defined above
fashion_model.summary()

In [None]:
# Configure training: Adam with its default settings, categorical cross-entropy
# against the one-hot targets, and accuracy reported as the metric.
fashion_model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train on the training split and evaluate on the held-out validation split
# after every epoch; the value returned by fit() is kept in `fashion_train`.
fashion_train = fashion_model.fit(
    train_X,
    train_label,
    validation_data=(valid_X, valid_label),
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
)

If I remember correctly, we trained our model for 100 epochs. As always here, for the sake of the example I just ran it for 1 epoch.

## Conclusion

In this notebook we have discussed three different ways of dealing with a speech classification task. First, we tried to solve the problem with an ML algorithm; however, we could not go over 20% accuracy, which is normal because these kinds of tasks require DL methods. Therefore, two different types of CNN layers were applied: 2D and 1D. As a result, a very high accuracy was reached, although an even higher one is achievable.

What do we bring home?
- These kinds of tasks cannot be solved with classic ML algorithms
- A lot of time is required for tuning DL models and unfortunately we do not know in advance which one is the best. Trial and error is often the standard