In [None]:

#loading the files as given in assignment
import numpy as np
import os

class WSJ():
    """ Lazily load the WSJ speech dataset

        Ensure the WSJ_PATH environment variable is the path to the
        directory containing all data files (.npy) provided on Kaggle.
        Each split is read from disk the first time its property is
        accessed and cached on the instance afterwards.

        Example usage:
            loader = WSJ()
            trainX, trainY = loader.train
            assert(trainX.shape[0] == 24590)

        Properties:
            dev   -- tuple returned by load_raw(WSJ_PATH, 'dev')
            train -- tuple returned by load_raw(WSJ_PATH, 'train')
            test  -- (contents of test.npy, None); labels are returned as None

        Raises:
            KeyError: on first access of a property if WSJ_PATH is not set.
    """

    def __init__(self):
        # This was spelled `_init_`, which Python never calls as a constructor,
        # so the cache attributes did not exist and every property raised
        # AttributeError on first access.
        self.dev_set = None
        self.train_set = None
        self.test_set = None

    @property
    def dev(self):
        """(features, labels) for the dev split, loaded on first access."""
        if self.dev_set is None:
            self.dev_set = load_raw(os.environ['WSJ_PATH'], 'dev')
        return self.dev_set

    @property
    def train(self):
        """(features, labels) for the train split, loaded on first access."""
        if self.train_set is None:
            self.train_set = load_raw(os.environ['WSJ_PATH'], 'train')
        return self.train_set

    @property
    def test(self):
        """(features, None) for the test split, loaded on first access."""
        if self.test_set is None:
            test_path = os.path.join(os.environ['WSJ_PATH'], 'test.npy')
            # allow_pickle=True matches load_raw: without it np.load refuses
            # pickled object arrays. Unpickling can execute arbitrary code,
            # so only point WSJ_PATH at trusted files.
            features = np.load(test_path, encoding='bytes', allow_pickle=True)
            self.test_set = (features, None)
        return self.test_set
    
def load_raw(path, name):
    """Load one split from ``path`` and return ``(data, labels)``.

    ``data`` is read from ``<name>.npy`` and ``labels`` from
    ``<name>_labels.npy``. Both files are loaded with ``encoding='bytes'``
    and ``allow_pickle=True``.
    """
    load_options = dict(encoding='bytes', allow_pickle=True)

    data_file = os.path.join(path, '{}.npy'.format(name))
    labels_file = os.path.join(path, '{}_labels.npy'.format(name))

    data = np.load(data_file, **load_options)
    labels = np.load(labels_file, **load_options)
    return (data, labels)

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Load the 'dev' split from Drive: first element goes to X, second (the *_labels file) to Y
file='/content/drive/MyDrive/NN/'
files = load_raw(file,'dev')
X, Y = files


In [None]:
# Show the shapes of the first entry of X and of Y
X[0].shape,Y[0].shape

((388, 40), (388,))

In [None]:
#X and Y are arrays of arrays: stack X row-wise and join Y end-to-end into single arrays
X, Y = np.vstack(X), np.hstack(Y)

In [None]:
# Show the shapes of X and Y
X.shape,Y.shape

((669294, 40), (669294,))

In [None]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier

#Split into train/test (test_size=0.3), then hold out the last 10000 training rows
#as a validation set for hyperparameter tuning

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y.reshape(-1, 1),
    test_size=0.3,
    random_state=42,
)

# The right-hand sides are evaluated before assignment, so both slices are
# taken from the full training arrays.
X_train, X_val = X_train[:-10000], X_train[-10000:]
Y_train, Y_val = Y_train[:-10000], Y_train[-10000:]

In [None]:
#Drop the X and Y names, which are no longer needed, and run the garbage collector to release memory
import gc
del X, Y
gc.collect()

140

In [None]:
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt
from keras_tuner.engine.tuner_utils import TunerStats

#One-hot encode the Y arrays into 138 categories

num_classes = 138
Y_train, Y_val, Y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (Y_train, Y_val, Y_test)
)

Hyperparameter tuning to find the most accurate single-hidden-layer MLP

In [None]:
#First attempt: maximize accuracy using a single hidden layer. Here we tune the hyperparameters.
#Single Layer
class MyHyperModel(kt.HyperModel):
    """Search space for an MLP with one hidden layer and a 138-way softmax output."""

    def build(self, hp):
        """Build and compile a model from the hyperparameters sampled in `hp`.

        Tuned values: hidden width ('units'), hidden activation
        ('activation') and Adam learning rate ('lr').
        """
        hidden_units = hp.Int('units', 32, 512, step=32)
        hidden_activation = hp.Choice('activation', ['relu', 'tanh'])
        net = keras.Sequential()
        net.add(layers.Flatten())
        net.add(layers.Dense(units=hidden_units, activation=hidden_activation))
        net.add(layers.Dense(138, activation="softmax"))

        step_size = hp.Choice("lr", [0.0001,0.001,0.01,0.1])
        optimizer = keras.optimizers.Adam(learning_rate=step_size)
        net.compile(
            optimizer,
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        return net

    def fit(self, hp, model, *args, **kwargs):
        """Fit `model`, taking the batch size from `hp` (only 256 is offered)."""
        chosen_batch = hp.Choice("batch_size", [256])
        return model.fit(*args, batch_size=chosen_batch, **kwargs)

# Random search over the space above, ranked by validation accuracy.
Tuner_single = kt.RandomSearch(
    MyHyperModel(),
    objective="val_accuracy",
    max_trials=5,
    overwrite=True,
    directory="my_dir",
    project_name="tune_hypermodel_single",
)

In [None]:
#Single Layer
# Print the search space of the single-layer tuner
Tuner_single.search_space_summary()

Search space summary
Default search space size: 3
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
lr (Choice)
{'default': 0.0001, 'conditions': [], 'values': [0.0001, 0.001, 0.01, 0.1], 'ordered': True}


In [None]:
# Run the single-layer random search, scoring trials on the held-out validation split.
# The callbacks list was previously left unclosed (a SyntaxError) and referred to `tf`,
# which is never imported; `keras` (imported from tensorflow) provides the same callback.
Tuner_single.search(
    X_train,
    Y_train,
    epochs=5,
    validation_data=(X_val, Y_val),
    callbacks=[keras.callbacks.EarlyStopping('val_loss', patience=3)],
)

Trial 5 Complete [00h 01m 06s]
val_accuracy: 0.25380000472068787

Best val_accuracy So Far: 0.25380000472068787
Total elapsed time: 00h 05m 29s
INFO:tensorflow:Oracle triggered exit


The best validation accuracy found by random search for a single hidden layer is about 25.4 percent (0.2538).

In [None]:
# Print the results summary of the single-layer search
Tuner_single.results_summary()


Results summary
Results in my_dir/tune_hypermodel_single
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7f00f3f0fed0>
Trial summary
Hyperparameters:
units: 448
activation: tanh
lr: 0.0001
batch_size: 256
Score: 0.25380000472068787
Trial summary
Hyperparameters:
units: 384
activation: relu
lr: 0.01
batch_size: 256
Score: 0.20679999887943268
Trial summary
Hyperparameters:
units: 32
activation: relu
lr: 0.0001
batch_size: 256
Score: 0.2054000049829483
Trial summary
Hyperparameters:
units: 384
activation: tanh
lr: 0.1
batch_size: 256
Score: 0.09529999643564224
Trial summary
Hyperparameters:
units: 128
activation: relu
lr: 0.1
batch_size: 256
Score: 0.08009999990463257


In [None]:
#Using the hyperparameters of the best single-layer trial (units=448, activation=tanh,
#lr=0.0001), we build our single hidden layer model
model = keras.Sequential()
model.add(layers.Flatten())
model.add(layers.Dense(units=448, activation='tanh'))
model.add(layers.Dense(138, activation="softmax"))
# The learning rate was 0.001 here, which did not match the best trial's 0.0001.
model.compile(
    keras.optimizers.Adam(learning_rate=0.0001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
#Fitting the single hidden layer model to data (the original run was stopped midway as it took a long time)
# Validate on the held-out (X_val, Y_val) split used during tuning, rather than carving
# another 10% out of X_train, so the numbers are comparable with the tuner's val_accuracy.
# workers / use_multiprocessing / max_queue_size were dropped: they only apply to
# generator or Sequence inputs, not to in-memory arrays.
# NOTE(review): newer Keras releases appear to no longer accept them in fit() - confirm.
# Arguments that merely restated the defaults were removed as well.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=256,
    epochs=15,
    validation_data=(X_val, Y_val),
    shuffle=True,
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


KeyboardInterrupt: ignored

Using multiple hidden layers to try to improve the accuracy of the MLP

We tune our hyper parameters for multiple layers here

In [None]:
# NOTE(review): this reuses the name MyHyperModel and therefore shadows the
# single-layer class defined in an earlier cell.
class MyHyperModel(kt.HyperModel):
    """Search space for an MLP with 1 to 5 hidden layers and a 138-way softmax output."""

    def build(self, hp):
        """Build and compile a model from the hyperparameters sampled in `hp`.

        Tuned values: number of hidden layers ('layers'), per-layer width
        ('units_<i>') and activation ('act_<i>'), and Adam learning rate ('lr').
        """
        net = keras.Sequential()
        net.add(layers.Flatten())

        depth = hp.Int('layers', 1, 5)
        for idx in range(depth):
            width = hp.Int('units_' + str(idx), 32, 512, step=32)
            nonlinearity = hp.Choice('act_' + str(idx), ['relu', 'tanh'])
            net.add(layers.Dense(units=width, activation=nonlinearity))
            # Dropout follows every hidden layer except the one at index 4.
            # NOTE(review): when fewer than 5 layers are sampled, the last
            # hidden layer still gets dropout - confirm this is intended.
            if idx >= 4:
                continue
            net.add(layers.Dropout(rate=0.2))

        net.add(layers.Dense(138, activation="softmax"))

        step_size = hp.Choice("lr", [0.0001,0.001,0.01,0.1])
        optimizer = keras.optimizers.Adam(learning_rate=step_size)
        net.compile(
            optimizer,
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        return net

    def fit(self, hp, model, *args, **kwargs):
        """Fit `model`, taking the batch size from `hp` (only 256 is offered)."""
        chosen_batch = hp.Choice("batch_size", [256])
        return model.fit(*args, batch_size=chosen_batch, **kwargs)

# Random search over the multi-layer space, ranked by validation accuracy.
tuner = kt.RandomSearch(
    MyHyperModel(),
    objective="val_accuracy",
    max_trials=50,
    overwrite=True,
    directory="my_dir",
    project_name="tune_hypermodel",
)

In [None]:
# Print the search space of the multi-layer tuner
tuner.search_space_summary()

Search space summary
Default search space size: 4
layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 5, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
act_0 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
lr (Choice)
{'default': 0.0001, 'conditions': [], 'values': [0.0001, 0.001, 0.01, 0.1], 'ordered': True}


Parameter tuning of multiple hidden layers

In [None]:
# Run the multi-layer random search, scoring each trial on the held-out validation split
tuner.search(
    X_train,
    Y_train,
    epochs=20,
    validation_data=(X_val, Y_val),
)

Trial 27 Complete [00h 06m 44s]
val_accuracy: 0.25920000672340393

Best val_accuracy So Far: 0.30079999566078186
Total elapsed time: 04h 46m 50s

Search: Running Trial #28

Value             |Best Value So Far |Hyperparameter
2                 |2                 |layers
352               |384               |units_0
tanh              |tanh              |act_0
0.001             |0.001             |lr
256               |256               |batch_size
352               |416               |units_1
relu              |relu              |act_1
416               |448               |units_2
tanh              |tanh              |act_2
320               |None              |units_3
relu              |None              |act_3
160               |None              |units_4
tanh              |None              |act_4

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20

Our random search over the hyperparameter space (27 trials completed at the point shown above) reached a peak validation accuracy of about 30 percent, using two hidden layers with 300+ units each and a different activation function in each layer (tanh, then relu).

I also tried the task with scikit-learn's MLPClassifier, but its loss was higher than that of the Keras MLP built by adding layers manually.

In [None]:
from sklearn.neural_network import MLPClassifier

# Create model object
# hidden_layer_sizes gives the width of each hidden layer in turn, so the previous
# (3, 128) built a 3-unit layer followed by a 128-unit layer. Three layers of
# 128 units - presumably what was intended; confirm - is written (128, 128, 128).
clf = MLPClassifier(hidden_layer_sizes=(128, 128, 128),
                    random_state=42,
                    solver='adam',
                    batch_size=200,
                    verbose=True,
                    activation='relu',
                    learning_rate_init=0.001)

# Y_train has been one-hot encoded for Keras by this point. scikit-learn treats a
# 2-D indicator matrix as a multilabel problem, so convert back to one class index
# per sample; a column vector of class ids is simply flattened.
if Y_train.ndim == 2 and Y_train.shape[1] > 1:
    y_labels = Y_train.argmax(axis=1)
else:
    y_labels = np.ravel(Y_train)

# Fit data onto the model
clf.fit(X_train, y_labels)

Iteration 1, loss = 6.40504900
Iteration 2, loss = 4.62790324
Iteration 3, loss = 4.50327608
Iteration 4, loss = 4.44926606
Iteration 5, loss = 4.40704275
Iteration 6, loss = 4.35908356
Iteration 7, loss = 4.32303185
Iteration 8, loss = 4.30264529
Iteration 9, loss = 4.29081092
Iteration 10, loss = 4.28227626
Iteration 11, loss = 4.27655414
Iteration 12, loss = 4.27272661
Iteration 13, loss = 4.26862740
Iteration 14, loss = 4.26614793
Iteration 15, loss = 4.26279083
Iteration 16, loss = 4.26130793
Iteration 17, loss = 4.25982259
Iteration 18, loss = 4.25799825
Iteration 19, loss = 4.25664262
Iteration 20, loss = 4.25501096
Iteration 21, loss = 4.25455361
Iteration 22, loss = 4.25320188
Iteration 23, loss = 4.25223059
Iteration 24, loss = 4.25136046
Iteration 25, loss = 4.25025176
Iteration 26, loss = 4.25039735




MLPClassifier(batch_size=200, hidden_layer_sizes=(3, 128), random_state=42,
              verbose=True)

[]