# 0. Sequential
Write a function `def build_model(nx, layers, activations, lambtha, keep_prob):` that builds a neural network with the Keras library:

* `nx` is the number of input features to the network
* `layers` is a `list` containing the number of nodes in each layer of the network
* `activations` is a `list` containing the activation functions used for each layer of the network
* `lambtha` is the L2 regularization parameter
* `keep_prob` is the probability that a node will be kept for dropout
* You are not allowed to use the Input class
* Returns: the keras model

In [None]:
#!/usr/bin/env python3
'''
Module that creates a neural network with
the keras library
'''
import tensorflow.keras as K


def build_model(nx, layers, activations, lambtha, keep_prob):
    '''
    Build a fully connected network with the Keras Sequential class.

    Every Dense layer uses the same L2 kernel regularizer, and a Dropout
    layer is inserted in front of every Dense layer except the first one.

    Parameters
    ----------
    nx : int
        Number of input features to the network.
    layers : list
        Number of nodes in each layer of the network.
    activations : list
        Activation function used for each layer of the network; paired
        with `layers` position by position.
    lambtha : float
        L2 regularization parameter.
    keep_prob : float
        Probability that a node will be kept for dropout.

    Returns
    -------
    The keras model.

    '''
    network = K.models.Sequential()
    regularizer = K.regularizers.l2(lambtha)
    for index, (units, activation) in enumerate(zip(layers, activations)):
        if index > 0:
            # Dropout expects the fraction to drop, hence 1 - keep_prob.
            network.add(K.layers.Dropout(rate=1 - keep_prob))
            dense = K.layers.Dense(units,
                                   activation=activation,
                                   kernel_regularizer=regularizer)
        else:
            # Only the first layer declares the size of the input.
            dense = K.layers.Dense(units,
                                   input_dim=nx,
                                   activation=activation,
                                   kernel_regularizer=regularizer)
        network.add(dense)
    return network


In [None]:
#!/usr/bin/env python3

# Force Seed - fix for Keras
# Driver cell for task 0: every random source touched below (the hash seed
# environment variable, `random`, NumPy and TensorFlow) is set to the same
# SEED before the model is built, so the printed weights are repeatable.
SEED = 0

# Each import is immediately followed by its seeding call; keep this order.
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

# Presumably the import used when this cell is run as a standalone script;
# in the notebook build_model comes from the cell above.
# build_model = __import__('0-sequential').build_model

if __name__ == '__main__':
    # 784 inputs, layers of 256/256/10 nodes with tanh/tanh/softmax,
    # lambtha=0.001 and keep_prob=0.95.
    network = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    network.summary()
    # Show the model's loss terms and its weights.
    print(network.losses)
    print(network.weights)

# 1. Input
Write a function `def build_model(nx, layers, activations, lambtha, keep_prob):` that builds a neural network with the Keras library:

* `nx` is the number of input features to the network
* `layers` is a list containing the number of nodes in each layer of the network
* `activations` is a list containing the activation functions used for each layer of the network
* `lambtha` is the L2 regularization parameter
* `keep_prob` is the probability that a node will be kept for dropout
* You are not allowed to use the `Sequential` class
* Returns: the keras model

In [1]:
#!/usr/bin/env python3
'''
Module that builds a neural network with keras
without using the Sequential class
'''
import tensorflow.keras as K


def build_model(nx, layers, activations, lambtha, keep_prob):
    '''
    Build a fully connected network with the Keras functional API.

    The Sequential class is not used. Every Dense layer uses the same L2
    kernel regularizer, and a Dropout layer is inserted in front of every
    Dense layer except the first one.

    Parameters
    ----------
    nx : int
        Number of input features.
    layers : list
        Number of nodes in each layer.
    activations : list
        Activation function of each layer; paired with `layers` position
        by position.
    lambtha : float
        L2 regularization parameter.
    keep_prob : float
        Probability that a node will be kept for dropout.

    Returns
    -------
    Keras model.

    Raises
    ------
    ValueError
        If `layers` or `activations` is empty, because there would be no
        output tensor to build the model from.

    '''
    layer_specs = list(zip(layers, activations))
    if not layer_specs:
        # Without this guard the output tensor is never assigned and the
        # function fails later with an unhelpful UnboundLocalError.
        raise ValueError('layers and activations must not be empty')

    inputs = K.Input(shape=(nx,))
    regularizer = K.regularizers.l2(lambtha)
    outputs = inputs
    for index, (units, activation) in enumerate(layer_specs):
        if index > 0:
            # Dropout expects the fraction to drop, hence 1 - keep_prob.
            outputs = K.layers.Dropout(1 - keep_prob)(outputs)
        outputs = K.layers.Dense(units,
                                 activation=activation,
                                 kernel_regularizer=regularizer)(outputs)
    return K.Model(inputs=inputs, outputs=outputs)




In [None]:
#!/usr/bin/env python3

# Force Seed - fix for Keras
# Driver cell for task 1: every random source touched below (the hash seed
# environment variable, `random`, NumPy and TensorFlow) is set to the same
# SEED before the model is built.
SEED = 0

# Each import is immediately followed by its seeding call; keep this order.
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K
# Presumably the import used when this cell is run as a standalone script;
# in the notebook build_model comes from the cell above.
# build_model = __import__('1-input').build_model

if __name__ == '__main__':
    # 784 inputs, layers of 256/256/10 nodes with tanh/tanh/softmax,
    # lambtha=0.001 and keep_prob=0.95.
    network = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    network.summary()
    # Show the model's loss terms.
    print(network.losses)

# 2. Optimize
Write a function `def optimize_model(network, alpha, beta1, beta2):` that sets up Adam optimization for a keras model with categorical crossentropy loss and accuracy metrics:

* `network` is the model to optimize
* `alpha` is the learning rate
* `beta1` is the first Adam optimization parameter
* `beta2` is the second Adam optimization parameter
* Returns: None

In [2]:
#!/usr/bin/env python3
'''
Module that sets up Adam optimization for a keras model
with categorical crossentropy loss and accuracy metrics
'''
import tensorflow.keras as K


def optimize_model(network, alpha, beta1, beta2):
    '''
    Compile a keras model for training with the Adam optimizer.

    The model is compiled in place with categorical crossentropy loss and
    the accuracy metric.

    Parameters
    ----------
    network : keras model
        Model to optimize.
    alpha : float
        Learning rate.
    beta1 : float
        First Adam optimization parameter.
    beta2 : float
        Second Adam optimization parameter.

    Returns
    -------
    None.

    '''
    adam = K.optimizers.Adam(learning_rate=alpha, beta_1=beta1, beta_2=beta2)
    crossentropy = K.losses.CategoricalCrossentropy()
    network.compile(optimizer=adam, loss=crossentropy, metrics=['accuracy'])


In [None]:
#!/usr/bin/env python3

import tensorflow as tf

# build_model = __import__('1-input').build_model
# optimize_model = __import__('2-optimize').optimize_model

if __name__ == '__main__':
    # Build the network, compile it, then print what compile() attached.
    model = build_model(
        784,
        [256, 256, 10],
        ['tanh', 'tanh', 'softmax'],
        0.001,
        0.95,
    )
    optimize_model(model, 0.01, 0.99, 0.9)
    print(model.loss)
    optimizer = model.optimizer
    print(optimizer.__class__)
    # Learning rate and both beta values, converted with .numpy().
    hyper_parameters = (optimizer.lr, optimizer.beta_1, optimizer.beta_2)
    print(tuple(value.numpy() for value in hyper_parameters))


# 3. One Hot
Write a function `def one_hot(labels, classes=None):` that converts a label vector into a one-hot matrix:

* The last dimension of the one-hot matrix must be the number of classes
* Returns: the one-hot matrix

In [3]:
#!/usr/bin/env python3
'''
Module that converts a label vector into a
one-hot matrix
'''
import tensorflow.keras as K


def one_hot(labels, classes=None):
    '''
    Convert a label vector into a one-hot matrix.

    Parameters
    ----------
    labels : vector
        Labels to be converted into a one-hot matrix.
    classes : optional
        Number of classes, forwarded to keras as `num_classes`.
        The default is None.

    Returns
    -------
    The one-hot matrix (requested as float32); its last dimension is the
    number of classes.

    '''
    encoded = K.utils.to_categorical(labels,
                                     num_classes=classes,
                                     dtype='float32')
    return encoded


In [None]:
#!/usr/bin/env python3

import numpy as np
# one_hot = __import__('3-one_hot').one_hot

if __name__ == '__main__':
    # Print the first ten training labels, then their one-hot encoding.
    dataset = np.load('../data/MNIST.npz')
    labels = dataset['Y_train'][:10]
    print(labels)
    encoded = one_hot(labels)
    print(encoded)

# 4. Train
Write a function `def train_model(network, data, labels, batch_size, epochs, verbose=True, shuffle=False):` that trains a model using mini-batch gradient descent:

* `network` is the model to train
* `data` is a `numpy.ndarray` of shape (`m, nx`) containing the input data
* `labels` is a one-hot `numpy.ndarray` of shape (`m, classes`) containing the labels of data
* `batch_size` is the size of the batch used for mini-batch gradient descent
* `epochs` is the number of passes through data for mini-batch gradient descent
* `verbose` is a `boolean` that determines if output should be printed during training
* `shuffle` is a `boolean` that determines whether to shuffle the batches every epoch. Normally, it is a good idea to shuffle, but for reproducibility, we have chosen to set the default to False.
* Returns: the History object generated after training the model

In [None]:
#!/usr/bin/env python3
'''
Module that trains a model using mini-batch gradient descent
'''
import tensorflow.keras as K


def train_model(network, data, labels, batch_size, epochs,
                verbose=True, shuffle=False):
    '''
    Train a model using mini-batch gradient descent.

    Parameters
    ----------
    network : model
        Model to train.
    data : numpy.ndarray
        Array of shape (m, nx) containing the input data.
    labels : numpy.ndarray
        One-hot array of shape (m, classes) containing the labels of data.
    batch_size : int
        Size of the batch used for mini-batch gradient descent.
    epochs : int
        Number of passes through data for mini-batch gradient descent.
    verbose : bool, optional
        Determines if output should be printed during training.
        The default is True.
    shuffle : bool, optional
        Determines whether to shuffle the batches every epoch.
        The default is False.

    Returns
    -------
    History object generated after training the model.

    '''
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'verbose': verbose,
        'shuffle': shuffle,
    }
    history = network.fit(x=data, y=labels, **fit_options)
    return history


In [None]:
#!/usr/bin/env python3
"""
Main file
"""

# Force Seed - fix for Keras
# Driver cell for task 4: every random source touched below (the hash seed
# environment variable, `random`, NumPy and TensorFlow) is set to the same
# SEED before the model is built and trained.
SEED = 0

# Each import is immediately followed by its seeding call; keep this order.
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

# Imports
# Presumably used when this cell is run as a standalone script; in the
# notebook these functions come from the cells above.
# build_model = __import__('1-input').build_model
# optimize_model = __import__('2-optimize').optimize_model
# one_hot = __import__('3-one_hot').one_hot
# train_model = __import__('4-train').train_model


if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    # Keep the first axis and collapse all remaining axes into one.
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)

    # Model hyper-parameters.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyper-parameters.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    # Training schedule.
    batch_size = 64
    epochs = 5
    train_model(network, X_train, Y_train_oh, batch_size, epochs)

# 5. Validate
Based on `4-train.py`, update the function `def train_model(network, data, labels, batch_size, epochs, validation_data=None, verbose=True, shuffle=False):` to also analyze validation data:

* `validation_data` is the data to validate the model with, if not None


In [4]:
#!/usr/bin/env python3
'''
Module that trains a model using mini-batch gradient descent
and also analyzes validation data
'''
import tensorflow.keras as K


def train_model(network, data, labels, batch_size, epochs,
                validation_data=None, verbose=True, shuffle=False):
    '''
    Train a model using mini-batch gradient descent, optionally
    analyzing validation data as well.

    Parameters
    ----------
    network : model
        Model to train.
    data : numpy.ndarray
        Array of shape (m, nx) containing the input data.
    labels : numpy.ndarray
        One-hot array of shape (m, classes) containing the labels of data.
    batch_size : int
        Size of the batch used for mini-batch gradient descent.
    epochs : int
        Number of passes through data for mini-batch gradient descent.
    validation_data : optional
        Data to validate the model with, if not None. It is forwarded to
        `network.fit` unchanged. The default is None.
    verbose : bool, optional
        Determines if output should be printed during training.
        The default is True.
    shuffle : bool, optional
        Determines whether to shuffle the batches every epoch.
        The default is False.

    Returns
    -------
    History object generated after training the model.

    '''
    # validation_data is passed straight through: None already means
    # "no validation", and testing its truthiness would raise for
    # array-like values.
    return network.fit(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_data=validation_data,
                       verbose=verbose,
                       shuffle=shuffle)


In [5]:
#!/usr/bin/env python3
"""
Main file
"""

# Force Seed - fix for Keras
# Driver cell for task 5: every random source touched below (the hash seed
# environment variable, `random`, NumPy and TensorFlow) is set to the same
# SEED before the model is built and trained.
SEED = 0

# Each import is immediately followed by its seeding call; keep this order.
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

# Imports
# Presumably used when this cell is run as a standalone script; in the
# notebook these functions come from the cells above.
# build_model = __import__('1-input').build_model
# optimize_model = __import__('2-optimize').optimize_model
# one_hot = __import__('3-one_hot').one_hot
# train_model = __import__('5-train').train_model

if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    # Training split: keep the first axis, collapse the rest, one-hot labels.
    X_train = datasets['X_train']
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)
    # Validation split, prepared the same way.
    X_valid = datasets['X_valid']
    X_valid = X_valid.reshape(X_valid.shape[0], -1)
    Y_valid = datasets['Y_valid']
    Y_valid_oh = one_hot(Y_valid)

    # Model hyper-parameters.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyper-parameters.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    # Training schedule; the validation split is handed to train_model.
    batch_size = 64
    epochs = 5
    train_model(network, X_train, Y_train_oh, batch_size, epochs, validation_data=(X_valid, Y_valid_oh))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# 6. Early Stopping
Based on `5-train.py`, update the function `def train_model(network, data, labels, batch_size, epochs, validation_data=None, early_stopping=False, patience=0, verbose=True, shuffle=False):` to also train the model using early stopping:

* `early_stopping` is a `boolean` that indicates whether early stopping should be used
    * early stopping should only be performed if `validation_data` exists
    * early stopping should be based on validation loss
* `patience` is the patience used for early stopping

In [6]:
#!/usr/bin/env python3
'''
Module that trains a model using mini-batch gradient descent,
with optional validation data and early stopping
'''
import tensorflow.keras as K


def train_model(network, data, labels, batch_size, epochs,
                validation_data=None, early_stopping=False, patience=0,
                verbose=True, shuffle=False):
    '''
    Train a model using mini-batch gradient descent, optionally with
    validation data and early stopping.

    Parameters
    ----------
    network : model
        Model to train.
    data : numpy.ndarray
        Array of shape (m, nx) containing the input data.
    labels : numpy.ndarray
        One-hot array of shape (m, classes) containing the labels of data.
    batch_size : int
        Size of the batch used for mini-batch gradient descent.
    epochs : int
        Number of passes through data for mini-batch gradient descent.
    validation_data : optional
        Data to validate the model with, if not None. It is forwarded to
        `network.fit` unchanged. The default is None.
    early_stopping : bool, optional
        Indicates whether early stopping should be used. It is only
        applied when `validation_data` is also given, and it monitors the
        validation loss. The default is False.
    patience : int, optional
        Patience used for early stopping. The default is 0.
    verbose : bool, optional
        Determines if output should be printed during training.
        The default is True.
    shuffle : bool, optional
        Determines whether to shuffle the batches every epoch.
        The default is False.

    Returns
    -------
    History object generated after training the model.

    '''
    callbacks = []
    # Early stopping needs both the caller's request and validation data,
    # because it watches val_loss. Otherwise the list stays empty, so fit
    # never receives a None entry in place of a callback.
    if early_stopping and validation_data is not None:
        callbacks.append(K.callbacks.EarlyStopping(monitor='val_loss',
                                                   patience=patience))
    return network.fit(x=data,
                       y=labels,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       validation_data=validation_data,
                       shuffle=shuffle,
                       callbacks=callbacks)


In [8]:
#!/usr/bin/env python3
"""
Main file
"""

# Force Seed - fix for Keras
# Driver cell for task 6: every random source touched below (the hash seed
# environment variable, `random`, NumPy and TensorFlow) is set to the same
# SEED before the model is built and trained.
SEED = 0

# Each import is immediately followed by its seeding call; keep this order.
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow as tf
tf.random.set_seed(SEED)
import tensorflow.keras as K

# Imports
# Presumably used when this cell is run as a standalone script; in the
# notebook these functions come from the cells above.
# build_model = __import__('1-input').build_model
# optimize_model = __import__('2-optimize').optimize_model
# one_hot = __import__('3-one_hot').one_hot
# train_model = __import__('6-train').train_model


if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    # Training split: keep the first axis, collapse the rest, one-hot labels.
    X_train = datasets['X_train']
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)
    # Validation split, prepared the same way.
    X_valid = datasets['X_valid']
    X_valid = X_valid.reshape(X_valid.shape[0], -1)
    Y_valid = datasets['Y_valid']
    Y_valid_oh = one_hot(Y_valid)

    # Model hyper-parameters.
    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    # Adam hyper-parameters.
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    # Up to 30 epochs; early stopping is requested with a patience of 3
    # and the validation split is supplied for it.
    batch_size = 64
    epochs = 30
    train_model(network, X_train, Y_train_oh, batch_size, epochs,
                validation_data=(X_valid, Y_valid_oh), early_stopping=True,
                patience=3)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
