In [None]:
#imports
import tensorflow.compat.v1.keras as K
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

In [None]:
"""Task 0"""
def build_model(nx, layers, activations, lambtha, keep_prob):
    """Builds a neural network with Keras
    nx: is the number of input features to the network
    layers: list containing the number of nodes in each layer
        of the network
    activations: list containing the activation functions
        used for each layer of the network
    lambtha: is the L2 regularization parameter
    keep_prob: is the probability that a node will be kept for dropout

    Returns: the keras model
    """
    reg = K.regularizers.l2(lambtha)
    model = K.Sequential()
    for i in range(len(layers)):
        if i == 0:
            model.add(K.layers.Dense(layers[i], activation=activations[i],
                                     kernel_regularizer=reg,
                                     input_shape=(nx,)))
        else:
            model.add(K.layers.Dropout(1 - keep_prob))
            model.add(K.layers.Dense(layers[i], activation=activations[i],
                                     kernel_regularizer=reg))
    return model

In [None]:
"""Task 0: main file"""
if __name__ == '__main__':
    network = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    network.summary()
    print(network.losses)

In [None]:
"""Task 1"""
def build_model(nx, layers, activations, lambtha, keep_prob):
    """Builds a neural network with Keras
    nx: is the number of input features to the network
    layers: list containing the number of nodes in each layer
        of the network
    activations: list containing the activation functions
        used for each layer of the network
    lambtha: is the L2 regularization parameter
    keep_prob: is the probability that a node will be kept for dropout

    Returns: the keras model"""
    reg = K.regularizers.l2(lambtha)

    inputs = K.Input(shape=(nx,))
    for i in range(len(layers)):
        if i == 0:
            layer = K.layers.Dense(layers[i], activation=activations[i],
                                   kernel_regularizer=reg)(inputs)
        else:
            layer = K.layers.Dense(layers[i], activation=activations[i],
                                   kernel_regularizer=reg)(layer)
        if i < len(layers) - 1:
            layer = K.layers.Dropout(1 - keep_prob)(layer)
    model = K.Model(inputs=inputs, outputs=layer)
    return model

In [None]:
"""Task 1: main file"""
if __name__ == '__main__':
    network = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    network.summary()
    print(network.losses)

In [None]:
"""Task 2"""
def optimize_model(network, alpha, beta1, beta2):
    """Sets up Adam optimization for a keras model with categorical
        crossentropy loss and accuracy metrics.
        Args:
            network: is the model to optimize
            alpha: is the learning rate
            beta1: is the first Adam optimization parameter
            beta2: is the second Adam optimization parameter

        Returns: 
            None
    """

    network.compile(optimizer=K.optimizers.Adam(lr=alpha,
                                                beta_1=beta1,
                                                beta_2=beta2),
                    loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
"""Task 2: main file"""
if __name__ == '__main__':
    model = build_model(784, [256, 256, 10], ['tanh', 'tanh', 'softmax'], 0.001, 0.95)
    optimize_model(model, 0.01, 0.99, 0.9)
    print(model.loss)
    print(model.metrics)
    opt = model.optimizer
    print(opt.__class__)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        #print(sess.run([opt.lr, opt.beta_1, opt.beta_2]))

In [29]:
"""Task 3"""
def one_hot(labels, classes=None):
    """Converts a label vector into a one-hot matrix
    Args:
        labels: is a numpy.ndarray with shape (m,) containing numeric class labels
        classes: is the maximum number of classes found in labels
    Conditions:
        The last dimension of the one-hot matrix must be the number of classes
    Returns:
        one-hot matrix"""

    return K.utils.to_categorical(labels, classes)

In [31]:
"""Task 3: main file"""
import numpy as np

if __name__ == '__main__':
    labels = np.load('../data/MNIST.npz')['Y_train'][:10]
    print(labels)
    print(one_hot(labels))   

[5 0 4 1 9 2 1 3 1 4]
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [32]:
"""Task 4"""
def train_model(network, data, labels, batch_size, epochs, verbose=True, shuffle=False):
    """Trains a model using mini-batch gradient descent
    Args:
        network: is the model to train
        data: is a numpy.ndarray of shape (m, nx) containing the input data
        labels: is a one-hot numpy.ndarray of shape (m, classes) containing the labels of data
        batch_size: is the size of the batch used for mini-batch gradient descent
        epochs: is the number of passes through data for mini-batch gradient descent
        verbose: is a boolean that determines if output should be printed during training
        shuffle: is a boolean that determines whether to shuffle the batches every epoch.
            Normally, it is a good idea to shuffle, but for reproducibility, we have chosen
            to set the default to False.
    Returns: the History object generated after training the model"""

    return network.fit(data, labels, batch_size=batch_size, epochs=epochs,
                       verbose=verbose, shuffle=shuffle)

In [33]:
"""Task 4: main file"""
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow.compat.v1 as tf
tf.set_random_seed(SEED)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)


if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)

    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    batch_size = 64
    epochs = 5
    train_model(network, X_train, Y_train_oh, batch_size, epochs)




  super().__init__(name, **kwargs)


Train on 50000 samples


2023-03-04 15:56:08.601623: W tensorflow/c/c_api.cc:291] Operation '{name:'decay_10/Assign' id:3036 op device:{requested: '', assigned: ''} def:{{{node decay_10/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](decay_10, decay_10/Initializer/initial_value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [34]:
"""Task 5"""
def train_model(network, data, labels, batch_size, epochs,
                validation_data=None, verbose=True, shuffle=False):
    """Trains a model using mini-batch gradient descent and also validates the model
    Args:
        network: is the model to train
        data: is a numpy.ndarray of shape (m, nx) containing the input data
        labels: is a one-hot numpy.ndarray of shape (m, classes) containing the labels of data
        batch_size: is the size of the batch used for mini-batch gradient descent
        epochs: is the number of passes through data for mini-batch gradient descent
        validation_data: is the data to validate the model with, if not None
        verbose: is a boolean that determines if output should be printed during training
        shuffle: is a boolean that determines whether to shuffle the batches every epoch.
            Normally, it is a good idea to shuffle, but for reproducibility, we have chosen
            to set the default to False.
    Returns: the History object generated after training the model"""

    return network.fit(data, labels, batch_size=batch_size,
                       epochs=epochs,validation_data=validation_data,
                       verbose=verbose, shuffle=shuffle)

In [35]:
"""Task 5: main file"""
SEED = 0

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
import random
random.seed(SEED)
import numpy as np
np.random.seed(SEED)
import tensorflow.compat.v1 as tf
tf.set_random_seed(SEED)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.backend.set_session(sess)


if __name__ == '__main__':
    datasets = np.load('../data/MNIST.npz')
    X_train = datasets['X_train']
    X_train = X_train.reshape(X_train.shape[0], -1)
    Y_train = datasets['Y_train']
    Y_train_oh = one_hot(Y_train)
    X_valid = datasets['X_valid']
    X_valid = X_valid.reshape(X_valid.shape[0], -1)
    Y_valid = datasets['Y_valid']
    Y_valid_oh = one_hot(Y_valid)

    lambtha = 0.0001
    keep_prob = 0.95
    network = build_model(784, [256, 256, 10], ['relu', 'relu', 'softmax'], lambtha, keep_prob)
    alpha = 0.001
    beta1 = 0.9
    beta2 = 0.999
    optimize_model(network, alpha, beta1, beta2)
    batch_size = 64
    epochs = 5
    train_model(network, X_train, Y_train_oh, batch_size, epochs, validation_data=(X_valid, Y_valid_oh))

  super().__init__(name, **kwargs)


Train on 50000 samples, validate on 10000 samples


2023-03-04 16:11:46.168634: W tensorflow/c/c_api.cc:291] Operation '{name:'training_2/Adam/dense_57/bias/m/Assign' id:4430 op device:{requested: '', assigned: ''} def:{{{node training_2/Adam/dense_57/bias/m/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_2/Adam/dense_57/bias/m, training_2/Adam/dense_57/bias/m/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 1/5

  updates = self.state_updates
2023-03-04 16:11:50.743997: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_16/AddN_1' id:4305 op device:{requested: '', assigned: ''} def:{{{node loss_16/AddN_1}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_16/mul, loss_16/AddN)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
