# 0. L2 Regularization Cost
Write a function `def l2_reg_cost(cost, lambtha, weights, L, m):` that calculates the cost of a neural network with L2 regularization:

- `cost` is the cost of the network without L2 regularization
- `lambtha` is the regularization parameter
- `weights` is a dictionary of the weights and biases (`numpy.ndarrays`) of the neural network
- `L` is the number of layers in the neural network
- `m` is the number of data points used
- Returns: the cost of the network accounting for L2 regularization

In [1]:
import numpy as np


def l2_reg_cost(cost, lambtha, weights, L, m):
    """Return the cost of a network with the L2 penalty added.

    Parameters
    ----------
    cost : cost of the network without L2 regularization
    lambtha : regularization parameter
    weights : dict of the network parameters; only the entries stored
        under the keys 'W1' ... 'W<L>' are read
    L : number of layers in the network
    m : number of data points used

    Returns
    -------
    cost + lambtha * (sum of squared norms of W1..WL) / (2 * m), where
    each norm is np.linalg.norm with its default arguments.
    """
    squared_norms = 0
    for layer in range(1, L + 1):
        layer_weights = weights['W' + str(layer)]
        squared_norms = squared_norms + np.linalg.norm(layer_weights) ** 2
    return cost + lambtha * squared_norms / (2 * m)

In [2]:
#!/usr/bin/env python3

import numpy as np

if __name__ == '__main__':
    np.random.seed(0)

    # A dict literal is evaluated left to right, so the random draws
    # happen in W1, W2, W3 order.
    weights = {
        'W1': np.random.randn(256, 784),
        'W2': np.random.randn(128, 256),
        'W3': np.random.randn(10, 128),
    }

    # Random positive stand-in for the unregularized cost.
    cost = np.abs(np.random.randn(1))
    print(cost)

    # Same cost once the L2 penalty over the three layers is added.
    cost = l2_reg_cost(cost, 0.1, weights, 3, 1000)
    print(cost)

[0.41842822]
[12.11229237]


# 1. Gradient Descent with L2 Regularization
Write a function `def l2_reg_gradient_descent(Y, weights, cache, alpha, lambtha, L):` that updates the weights and biases of a neural network using gradient descent with L2 regularization:

* `Y` is a one-hot `numpy.ndarray` of shape (`classes, m`) that contains the correct labels for the data
    * `classes` is the number of classes
    * `m` is the number of data points
* `weights` is a dictionary of the weights and biases of the neural network
* `cache` is a dictionary of the outputs of each layer of the neural network
* `alpha` is the learning rate
* `lambtha` is the L2 regularization parameter
* `L` is the number of layers of the network
* The neural network uses `tanh` activations on each layer except the last, which uses a `softmax` activation
* The weights and biases of the network should be updated in place

In [3]:
#!/usr/bin/env python3
'''
Module that updates the weights and biases of a neural
network using gradient descent with L2 regularization
'''
import numpy as np


def l2_reg_gradient_descent(Y, weights, cache, alpha, lambtha, L):
    '''
    Run one step of gradient descent with L2 regularization.

    The layers are walked from the last one back to the first. The
    output-layer error is A_L - Y; every earlier layer multiplies the
    back-propagated error by 1 - A ** 2 (the tanh derivative).

    Parameters
    ----------
    Y : numpy.ndarray
        One-hot labels of shape (classes, m).
    weights : dict
        Weights and biases stored under 'W1', 'b1', ..., 'W<L>', 'b<L>'.
        The entries are replaced with the updated arrays.
    cache : dict
        Layer outputs stored under 'A0' (the input) to 'A<L>'.
    alpha : float
        Learning rate.
    lambtha : float
        L2 regularization parameter.
    L : int
        Number of layers of the network.

    Returns
    -------
    None. The result is written back into the weights dictionary.
    '''
    m = Y.shape[1]
    dz = None
    next_W = None
    for layer in range(L, 0, -1):
        w_key = 'W' + str(layer)
        b_key = 'b' + str(layer)
        A_curr = cache['A' + str(layer)]
        A_prev = cache['A' + str(layer - 1)]

        if layer == L:
            dz = A_curr - Y
        else:
            # next_W still holds the pre-update weights of the layer
            # that was processed in the previous iteration.
            dz = np.matmul(next_W.T, dz) * (1 - (A_curr * A_curr))
        next_W = weights[w_key]

        dw = np.matmul(A_prev, dz.T) / m
        db = np.sum(dz, axis=1, keepdims=True) / m

        # The L2 term lambtha / m * W is added to the weight gradient
        # only; the bias update has no regularization term.
        weights[w_key] = weights[w_key] - alpha * (
            dw.T + (lambtha / m * weights[w_key]))
        weights[b_key] = weights[b_key] - alpha * db


In [4]:
#!/usr/bin/env python3

import numpy as np
# l2_reg_gradient_descent = __import__('1-l2_reg_gradient_descent').l2_reg_gradient_descent


def one_hot(Y, classes):
    """Encode an array of class indices as a (classes, m) one-hot matrix.

    Column j of the result has a single 1 in row Y[j]; m is Y.shape[0].
    """
    sample_idx = np.arange(Y.shape[0])
    encoded = np.zeros((classes, sample_idx.size))
    encoded[Y, sample_idx] = 1
    return encoded

if __name__ == '__main__':
    # Load the data set and flatten every sample into one column, so
    # X_train has one column per data point.
    lib = np.load('../data/MNIST.npz')
    X_train_3D = lib['X_train']
    Y_train = lib['Y_train']
    X_train = X_train_3D.reshape(X_train_3D.shape[0], -1).T
    Y_train_oh = one_hot(Y_train, 10)

    np.random.seed(0)

    # A dict literal is evaluated left to right, so the random draws
    # happen in W1, W2, W3 order.
    weights = {
        'W1': np.random.randn(256, 784),
        'b1': np.zeros((256, 1)),
        'W2': np.random.randn(128, 256),
        'b2': np.zeros((128, 1)),
        'W3': np.random.randn(10, 128),
        'b3': np.zeros((10, 1)),
    }

    # Forward pass: tanh on the two hidden layers, softmax on the output.
    cache = {'A0': X_train}
    cache['A1'] = np.tanh(weights['W1'] @ cache['A0'] + weights['b1'])
    cache['A2'] = np.tanh(weights['W2'] @ cache['A1'] + weights['b2'])
    Z3 = weights['W3'] @ cache['A2'] + weights['b3']
    cache['A3'] = np.exp(Z3) / np.exp(Z3).sum(axis=0)

    # Show W1 before and after a single regularized update step.
    print(weights['W1'])
    l2_reg_gradient_descent(Y_train_oh, weights, cache, 0.1, 0.1, 3)
    print(weights['W1'])

[[ 1.76405235  0.40015721  0.97873798 ...  0.52130375  0.61192719
  -1.34149673]
 [ 0.47689837  0.14844958  0.52904524 ...  0.0960042  -0.0451133
   0.07912172]
 [ 0.85053068 -0.83912419 -1.01177408 ... -0.07223876  0.31112445
  -1.07836109]
 ...
 [-0.60467085  0.54751161 -1.23317415 ...  0.82895532  1.44161136
   0.18972404]
 [-0.41044606  0.85719512  0.71789835 ... -0.73954771  0.5074628
   1.23022874]
 [ 0.43129249  0.60767018 -0.07749988 ... -0.26611561  2.52287972
   0.73131543]]
[[ 1.76405199  0.40015713  0.97873779 ...  0.52130364  0.61192707
  -1.34149646]
 [ 0.47689827  0.14844955  0.52904513 ...  0.09600419 -0.04511329
   0.07912171]
 [ 0.85053051 -0.83912402 -1.01177388 ... -0.07223874  0.31112438
  -1.07836088]
 ...
 [-0.60467073  0.5475115  -1.2331739  ...  0.82895516  1.44161107
   0.189724  ]
 [-0.41044598  0.85719495  0.71789821 ... -0.73954756  0.5074627
   1.2302285 ]
 [ 0.4312924   0.60767006 -0.07749987 ... -0.26611556  2.52287922
   0.73131529]]


# 2. L2 Regularization Cost
Write the function `def l2_reg_cost(cost):` that calculates the cost of a neural network with L2 regularization:

* `cost` is a tensor containing the cost of the network without L2 regularization
* Returns: a tensor containing the cost of the network accounting for L2 regularization

In [5]:
#!/usr/bin/env python3
'''
Module that calculates the cost of a neural network with L2 regularization
'''
import tensorflow.compat.v1 as tf


def l2_reg_cost(cost):
    '''
    Add the aggregated regularization loss to a network cost.

    Parameters
    ----------
    cost : tensor
        Cost of the network without L2 regularization.

    Returns
    -------
    A tensor holding cost plus the value returned by
    tf.losses.get_regularization_loss().
    '''
    regularization = tf.losses.get_regularization_loss()
    return cost + regularization




In [6]:
#!/usr/bin/env python3
'''
Module that calculates the cost of a neural network with L2 regularization
'''
import tensorflow.compat.v1 as tf


def l2_reg_cost(cost):
    '''
    Add the individual regularization losses to a network cost.

    Parameters
    ----------
    cost : tensor
        Cost of the network without L2 regularization.

    Returns
    -------
    A tensor holding cost added to the list returned by
    tf.losses.get_regularization_losses(), i.e. one entry per
    collected regularization loss rather than a single total.
    '''
    per_layer_losses = tf.losses.get_regularization_losses()
    return cost + per_layer_losses


## Ojo
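* En los recursos leídos se usaba `get_regularization_loss`, que devuelve un único valor (la suma de todas las pérdidas de regularización).
* En el video de resolución se utiliza `get_regularization_loss`**`es`**, que devuelve una lista con una pérdida por capa (aquí, un vector de 3 elementos).
* Si se ejecuta el programa varias veces en la misma sesión, las pérdidas se acumulan en el grafo por defecto y el vector va creciendo.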

In [7]:
#!/usr/bin/env python3

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
# l2_reg_cost = __import__('2-l2_reg_cost').l2_reg_cost

def one_hot(Y, classes):
    """Encode an array of class indices as a (classes, m) one-hot matrix.

    Column j of the result has a single 1 in row Y[j]; m is Y.shape[0].
    """
    sample_idx = np.arange(Y.shape[0])
    encoded = np.zeros((classes, sample_idx.size))
    encoded[Y, sample_idx] = 1
    return encoded

# Driver for the TensorFlow l2_reg_cost: builds a three-layer network whose
# kernels all carry an L2 regularizer and prints the regularized cost.
np.random.seed(4)
# Number of samples to use, drawn from [1000, 2000).
m = np.random.randint(1000, 2000)
# Number of classes.
c = 10
lib= np.load('../data/MNIST.npz')

# First m samples, each flattened into one row: X is (m, n0).
X = lib['X_train'][:m].reshape((m, -1))
# one_hot returns (classes, m); the transpose gives one row per sample.
Y = one_hot(lib['Y_train'][:m], c).T

n0 = X.shape[1]
# Sizes of the two hidden layers, drawn from [10, 1000).
n1, n2 = np.random.randint(10, 1000, 2)

# L2 regularization factor shared by all three layers.
lam = 0.09
tf.set_random_seed(0)

x = tf.placeholder(tf.float32, (None, n0))
y = tf.placeholder(tf.float32, (None, c))

# Each Dense layer passes kernel_regularizer=L2(lam).
# NOTE(review): the order in which the layers are created presumably affects
# the seeded initial values - keep it unchanged unless that is confirmed.
a1 = tf.layers.Dense(n1, activation=tf.nn.tanh, kernel_initializer=tf.keras.initializers.VarianceScaling(scale=2.0, mode=("fan_avg")), kernel_regularizer=tf.keras.regularizers.L2(lam))(x)
a2 = tf.layers.Dense(n2, activation=tf.nn.sigmoid, kernel_initializer=tf.keras.initializers.VarianceScaling(scale=2.0, mode=("fan_avg")), kernel_regularizer=tf.keras.regularizers.L2(lam))(a1)
# No activation on the last layer: y_pred is passed to the softmax loss below.
y_pred = tf.layers.Dense(c, activation=None, kernel_initializer=tf.keras.initializers.VarianceScaling(scale=2.0, mode=("fan_avg")), kernel_regularizer=tf.keras.regularizers.L2(lam))(a2)

cost = tf.losses.softmax_cross_entropy(y, y_pred)

# l2_reg_cost must already be defined in the running session.
l2_cost = l2_reg_cost(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(l2_cost, feed_dict={x: X, y: Y}))


[56.430164 49.647583  6.044121]


# 3. Create a Layer with L2 Regularization
Write a function `def l2_reg_create_layer(prev, n, activation, lambtha):` that creates a tensorflow layer that includes L2 regularization:

* `prev` is a tensor containing the output of the previous layer
* `n` is the number of nodes the new layer should contain
* `activation` is the activation function that should be used on the layer
* `lambtha` is the L2 regularization parameter
* Returns: the output of the new layer

In [8]:
#!/usr/bin/env python3
'''
Module that creates a TensorFlow layer that includes
L2 regularization
'''
import tensorflow.compat.v1 as tf


def l2_reg_create_layer(prev, n, activation, lambtha):
    '''
    Create a dense TensorFlow layer whose kernel is L2-regularized.

    Parameters
    ----------
    prev : tensor
        Output of the previous layer.
    n : int
        Number of nodes in the new layer.
    activation : callable or None
        Activation function passed to the layer.
    lambtha : float
        L2 regularization parameter.

    Returns
    -------
    The output of the new layer applied to prev.
    '''
    kernel_init = tf.keras.initializers.VarianceScaling(scale=2.0,
                                                        mode="fan_avg")
    kernel_reg = tf.keras.regularizers.L2(lambtha)
    dense = tf.layers.Dense(n,
                            activation=activation,
                            kernel_initializer=kernel_init,
                            kernel_regularizer=kernel_reg)
    return dense(prev)


In [9]:
#!/usr/bin/env python3

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
# l2_reg_cost = __import__('2-l2_reg_cost').l2_reg_cost
# l2_reg_create_layer = __import__('3-l2_reg_create_layer').l2_reg_create_layer

def one_hot(Y, classes):
    """Encode an array of class indices as an (m, classes) one-hot matrix.

    Row i of the result has a single 1 in column Y[i]; m is Y.shape[0].
    """
    sample_idx = np.arange(Y.shape[0])
    encoded = np.zeros((sample_idx.size, classes))
    encoded[sample_idx, Y] = 1
    return encoded

# Driver for l2_reg_create_layer: builds two regularized layers and prints
# the cost returned by l2_reg_cost.
if __name__ == '__main__':
    lib= np.load('../data/MNIST.npz')
    X_train_3D = lib['X_train']
    Y_train = lib['Y_train']
    # Flatten every sample into one row: X_train is (m, features).
    X_train = X_train_3D.reshape((X_train_3D.shape[0], -1))
    Y_train_oh = one_hot(Y_train, 10)

    tf.set_random_seed(0)
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y = tf.placeholder(tf.float32, shape=[None, 10])
    h1 = l2_reg_create_layer(x, 256, tf.nn.tanh, 0.05)
    # NOTE(review): y_pred is built from x, not from h1, so h1 does not feed
    # the prediction even though its layer is still created. Confirm whether
    # this is intended before changing it; the printed values depend on it.
    y_pred = l2_reg_create_layer(x, 10, None, 0.)
    cost = tf.losses.softmax_cross_entropy(y, y_pred)
    # l2_reg_cost must already be defined in the running session.
    l2_cost = l2_reg_cost(cost)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(l2_cost, feed_dict={x: X_train, y: Y_train_oh}))


[56.440594  49.658012   6.0545483 41.180496   2.5326564]
