In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [19]:
import tensorflow as tf

In [20]:
#We'll assume that the default model is for MNIST digit recognition
def build_model(no_of_layers=1, architecture=(32,), activation_func='relu', input_shape=(28, 28), output_class=10):
    """Build an uncompiled, fully connected Keras classifier.

    The network is ``Flatten -> Dense * no_of_layers -> Dense(softmax)``.

    Args:
        no_of_layers: Number of hidden Dense layers to create.
        architecture: Sequence of hidden layer widths; the first
            ``no_of_layers`` entries are used.
        activation_func: Hidden layer activation; one of ``'relu'``,
            ``'sigmoid'`` or ``'tanh'``.
        input_shape: Shape of a single input sample, passed to ``Flatten``.
        output_class: Number of units in the final softmax layer.

    Returns:
        A ``tf.keras.models.Sequential`` model (not compiled).

    Raises:
        ValueError: If ``activation_func`` is not supported, or if
            ``architecture`` has fewer than ``no_of_layers`` entries.
    """
    supported_activations = ('relu', 'sigmoid', 'tanh')
    # Validate before touching TensorFlow so that bad arguments fail fast
    # with a clear message instead of an unbound local or an IndexError.
    if activation_func not in supported_activations:
        raise ValueError(
            "Unsupported activation_func %r; expected one of %s"
            % (activation_func, ", ".join(supported_activations))
        )
    if no_of_layers > len(architecture):
        raise ValueError(
            "architecture has %d entries but no_of_layers is %d"
            % (len(architecture), no_of_layers)
        )

    # The supported names are exactly the attribute names under tf.nn.
    activation = getattr(tf.nn, activation_func)

    layers = [tf.keras.layers.Flatten(input_shape=input_shape)]
    for i in range(no_of_layers):
        layers.append(tf.keras.layers.Dense(architecture[i], activation=activation))

    layers.append(tf.keras.layers.Dense(output_class, activation=tf.nn.softmax))
    return tf.keras.models.Sequential(layers)

In [21]:

# Compiles and trains a given Keras model on the given data.
# Uses the Adam optimizer.
#
#   learning_rate: learning rate for the Adam optimizer
#   batch_size: batch size for mini-batch optimization
#   epochs: number of epochs to train
#   verbose: verbosity of the training process

def compile_train_model(model, x_train, y_train, learning_rate=0.001, batch_size=1, epochs=10, verbose=0,
                        validation_data=None, validation_split=0.0):
    """Compile a Keras model with Adam and train it.

    The model is compiled with sparse categorical cross-entropy loss and the
    ``accuracy`` metric, then fitted on ``(x_train, y_train)``. The model is
    modified in place; the returned object is the same instance that was
    passed in, not a copy.

    Args:
        model: Keras model to compile and train.
        x_train: Training inputs, forwarded to ``model.fit``.
        y_train: Training labels, forwarded to ``model.fit``.
        learning_rate: Learning rate for the Adam optimizer.
        batch_size: Mini-batch size.
        epochs: Number of epochs to train.
        verbose: Verbosity flag forwarded to ``model.fit``.
        validation_data: Optional validation data forwarded to ``model.fit``.
        validation_split: Optional fraction of the training data held out for
            validation, forwarded to ``model.fit``.

    Returns:
        The same ``model`` instance after training.
    """
    try:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    except TypeError:
        # Fallback for older Keras releases that predate the `learning_rate`
        # keyword and only accept the deprecated `lr` spelling.
        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)

    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        verbose=verbose,
        validation_data=validation_data,
        validation_split=validation_split,
    )
    return model

In [22]:

# Plots training and validation accuracy against the epoch number, and training and
# validation loss against the epoch number, to help spot overfitting and to choose
# a suitable number of epochs.

def plot_loss_acc(model):
    """Plot accuracy and loss per epoch from a trained model's history.

    Reads ``model.history.history`` and draws two plots: accuracy vs. epoch
    and loss vs. epoch. Training curves are drawn as dots and validation
    curves as a line. The accuracy plot goes on the current figure and the
    loss plot on a new one.

    The accuracy metric is looked up under ``'acc'`` first and then
    ``'accuracy'`` (likewise ``'val_acc'`` / ``'val_accuracy'``). Validation
    curves are skipped when the history holds no validation metrics.

    Args:
        model: Object exposing ``history.history`` as a mapping from metric
            name to a per-epoch sequence of values.

    Raises:
        KeyError: If the history has no training accuracy or no training loss.
    """
    history = model.history.history

    def _first_recorded(*names):
        # Return the first metric series found under any of `names`, else None.
        for name in names:
            if name in history:
                return np.array(history[name])
        return None

    acc = _first_recorded('acc', 'accuracy')
    val_acc = _first_recorded('val_acc', 'val_accuracy')
    loss = _first_recorded('loss')
    val_loss = _first_recorded('val_loss')

    if acc is None:
        raise KeyError("history has neither 'acc' nor 'accuracy'")
    if loss is None:
        raise KeyError("history has no 'loss'")

    epochs = range(1, len(acc) + 1)

    plt.plot(epochs, acc, 'bo', label='Training Accuracy')
    if val_acc is not None:
        plt.plot(epochs, val_acc, 'b', label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.figure()

    plt.plot(epochs, loss, 'bo', label='Training Loss')
    if val_loss is not None:
        plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

In [23]:
#Implementing different activation functions in OOP
class Sigmoid():
    """Logistic sigmoid activation: 1 / (1 + exp(-x))."""

    def __call__(self, x):
        """Apply the sigmoid to ``x`` element-wise."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def gradient(self, x):
        """Return the derivative of the sigmoid at ``x``: s(x) * (1 - s(x))."""
        activated = self(x)
        return activated * (1 - activated)

class Softmax():
    """Softmax activation taken over the last axis."""

    def __call__(self, x):
        """Return the softmax of ``x`` along its last axis."""
        # Subtracting the per-row maximum keeps the exponentials bounded.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exponentials = np.exp(shifted)
        normaliser = np.sum(exponentials, axis=-1, keepdims=True)
        return exponentials / normaliser

    def gradient(self, x):
        """Return ``p * (1 - p)`` element-wise, where ``p`` is the softmax of ``x``.

        This is only the diagonal of the softmax Jacobian.
        """
        probabilities = self(x)
        return probabilities * (1 - probabilities)

class TanH():
    """Hyperbolic tangent activation."""

    def __call__(self, x):
        """Apply tanh to ``x`` element-wise.

        Uses ``np.tanh`` rather than ``2 / (1 + exp(-2x)) - 1``: the explicit
        formula cancels to exactly 0 for very small ``x`` and overflows inside
        ``exp`` for large negative ``x``.
        """
        return np.tanh(x)

    def gradient(self, x):
        """Return the derivative of tanh at ``x``: 1 - tanh(x)**2."""
        return 1 - np.power(self(x), 2)

class ReLU():
    """Rectified linear unit activation."""

    def __call__(self, x):
        """Keep entries of ``x`` that are >= 0 and replace the rest with 0."""
        keep = x >= 0
        return np.where(keep, x, 0)

    def gradient(self, x):
        """Return 1 where ``x`` is >= 0 and 0 elsewhere."""
        active = x >= 0
        return np.where(active, 1, 0)

class LeakyReLU():
    """Leaky ReLU activation with a configurable negative slope."""

    def __init__(self, alpha=0.2):
        """Store ``alpha``, the slope applied to negative inputs."""
        self.alpha = alpha

    def __call__(self, x):
        """Return ``x`` where it is >= 0 and ``alpha * x`` elsewhere."""
        scaled_negative = self.alpha * x
        return np.where(x >= 0, x, scaled_negative)

    def gradient(self, x):
        """Return 1 where ``x`` is >= 0 and ``alpha`` elsewhere."""
        non_negative = x >= 0
        return np.where(non_negative, 1, self.alpha)


In [24]:
#Implementing different optimizers in OOP
class StochasticGradientDescent():
    """Gradient descent with an optional exponentially averaged momentum term."""

    def __init__(self, learning_rate=0.01, momentum=0):
        """Store the hyperparameters; the running update starts unset."""
        self.learning_rate, self.momentum = learning_rate, momentum
        # Created lazily on the first update, with the shape of the weights.
        self.w_updt = None

    def update(self, w, grad_wrt_w):
        """Return the weights after one descent step.

        The running update is blended with the new gradient as
        ``momentum * previous + (1 - momentum) * grad_wrt_w`` and stored on
        the instance; the result is ``w - learning_rate * running_update``.
        """
        if self.w_updt is None:
            self.w_updt = np.zeros(np.shape(w))

        blended = self.momentum * self.w_updt + (1 - self.momentum) * grad_wrt_w
        self.w_updt = blended
        # Step against the gradient direction to reduce the loss.
        return w - self.learning_rate * blended
class RMSprop():
    """RMSprop optimizer: scales each step by a running RMS of past gradients."""

    def __init__(self, learning_rate=0.01, rho=0.9):
        """Store the hyperparameters; the running average starts unset."""
        self.learning_rate = learning_rate
        self.rho = rho
        self.eps = 1e-8
        # Running average of squared gradients, created on the first update.
        self.Eg = None

    def update(self, w, grad_wrt_w):
        """Return the weights after one RMSprop step.

        Updates the stored running average as
        ``rho * Eg + (1 - rho) * grad_wrt_w**2`` and divides the gradient by
        ``sqrt(Eg + eps)`` before applying the learning rate.
        """
        if self.Eg is None:
            self.Eg = np.zeros(np.shape(grad_wrt_w))

        squared_grad = np.power(grad_wrt_w, 2)
        self.Eg = self.rho * self.Eg + (1 - self.rho) * squared_grad

        # Each weight's step shrinks as its recent gradient magnitudes grow.
        rms = np.sqrt(self.Eg + self.eps)
        return w - self.learning_rate * grad_wrt_w / rms

class Adam():
    """Adam optimizer with bias-corrected first and second moment estimates."""

    def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
        """Store the hyperparameters and reset the optimizer state.

        Args:
            learning_rate: Step size.
            b1: Decay rate of the first-moment (mean) estimate.
            b2: Decay rate of the second-moment (squared gradient) estimate.
        """
        self.learning_rate = learning_rate
        self.eps = 1e-8
        self.m = None
        self.v = None
        # Decay rates
        self.b1 = b1
        self.b2 = b2
        # Number of updates performed so far; drives the bias correction.
        self.t = 0

    def update(self, w, grad_wrt_w):
        """Return the weights after one Adam step.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient of the loss with respect to ``w``.

        Returns:
            ``w`` minus the step, which is also stored as ``self.w_updt``.
        """
        # If not initialized
        if self.m is None:
            self.m = np.zeros(np.shape(grad_wrt_w))
            self.v = np.zeros(np.shape(grad_wrt_w))

        self.t += 1

        self.m = self.b1 * self.m + (1 - self.b1) * grad_wrt_w
        self.v = self.b2 * self.v + (1 - self.b2) * np.power(grad_wrt_w, 2)

        # The moments start at zero, so they are biased towards zero early on.
        # The correction must use the decay rate raised to the step count;
        # dividing by a constant (1 - b) is only right for the first step.
        m_hat = self.m / (1 - self.b1 ** self.t)
        v_hat = self.v / (1 - self.b2 ** self.t)

        self.w_updt = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps)

        return w - self.w_updt


In [25]:
#Implementing different kernel regularizers in OOP
class l1_regularization():
    """ Regularization for Lasso Regression """
    def __init__(self, alpha):
        """Store ``alpha``, the regularization strength."""
        self.alpha = alpha

    def __call__(self, w):
        """Return the L1 penalty ``alpha * sum(|w|)``.

        ``np.linalg.norm(w)`` without an ``ord`` argument is the L2
        (Frobenius) norm, which would not match ``grad``; the absolute values
        are summed explicitly instead.
        """
        return self.alpha * np.sum(np.abs(w))

    def grad(self, w):
        """Return the (sub)gradient of the penalty: ``alpha * sign(w)``."""
        return self.alpha * np.sign(w)

class l2_regularization():
    """ Regularization for Ridge Regression """
    def __init__(self, alpha):
        """Store ``alpha``, the regularization strength."""
        self.alpha = alpha

    def __call__(self, w):
        """Return the penalty ``alpha * 0.5 * w.T.dot(w)``."""
        half_alpha = self.alpha * 0.5
        squared_norm = w.T.dot(w)
        return half_alpha * squared_norm

    def grad(self, w):
        """Return the gradient of the penalty: ``alpha * w``."""
        strength = self.alpha
        return strength * w

class l1_l2_regularization():
    """ Regularization for Elastic Net Regression """
    def __init__(self, alpha, l1_ratio=0.5):
        """Store the hyperparameters.

        Args:
            alpha: Overall regularization strength.
            l1_ratio: Weight of the L1 term; the L2 term gets ``1 - l1_ratio``.
        """
        self.alpha = alpha
        self.l1_ratio = l1_ratio

    def __call__(self, w):
        """Return ``alpha * (l1_ratio * sum(|w|) + (1 - l1_ratio) * 0.5 * w.T.dot(w))``.

        The L1 term sums absolute values explicitly: ``np.linalg.norm(w)``
        without an ``ord`` argument is the L2 (Frobenius) norm and would not
        match the ``sign(w)`` term in ``grad``.
        """
        l1_contr = self.l1_ratio * np.sum(np.abs(w))
        l2_contr = (1 - self.l1_ratio) * 0.5 * w.T.dot(w)
        return self.alpha * (l1_contr + l2_contr)

    def grad(self, w):
        """Return ``alpha * (l1_ratio * sign(w) + (1 - l1_ratio) * w)``."""
        l1_contr = self.l1_ratio * np.sign(w)
        l2_contr = (1 - self.l1_ratio) * w
        return self.alpha * (l1_contr + l2_contr)
