In [359]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import beta       
import seaborn as sns
import random
from sklearn.model_selection import train_test_split

Getting dataset from the files

In [360]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='latin1')
    return dict

def NormalizeData(X):
    X = X.astype('float64')
    X /= np.std(X, axis = 0)
    return X
batches =  []
batches.append(unpickle("data_batch_1"))
batches.append(unpickle("data_batch_2"))
batches.append(unpickle("data_batch_3"))
batches.append(unpickle("data_batch_4"))
batches.append(unpickle("data_batch_5"))

features = []
labels = []
for batch in batches:
    features.append(np.array(batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)))
    labels.append(batch['labels'])
for feat in features:
    feat = NormalizeData(feat[:])
for lab in labels:
    maxval = np.max(lab)
    lab = np.eye(maxval+1)[lab]

In [361]:
class MISC:
    def cost_derivative(self, aFunc, y):
        derivative = aFunc - y
        return derivative
    
    def sigmoid(self, z):
        return 1.0/(1.0+np.exp(-z))
    
    def sigmoid_derivative(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def relu(z):
        return np.maximum(0,z)
    
    def relu_derivative(z):
        z[z<=0] = 0
        z[z>0] = 1
        return z

    def tanh(z):
        return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    
    def tanh_derivative(self, z):
        return 1 - np.power(self.tanh(z), 2)

    def softmax(x):
        e = np.exp(x-np.max(x))
        s = np.sum(e, axis=1, keepdims=True)
        return e/s   
    
    def softmax_derivative(self, z):
        return self.softmax(z)*self.softmax(1-z)

    
    def stringToFunc(self, stringAFunc, x):
        if (stringAFunc.equals("Relu")): return self.relu(x)
        elif (stringAFunc.equals("Sigmoid")): return self.sigmoid(x)
        elif (stringAFunc.equals("tanh")): return self.tanh(x)
        else: return self.softmax(x)

    def stringToFuncDer(self, stringAFunc, x):
        if (stringAFunc.equals("Relu")): return self.relu_derivative(x)
        elif (stringAFunc.equals("Sigmoid")): return self.sigmoid_derivative(x)
        elif (stringAFunc.equals("tanh")): return self.tanh_derivative(x)
        else: return self.softmax_derivative(x)



Multilayer perceptron algorithm:

In [362]:
class MLP:
    
    def __init__(self, aFunc, numOfHiddenLayers, numOfHiddenUnits):
        self.aFunc = aFunc
        self.numOfHiddenLayers = numOfHiddenLayers
        self.numOfHiddenUnits = numOfHiddenUnits

        self.weights = {}
        self.biases = {}

        new_arr = np.append([10], numOfHiddenUnits)
        numOfUnitsSum = np.append(new_arr, [1024])

        for i in range(numOfHiddenLayers + 1):
            self.weights[i + 1] = np.random.randn(numOfUnitsSum[i], numOfUnitsSum[i + 1])
            self.biases[i + 1] = np.zeros(numOfUnitsSum[i + 1])
            
        # i = 1
        # # Initialize weights and biases for hidden layers
        # for i in range(self.numOfHiddenLayers):
        #     self.weights[i + 1] = (np.random.randn(self.numOfHiddenUnits[i], self.numOfHiddenUnits[i-1]))
        #     self.biases[i + 1] = (np.zeros(self.numOfHiddenUnits[i]))
        
        # # Initialize weights and biases for output layer
        # self.weights[i + 1] = np.random.randn(10, self.numOfHiddenUnits[-1])
        # self.biases[i + 1] = np.zeros(10)
        
            
    def fit(self, x, y, optimizer):
        
        def backprop(self, Mlp, x, y):
            z = {}
            a = {1: x}  
            
            for i in range(1, Mlp.numOfHiddenLayers + 2):
                z[i + 1] = np.dot(a[i], Mlp.weights[i]) + Mlp.biases[i]
                a[i + 1] = MISC.stringToFunc(Mlp.aFunc[i + 1], z[i + 1])

            delta = MISC.cost_derivative(a[-1], y) * MISC.softmax_derivative(a[-1])
            dw = np.dot(a[Mlp.numOfHiddenLayers + 1].T, delta)
        
            update_params = {
                 self.numOfHiddenLayers + 1: (dw, delta)
            }

            for i in reversed(range(2, Mlp.numOfHiddenLayers + 2)):
                dr = MISC.stringToFuncDer(Mlp.aFunc[i], z[i])
                delta = np.dot(delta, Mlp.weights[i].T) * dr
                dw = np.dot(a[i - 1].T, delta)
                update_params[i - 1] = (dw, delta)

            return update_params


        self = optimizer.run(self,backprop, x, y)
        return self
            
    def predict(self, x):
        z = x
        for v in self.params[:-1]:
            z = self.aFunc(np.dot(x, v)) #N x M
            x = z
        yh = MISC.softmax(np.dot(z, self.params[-1]))#N
        return yh    

GradientDescent Algorithm

In [363]:
class GradientDescent:
    
    def __init__(self, learning_rate=.001, batch_size=16, max_iters=1e4, epsilon=1e-8, numOfHiddenLayers=0):
        self.numOfHiddenLayers = numOfHiddenLayers
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.epsilon = epsilon
        self.batch_size = batch_size
        
    def run(self, mlp,gradient_fn, x, y):
        m = x.shape
        num_batches = m[0] // self.batch_size
        norms = np.array([np.inf])
        t = 1
        while np.any(norms > self.epsilon) and t < self.max_iters:
            #permutation = np.random.permutation(len(x))
            x_shuffle = x#[permutation]
            y_shuffle = y#[permutation]
            
            for a in range(num_batches):
                start = a * self.batch_size
                end = start + self.batch_size
                x_batch = x_shuffle[start:end:]
                y_batch = y_shuffle[start:end:]

                # Compute the gradient 
                grad = gradient_fn(self,mlp,x_batch, y_batch)
                for k, v in grad.items():
                    dw = v[0]
                    # dw += (self.lambd) * self.w[k] 
                    self.weights[k] -= self.learning_rate * dw
                    self.biases[k] -= self.learning_rate * np.mean(v[1], 0)
                # for p in range(len(params)):
                #     params[p] -= self.learning_rate * grad[p]
                # norms = np.array([np.linalg.norm(g) for g in grad])
            t += 1
        return self

In [364]:
no_hidden = MLP((MISC.softmax),1,[64])
optimizer = GradientDescent(numOfHiddenLayers=0)
fit_no_hidden = no_hidden.fit(features[0],labels[0], optimizer)
optimizer = GradientDescent(numOfHiddenLayers=1)
single_layer=  MLP((MISC.relu, MISC.softmax),1,(256,))
fit_single_layer = single_layer.fit(batches[0][0],batches[0][1],optimizer)
optimizer = GradientDescent(numOfHiddenLayers=2)
two_layers = MLP((MISC.relu, MISC.softmax),2,(256,256))
fit_two_layers = two_layers.fit(batches[0][0],batches[0][1],optimizer)

ValueError: shapes (10,64) and (16,32,32,3) not aligned: 64 (dim 1) != 32 (dim 2)