In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import beta       
import seaborn as sns
import random
from sklearn.model_selection import train_test_split

Getting dataset from the files

In [7]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='latin1')
        # X=datadict['data'].reshape((len(datadict['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
        # dict_new=dict['data'].reshape((len(dict['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    return dict

def NormalizeData(X):
    X = X.astype('float64')
    X /= np.std(X, axis = 0)
    return X
batches =  []
batches.append(unpickle("data_batch_1"))
batches.append(unpickle("data_batch_2"))
batches.append(unpickle("data_batch_3"))
batches.append(unpickle("data_batch_4"))
batches.append(unpickle("data_batch_5"))
# batch_test = unpickle("test_batch")
# batch_meta = unpickle("batches.meta")
features = []
labels = []
for batch in batches:
    features.append(np.array(batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)))
    labels.append(batch['labels'])
for feat in features:
    feat = NormalizeData(feat[:])
for lab in labels:
    maxval = np.max(lab)
    lab = np.eye(maxval+1)[lab]

In [8]:
class MISC:    
    def cost_derivative(self, aFunc, y):
        derivative = aFunc - y
        return derivative
    
    def sigmoid(self, z):
        return 1.0/(1.0+np.exp(-z))
    def sigmoid_derivative(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def relu(z):
      return np.maximum(0,z)
    def relu_derivative(z):
        z[z<=0] = 0
        z[z>0] = 1
        return z

    def tanh(z):
	    return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    def tanh_derivative(self, z):
        return 1 - np.power(self.tanh(z), 2)

    def softmax(x):
      e = np.exp(x-np.max(x))
      s = np.sum(e, axis=1, keepdims=True)
      return e/s   
    def softmax_derivative(self, z):
       return self.softmax(z)*self.softmax(1-z)
    
    def stringToFunc(self, stringAFunc, x):
        if (stringAFunc.equals("Relu")): return self.relu(x)
        elif (stringAFunc.equals("Sigmoid")): return self.sigmoid(x)
        elif (stringAFunc.equals("tanh")): return self.tanh(x)
        else: return self.softmax(x)

    def stringToFuncDer(self, stringAFunc, x):
        if (stringAFunc.equals("Relu")): return self.relu_derivative(x)
        elif (stringAFunc.equals("Sigmoid")): return self.sigmoid_derivative(x)
        elif (stringAFunc.equals("tanh")): return self.tanh_derivative(x)
        else: return self.softmax_derivative(x)



Multilayer perceptron algorithm:

In [9]:
class MLP:
    
    def __init__(self, aFunc, numOfHiddenLayers, numOfHiddenUnits):
        self.aFunc = aFunc
        self.numOfHiddenLayers = numOfHiddenLayers
        self.numOfHiddenUnits = numOfHiddenUnits

        self.weights = {}
        self.biases = {}

        # Initialize biases for input layer
        self.biases.append(np.zeros(self.numOfHiddenUnits[0]))
        
        # Initialize weights and biases for hidden layers
        for i in range(1, self.numOfHiddenLayers):
            self.weights.append(np.random.randn(self.numOfHiddenUnits[i], self.numOfHiddenUnits[i-1]))
            self.biases.append(np.zeros(self.numOfHiddenUnits[i]))
        
        # Initialize weights and biases for output layer
        self.weights.append(np.random.randn(10, self.numOfHiddenUnits[-1]))
        self.biases.append(np.zeros(10))
            
    def fit(self, x, y, lr, epoch, optimizer):
        
        def backprop(self, x, y):
            z = {}
            a = {1: x}  
            dr = None 
            
            if (self.aFunc[i].equals("Relu")): dr = MISC.relu(z[i])
            elif (self.aFunc[i].equals("Sigmoid")): dr = MISC.sigmoid(z[i])
            elif (self.aFunc[i].equals("tanh")):dr = MISC.tanh(z[i])
            else: dr = MISC.softmax(z[i])
            
            for i in range(1, self.numOfHiddenLayers):
                z[i + 1] = np.dot(a[i], self.weights[i]) + self.biases[i]
                a[i + 1] = MISC.stringToFunc(self.aFunc[i + 1], z[i + 1])

            delta = self.cost_derivative(a[-1], y) * MISC.softmax_derivative(z[-1])
            dw = np.dot(a[self.numOfHiddenLayers - 1].T, delta)
        
            update_params = {
                self.numOfHiddenLayers - 1: (dw, delta)
            }

            for i in reversed(range(2, self.numOfHiddenLayers)):
                dr = MISC.stringToFuncDer(self.aFunc[i], z[i])
                delta = np.dot(delta, self.weights[i].T) * dr
                dw = np.dot(a[i - 1].T, delta)
                update_params[i - 1] = (dw, delta)

            return update_params

        self.params = optimizer.run(backprop, x, y)
        return self
            
    def predict(self, x):
        z = x
        for v in self.params[:-1]:
            z = self.aFunc(np.dot(x, v)) #N x M
            x = z
        yh = MISC.softmax(np.dot(z, self.params[-1]))#N
        return yh    

GradientDescent Algorithm

In [10]:
class GradientDescent:
    
    def __init__(self, learning_rate=.001, batch_size=16, max_iters=1e4, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.epsilon = epsilon
        self.batch_size = batch_size
        
    def run(self, gradient_fn, x, y, params):
        m = x.shape
        num_batches = m[0] // self.batch_size
        norms = np.array([np.inf])
        t = 1
        while np.any(norms > self.epsilon) and t < self.max_iters:
            permutation = np.random.permutation(m[1])
            x_shuffle = x[:, permutation]
            y_shuffle = y[:, permutation]
            
            for a in range(num_batches):
                start = a * self.batch_size
                end = start + self.batch_size
                x_batch = x_shuffle[start:end,:]
                y_batch = y_shuffle[start:end,:]

                # Compute the gradient 
                grad = gradient_fn(x_batch, y_batch, params)
                for p in range(len(params)):
                    params[p] -= self.learning_rate * grad[p]
                norms = np.array([np.linalg.norm(g) for g in grad])
            t += 1
        return params

In [11]:
no_hidden = MLP(MISC.softmax,0,0)
optimizer = GradientDescent()
fit_no_hidden = no_hidden.fit(batches[0][0],batches[0][1],.001, 100, optimizer)
single_layer=  MLP((MISC.relu, MISC.softmax),1,256)
fit_single_layer = single_layer.fit(batches[0][0],batches[0][1],.001, 100, optimizer)
two_layers = MLP((MISC.relu, MISC.softmax),2,256)
fit_two_layers = two_layers.fit(batches[0][0],batches[0][1],.001, 100, optimizer)


AttributeError: 'dict' object has no attribute 'append'