In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import beta       
import seaborn as sns
import random
from sklearn.model_selection import train_test_split
from keras.datasets import cifar10

Getting dataset from the files

In [78]:

def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='latin1')
    return dict

def NormalizeData(X):
    X = X.astype('float64')
    # X /= np.std(X, axis = 0)
    X /=255
    return X
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = np.reshape(X_train,(50000,3072))
X_test = np.reshape(X_test,(10000,3072))
X_train = NormalizeData(X_train)
X_test =  NormalizeData(X_test)
batches =  []
batches.append(unpickle("data_batch_1"))
batches.append(unpickle("data_batch_2"))
batches.append(unpickle("data_batch_3"))
batches.append(unpickle("data_batch_4"))
batches.append(unpickle("data_batch_5"))

features = []
labels = []
for batch in batches:
    features.append(np.array(batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)))
    labels.append(batch['labels'])
for feat in features:
    feat = NormalizeData(feat[:])
for lab in labels:
    maxval = np.max(lab)
    lab = np.eye(maxval+1)[lab]

In [79]:

def cost_derivative(aFunc, y):
    derivative = aFunc - y
    return derivative

def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_derivative(z):
    return sigmoid(z)*(1-sigmoid(z))

def relu(z):
    return np.maximum(0,z)

def relu_derivative(z):
    z[z<=0] = 0
    z[z>0] = 1
    return z

def tanh(z):
    return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))

def tanh_derivative(z):
    return 1 - np.power(tanh(z), 2)

def softmax(x):
    e = np.exp(x-np.max(x))
    s = np.sum(e, axis=1, keepdims=True)
    return e/s   

def softmax_derivative(z):
    return softmax(z)*softmax(1-z)


def stringToFunc(stringAFunc, x):
    if (stringAFunc == "Relu"): return relu(x)
    elif (stringAFunc == "Leaky-ReLU"): return leaky_relu(x)
    elif (stringAFunc == "Sigmoid"): return sigmoid(x)
    elif (stringAFunc == "tanh"): return tanh(x)
    else: return softmax(x)

def stringToFuncDer(stringAFunc, x):
    if (stringAFunc == "Relu"): return relu_derivative(x)
    elif (stringAFunc == "Leaky-ReLU"): return leaky_relu_derivative(x)
    elif (stringAFunc == "Sigmoid"): return sigmoid_derivative(x)
    elif (stringAFunc == "tanh"): return tanh_derivative(x)
    else: return softmax_derivative(x)

def leaky_relu(x, alpha = 0.01):
    return np.maximum(alpha * x, x)

def leaky_relu_derivative(x, alpha = 0.01):
    dx = np.ones_like(x)
    dx[x < 0] = alpha
    return dx



Multilayer perceptron algorithm:

In [80]:
logistic = lambda z: 1./ (1 + np.exp(-z))
class MLP:
    
    def __init__(self, aFunc, numOfHiddenLayers, numOfHiddenUnits, dimension):
        self.aFunc = aFunc
        self.numOfHiddenLayers = numOfHiddenLayers
        self.numOfHiddenUnits = numOfHiddenUnits
        self.dimension = dimension
        self.weights = {}
        self.biases = {}

        for i in range(len(dimension)-1):
            self.weights[i + 1] = np.random.randn(dimension[i], dimension[i + 1])
            self.biases[i + 1] = np.zeros(dimension[i + 1])
            
        # i = 1
        # # Initialize weights and biases for hidden layers
        # for i in range(self.numOfHiddenLayers):
        #     self.weights[i + 1] = (np.random.randn(self.numOfHiddenUnits[i], self.numOfHiddenUnits[i-1]))
        #     self.biases[i + 1] = (np.zeros(self.numOfHiddenUnits[i]))
        
        # # Initialize weights and biases for output layer
        # self.weights[i + 1] = np.random.randn(10, self.numOfHiddenUnits[-1])
        # self.biases[i + 1] = np.zeros(10)
        
            
    def fit(self, x, y, optimizer):
        
        def backprop(self, Mlp, x, y):
            z = {}
            a = {1: x}  
            
            for i in range(1, Mlp.numOfHiddenLayers + 2):
                z[i + 1] = np.dot(a[i], Mlp.weights[i]) + Mlp.biases[i]
                a[i + 1] = stringToFunc(Mlp.aFunc[i-1], z[i + 1])
            last = len(a)
            delta = cost_derivative(a[last], y) * softmax_derivative(a[last]) #change this to last element
            dw = np.dot(a[Mlp.numOfHiddenLayers + 1].T, delta)
        
            update_params = {
                 self.numOfHiddenLayers + 1: (dw, delta)
            }

            for i in reversed(range(2, Mlp.numOfHiddenLayers + 2)):
                dr = stringToFuncDer(Mlp.aFunc[i], z[i]) 
                delta = np.dot(delta, Mlp.weights[i].T) * dr
                dw = np.dot(a[i - 1].T, delta)
                update_params[i - 1] = (dw, delta)

            return update_params


        self = optimizer.run(self,backprop, x, y)
        return self
            
    def predict(self, x):
        z = {}
        a = {1: x}  
        
        for i in range(1, self.numOfHiddenLayers + 2):
            z[i + 1] = np.dot(a[i], self.weights[i]) + self.biases[i]
            a[i + 1] = stringToFunc(self.aFunc[i-1], z[i + 1])
        return np.argmax(a[self.numOfHiddenLayers+2],axis=1)

GradientDescent Algorithm

In [81]:
class GradientDescent:
    
    def __init__(self, learning_rate=.001, batch_size=16, max_iters=1, epsilon=1e-8, numOfHiddenLayers=0):
        self.numOfHiddenLayers = numOfHiddenLayers
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.epsilon = epsilon
        self.batch_size = batch_size
        
    def run(self, mlp,gradient_fn, x, y):
        m = x.shape
        num_batches = m[0] // self.batch_size
        norms = np.array([np.inf])
        t = 1
        while np.any(norms > self.epsilon) and t < self.max_iters:
            #permutation = np.random.permutation(len(x))
            x_shuffle = x#[permutation]
            y_shuffle = y#[permutation]
            
            for a in range(num_batches):
                start = a * self.batch_size
                end = start + self.batch_size
                x_batch = x_shuffle[start:end:]
                y_batch = y_shuffle[start:end:]

                # Compute the gradient 
                grad = gradient_fn(self,mlp,x_batch, y_batch)
                for k, v in grad.items():
                    dw = v[0]
                    dw += (0) * mlp.weights[k] 
                    mlp.weights[k] -= self.learning_rate * dw
                    mlp.biases[k] -= self.learning_rate * np.mean(v[1], 0)
                # for p in range(len(params)):
                #     params[p] -= self.learning_rate * grad[p]
                # norms = np.array([np.linalg.norm(g) for g in grad])
            t += 1
        return mlp

In [86]:
no_hidden = MLP(("softmax", ),0,(0,), (3072,10))
optimizer = GradientDescent(numOfHiddenLayers=0)
fit_no_hidden = no_hidden.fit(X_train[:100],y_train[:100], optimizer)
yh = fit_no_hidden.predict(X_train[:100])
def mean_squared_error(a, b):
    # Calculate the difference between the matrices
    difference = (a - b)**2
    mse = np.mean(difference)
    # Return the mean squared error
    return mse
y_100 = y_train[:100]
print(mean_squared_error(yh,y_train[:100]))
# optimizer = GradientDescent(numOfHiddenLayers=1)
# single_layer=  MLP((MISC.relu, MISC.softmax),1,(256,))
# fit_single_layer = single_layer.fit(batches[0][0],batches[0][1],optimizer)
# optimizer = GradientDescent(numOfHiddenLayers=2)
# two_layers = MLP((MISC.relu, MISC.softmax),2,(256,256))
# fit_two_layers = two_layers.fit(batches[0][0],batches[0][1],optimizer)

13.3294
