<a href="https://colab.research.google.com/github/djdongjin/IFT6135-Assignment/blob/master/Copy_of_A1_1_Jin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import random
# Seed every RNG the notebook draws from: numpy drives weight initialisation,
# while the stdlib `random` module drives the batch shuffling in data_iter.
np.random.seed(1)
random.seed(1)

In [0]:
# Colab-only step: mount Google Drive at /content/gdrive so files stored there
# can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
def one_hot(labels, n):
    """Convert integer class labels to a one-hot matrix.

       labels: m integer class ids; a flat vector or an m*1 column
       n: expected classes
       output: m*n matrix with a single 1 per row
    """
    # Flatten first: an (m, 1) column would otherwise broadcast against
    # np.arange(m) and mark every label column in every row.
    labels = np.asarray(labels).reshape(-1)
    m = labels.shape[0]
    onehot = np.zeros((m, n))
    if m:  # an empty array has a float dtype and cannot be used as an index
        onehot[np.arange(m), labels] = 1
    return onehot

# Directory on the mounted drive that holds the MNIST .npy splits.
DATA_PATH = r'/content/gdrive/My Drive/app/MNIST'

# Input arrays for each split.
X_train = np.load(DATA_PATH + '/x_train.npy')
X_val   = np.load(DATA_PATH + '/x_val.npy')
X_test  = np.load(DATA_PATH + '/x_test.npy')

# Integer labels for each split, expanded to 10-column one-hot matrices.
y_train = one_hot(np.load(DATA_PATH + '/y_train.npy'), 10)
y_val   = one_hot(np.load(DATA_PATH + '/y_val.npy'), 10)
y_test  = one_hot(np.load(DATA_PATH + '/y_test.npy'), 10)

In [0]:
def accuracy(y_pred, y):
    """Fraction of rows whose arg-max in y_pred matches the arg-max in y.

       y_pred: m*n matrix of per-class scores
       y: m*n one-hot matrix
    """
    predicted = np.argmax(y_pred, axis=1)
    expected = np.argmax(y, axis=1)
    n_correct = np.sum(predicted == expected)
    return n_correct / y.shape[0]


def data_iter(data, batch_size):
    """Yield (X, y) mini-batches in a random order.

       data: (X, y) pair sliced along the first axis
       batch_size: rows per batch; the last batch may be shorter

       Batches are consecutive slices; only the order in which the batches
       are yielded is shuffled, not the rows inside them.
    """
    features, targets = data
    chunks = []
    for start in range(0, features.shape[0], batch_size):
        stop = start + batch_size
        chunks.append((features[start:stop], targets[start:stop]))
    random.shuffle(chunks)
    yield from chunks
                
        
def glorot(in_dim, out_dim):
    """Glorot (Xavier) uniform initialisation.

       in_dim: number of input units
       out_dim: number of output units
       output: in_dim*out_dim matrix drawn from U(-d, d), d = sqrt(6/(in_dim+out_dim))
    """
    d = np.sqrt(6/(in_dim+out_dim))
    # -d and d are the bounds of a uniform distribution; passing them to
    # np.random.normal would treat them as mean and standard deviation.
    return np.random.uniform(-d, d, (in_dim, out_dim))

In [0]:
# Width of the network's input layer (first entry of NN.dims).
INPUT_DIM = 784
# Width of the network's output layer (last entry of NN.dims).
OUTPUT_DIM = 10

class NN(object):
    """Fully connected softmax classifier trained with mini-batch SGD.

    Layer widths are [INPUT_DIM] + hidden_dims + [OUTPUT_DIM]. Hidden layers
    use the activation named by `activate`; the output layer is a softmax and
    the loss is the negative log-likelihood of one-hot labels.
    """

    def __init__(self,hidden_dims=[1024,2048],n_hidden=2,init='Normal',activate='relu',mode='train'):
        """
        hidden_dims: widths of the hidden layers
        n_hidden: kept for backward compatibility; the number of layers is
                  taken from hidden_dims
        init: 'Zero', 'Normal' or 'Glorot'
        activate: 'relu' or 'sigmoid'
        mode: accepted for backward compatibility; not used
        """
        if activate not in ('relu', 'sigmoid'):
            raise ValueError("activate must be 'relu' or 'sigmoid', got %r" % (activate,))
        # list(...) copies the argument so the shared default list is never aliased.
        self.dims = [INPUT_DIM,] + list(hidden_dims) + [OUTPUT_DIM,]
        self.weights = []
        self.biases  = []
        self.init = init
        self.activate = activate

        self.initialize_weights(n_hidden, self.dims)


    def initialize_weights(self, n_hidden, dims):
        """
        (Re)create self.weights / self.biases for consecutive pairs in dims.
        Weights follow self.init, biases start at zero. n_hidden is unused.
        Raises ValueError for an unknown self.init.
        """
        # Lambdas defer the lookup of `glorot` until that scheme is requested.
        initializers = {
            'Zero': lambda n_in, n_out: np.zeros((n_in, n_out)),
            'Normal': lambda n_in, n_out: np.random.randn(n_in, n_out),
            'Glorot': lambda n_in, n_out: glorot(n_in, n_out),
        }
        if self.init not in initializers:
            raise ValueError("init must be 'Zero', 'Normal' or 'Glorot', got %r" % (self.init,))
        init_method = initializers[self.init]

        self.weights = []
        self.biases = []
        for (inputs, outputs) in zip(dims[:-1], dims[1:]):
            self.weights.append(init_method(inputs, outputs))
            self.biases.append(np.zeros(outputs))


    def activation(self,inputs):
        """Apply the hidden-layer non-linearity; `inputs` is left unmodified."""
        if self.activate == 'relu':
            # np.maximum allocates a new array, so cached pre-activations
            # keep their original (possibly negative) values.
            return np.maximum(inputs, 0)
        if self.activate == 'sigmoid':
            return 1.0/(1.0+np.exp(-inputs))
        raise ValueError("unknown activation %r" % (self.activate,))


    def forward(self, inputs, labels=None):
        """
        Input: inputs: m*dims[0] batch
               labels: m*dims[-1] one-hot labels, or None to skip the loss
        output: (probs, ls, cache)
                    probs: m*dims[-1] softmax outputs
                    ls: loss for each example (None when labels is None)
                    cache: (as, hs) pre-activation / activated values per
                           layer; hs[0] is the input batch itself
        """
        h_k = inputs
        a = []
        h = [h_k]
        for (W, b) in zip(self.weights[:-1], self.biases[:-1]):
            a_k = np.dot(h_k, W) + b
            h_k = self.activation(a_k)
            a.append(a_k)
            h.append(h_k)

        a_k = np.dot(h_k, self.weights[-1]) + self.biases[-1]
        h_k = self.softmax(a_k)
        a.append(a_k)
        h.append(h_k)

        ls = self.loss(h_k, labels) if labels is not None else None
        cache = (a, h)

        return h_k, ls, cache


    def loss(self, pred, labels):
        '''
        Negative log likelihood, one value per example.
        '''
        # log(0) is -inf; nan_to_num turns it into a large finite number so that
        # multiplying by a zero label yields 0 rather than nan.
        with np.errstate(divide='ignore'):
            ls = np.nan_to_num(np.log(pred))
        ls = - np.sum(labels * ls, axis=1)
        return ls


    def backward(self,cache,labels,lss):
        """
        Input: cache: (as, hs)
                    as: preactivate values
                    hs: activated values
               labels: one-hot labels for the batch
               lss: loss for each examples (not needed for the gradient)
        output: grads: (grads_w, grads_b), averaged over the batch
        """
        as_, hs_ = cache
        batch = labels.shape[0]
        n_w = [None] * len(self.weights)
        n_b = [None] * len(self.biases)

        # Softmax + NLL: gradient w.r.t. the pre-softmax values is (probs - labels).
        nabla_a = hs_[-1] - labels
        n_w[-1] = np.dot(hs_[-2].T, nabla_a) / batch
        n_b[-1] = np.sum(nabla_a, axis=0) / batch

        # Walk back through the hidden layers. Matrix products over the whole
        # batch replace a per-example loop of outer products.
        for layer in range(2, len(self.dims)):
            nabla_h = np.dot(nabla_a, self.weights[-layer+1].T)
            nabla_a = nabla_h * self.activate_grad(as_[-layer])

            n_w[-layer] = np.dot(hs_[-layer-1].T, nabla_a) / batch
            n_b[-layer] = np.sum(nabla_a, axis=0) / batch

        return (n_w,n_b)


    def update(self,grads,lr):
        """Take one gradient-descent step of size lr on weights and biases."""
        grads_w, grads_b = grads
        for i in range(len(self.weights)):
            self.weights[i] -= lr * grads_w[i]
            self.biases[i] -= lr * grads_b[i]


    def train(self, data, epochs, batch_size, lr, lambd=0.0, test_data=None):
        """
        data: (X, y) training pair
        epochs: number of passes over data
        batch_size: rows per mini-batch
        lr: learning rate
        lambd: accepted for backward compatibility; currently not applied
        test_data: optional (X, y) pair; its accuracy is printed after each epoch
        """
        for ep in range(1, epochs+1):
            print('Epoch',ep,':')
            for (batch_x, batch_y) in data_iter(data, batch_size):
                y_pred, ls, cache = self.forward(batch_x, batch_y)
                grads = self.backward(cache, batch_y, ls)
                self.update(grads, lr)
            if test_data is not None:
                # Accuracy is a fraction in [0, 1]; '%i' would truncate it to 0.
                print('Epoch %i acc: %.4f.' % (ep, self.test(test_data)))


    def test(self, data):
        """Return the accuracy of the network on data = (X, y)."""
        x, y = data
        outputs, _, _ = self.forward(x, y)
        return accuracy(outputs, y)


    def activate_grad(self,inputs):
        """Derivative of the hidden activation at the pre-activation `inputs`.

        `inputs` is left unmodified so the forward cache stays valid.
        """
        if self.activate == 'relu':
            return (inputs > 0).astype(float)
        if self.activate == 'sigmoid':
            s = self.activation(inputs)
            return s * (1 - s)
        raise ValueError("unknown activation %r" % (self.activate,))


    def softmax(self,inputs):
        """Row-wise softmax; the row maximum is subtracted before exponentiating."""
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        outputs = np.exp(shifted)
        return outputs / np.sum(outputs, axis=1, keepdims=True)
        

In [0]:
# Two hidden layers (1024 and 512 units); init and activation keep the class defaults.
nn = NN(
    hidden_dims=[1024, 512],
    n_hidden=2,
)

In [0]:
# NOTE(review): test_data is the training set itself, so the printed accuracy
# is training accuracy. Confirm whether (X_val, y_val) was intended.
nn.train(
    (X_train, y_train),
    epochs=10,
    batch_size=200,
    lr=0.01,
    test_data=(X_train, y_train),
)

Epoch 1 :




KeyboardInterrupt: ignored

(784, 1024) (1024,)
(1024, 2048) (2048,)
(2048, 10) (10,)
