In [20]:
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append('./python-mnist/')
from mnist import MNIST

def ReLU(x):
    return np.maximum(x, 0)

def sigmoid(x):
    return 1.7159*np.tanh(2*x/3)

def dReLU(x):
    x[x >0] = 1
    x[x <= 0] = 0
    return x

def dsigmoid(x):
    return 1.7159*(1-np.power(np.tanh(2*x/3),2))*2/3

def softmax(x):
    # Find the largest a, and subtract it from each a in order to prevent overflow
    x_max = np.max(x,1).reshape(x.shape[0],1)
    sum_exp_x = np.sum(np.exp(x - x_max),1).reshape(x.shape[0],1) 
    pred_y = np.exp(x - x_max) / (sum_exp_x+0.0) 

    return pred_y

def random_init_weights(input_size, output_size):
    return np.random.normal(0,np.power(input_size,-0.5),(input_size,output_size))

def random_init_bias(output_size):
    return np.random.randn(1, output_size)

def zero_init_delta_w(input_size, output_size):
    return np.zeros((input_size,output_size))

class Network():

    def __init__(self, layers, init_method_weights = random_init_weights, init_method_bias = random_init_bias, init_method_delta_w = zero_init_delta_w, activation_fn = "ReLU", learning_rate = 0.01, momentum = 0.9, epoches = 1, batch_size = 256):
        self.layers = layers
        self.init_method_weights = init_method_weights
        self.init_method_bias = init_method_bias
        self.init_method_delta_w = init_method_delta_w

        self.setup_layers()
        self.epoches = epoches
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.batch_size = batch_size

        if activation_fn == "sigmoid":
            self.activation_fn = sigmoid
            self.activation_dfn = dsigmoid
        elif activation_fn == "ReLU":
            self.activation_fn = ReLU
            self.activation_dfn = dReLU


    def setup_layers(self):
        self.w = [ self.init_method_weights(input_size, output_size) for input_size, output_size in zip(self.layers[:-1], self.layers[1:])]
        self.delta_w = [ self.init_method_delta_w(input_size, output_size) for input_size, output_size in zip(self.layers[:-1], self.layers[1:])]
        self.b = [ self.init_method_bias(output_size) for output_size in self.layers[1:]]

    def forward(self, x):
        for weight, bias in zip(self.w[:-1], self.b[:-1]):
            x = self.activation_fn(np.matmul(x, weight) + bias)

        pred_y = softmax(np.matmul(x, self.w[-1]) + self.b[-1])
        return pred_y

    def get_activations(self, x):
        activations = [x] 
        pre_activations = []
        for weight, bias in zip(self.w[:-1], self.b[:-1]):

            x = np.matmul(x, weight) + bias
            pre_activations.append(x)
            x = self.activation_fn(x)
            activations.append(x)
        
        pred_y = np.matmul(x, self.w[-1]) + self.b[-1]    
        pre_activations.append(pred_y)    
        pred_y = softmax(pred_y)
        activations.append(pred_y)
        
        return activations, pre_activations
    
    def gradient_check(self, dw, train_data_batch, train_label_batch):
        epsilon = np.power(10,-2.0)
        for k in range(0, np.array(self.w).shape[0]):
            for i in range(0, np.array(self.w)[k].shape[0]):
                for j in range(0, np.array(self.w)[k].shape[1]):
                    self.w[k][i][j] = self.w[k][i][j] + epsilon
                    loss_plus = self.loss_check(train_data_batch, train_label_batch)
                    self.w[k][i][j] = self.w[k][i][j] - 2 * epsilon
                    loss_minus = self.loss_check(train_data_batch, train_label_batch)
                    self.w[k][i][j] = self.w[k][i][j] + epsilon
                    if np.abs((loss_plus - loss_minus) / (2 * epsilon) - dw[k][i][j]) <= np.power(10,-4.0):
                        print("gradient check passed!")
                    else:
                        print("gradient check failed!")
                
    def loss_check(self, input_data, train_label_batch):
        pred_y = self.forward_check(input_data)

        pred_y[pred_y == 0.0] = 1e-15
        log_pred_y = np.log(pred_y)
        loss_ = -np.sum(train_label_batch * log_pred_y) / (train_label_batch.shape[0]+0.0)

        return loss_
    
    def forward_check(self, x):
        for weight, bias in zip(self.w[:-1], self.b[:-1]):
            x = self.activation_fn(np.matmul(x, weight) + bias)

        pred_y = softmax(np.matmul(x, self.w[-1]) + self.b[-1])
        return pred_y        

    def momentum_update(self, gradient, delta_w_):
        delta_w_ = [delta_w * self.momentum for delta_w in delta_w_]
        delta_w_ = self.learning_rate * gradient / (self.batch_size+0.0) + delta_w_ #delta_w has same dimension as w
        return delta_w_  
            
    def update_mini_batch(self, train_data_batch, train_label_batch, one_hot_labels):
        dw = [np.zeros(weight.shape) for weight in self.w]
        db = [np.zeros(bias.shape) for bias in self.b]

        for train_data, train_label in zip(train_data_batch, train_label_batch):
            dw_, db_ = self.backpropagation(train_data, train_label)
            dw = [dweight + dweight_ for dweight, dweight_ in zip(dw, dw_)]
            db = [dbias + dbias_ for dbias, dbias_ in zip(db, db_)]
        counter = 0
        self.gradient_check(dw, train_data_batch, train_label_batch)
        for weight, dw_, delta_w_ in zip(self.w, dw, self.delta_w):
            self.delta_w[counter] = self.momentum_update(dw_, delta_w_)
            weight = weight + self.momentum_update(dw_, delta_w_)
            self.w[counter] = weight
            counter = counter + 1
        #self.w = [weight + self.momentum_update(dw_, delta_w_) for weight, dw_, delta_w_ in zip(self.w, dw, self.delta_w)]
        self.b = [bias + self.learning_rate * db_ / (train_data_batch.shape[0]+0.0)  for bias, db_ in zip(self.b, db)]


        #self.w = [weight + self.learning_rate * dw_ / (train_data_batch.shape[0]+0.0) for weight, dw_ in zip(self.w, dw)]
        #self.b = [bias + self.learning_rate * db_ / (train_data_batch.shape[0]+0.0)  for bias, db_ in zip(self.b, db)]
        
    def backpropagation(self, train_data, train_label):
        train_data = train_data.reshape(1, train_data.shape[0])
        dw = [np.zeros(weight.shape) for weight in self.w ]
        db = [np.zeros(bias.shape) for bias in self.b ]

        activations, pre_activations = self.get_activations(train_data)
    
        delta = train_label - activations[-1]
        dw[-1] = np.matmul( activations[-2].transpose(), delta)

        for idx in range(2, len(self.layers)):

            pre_activation = pre_activations[-idx]
            activation = activations[-idx-1]
            delta = self.activation_dfn(pre_activation) * np.matmul(delta, self.w[-idx+1].transpose())
            dw[-idx] = np.matmul( activation.transpose(), delta)
            db[-idx] = delta  
        return dw, db

    def loss(self, input_data, one_hot_labels, labels):
        pred_y = self.forward(input_data)

        pred_y[pred_y == 0.0] = 1e-15
        log_pred_y = np.log(pred_y)
        loss_ = -np.sum(one_hot_labels * log_pred_y) / (one_hot_labels.shape[0]+0.0)

        return loss_
 
    def accuracy(self, input_data, one_hot_labels, labels):
        pred_y = self.forward(input_data)
        pred_class = np.argmax(pred_y, axis=1)
        accuracy_ = np.sum(pred_class == labels)/(pred_class.shape[0]+0.0)

        return accuracy_

    def train(self, training_images, one_hot_train_labels, training_labels, test_images, one_hot_test_labels, test_labels):

        self.accuracy(training_images, one_hot_train_labels, training_labels)

        batch_count = training_images.shape[0] / self.batch_size

        for epoch in range(self.epoches):
            idxs = np.random.permutation(training_images.shape[0]) 
            X_random = training_images[idxs]
            Y_random = one_hot_train_labels[idxs]

            for i in range(int(batch_count)):
                train_data_batch = X_random[i * self.batch_size: (i+1) * self.batch_size, :]
                train_label_batch = Y_random[i * self.batch_size: (i+1) * self.batch_size, :]

                self.update_mini_batch(train_data_batch, train_label_batch, one_hot_train_labels)

            #accuracy_ = self.accuracy(training_images, one_hot_train_labels,training_labels)
            loss_ = self.loss(training_images, one_hot_train_labels,training_labels)
            accuracy_ = self.accuracy(test_images, one_hot_test_labels, test_labels)
            
            print ("accuracy is " + str(accuracy_))
            
            

In [11]:
A = [[[1,2],[1,3]],[[2,3],[3,5]]]
print(A[0][0][0])

1


In [21]:
    data = MNIST('./python-mnist/data')
    training_images, training_labels = data.load_training()
    test_images, test_labels = data.load_testing()

    training_images = np.array(training_images)
    test_images = np.array(test_images)
    training_labels = np.array(training_labels)
    test_labels = np.array(test_labels)


    training_images = training_images / 127.5 - 1
    test_images = test_images / 127.5 - 1

    classes = 10

    one_hot_train_labels = np.eye(classes)[training_labels] 
    one_hot_test_labels = np.eye(classes)[test_labels]  

    nn = Network([784, 64, 10])

    nn.train(training_images, one_hot_train_labels, training_labels, test_images, one_hot_test_labels, test_labels)

[ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check 

gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient ch

gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient ch

gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient ch

gradient check failed!
gradient check failed!
gradient check passed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient ch

gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check passed!
gradient check failed!
gradient check failed!
gradient check failed!
gradient check failed!


KeyboardInterrupt: 

In [24]:
print(np.array(np.random.normal(0,np.power(5,-0.5),(2,2))))

[[ 0.14829941 -0.01115394]
 [ 0.12750762  0.25495727]]


In [28]:
print(np.array(np.random.randn(2, 2)))

[[ 2.89450221 -1.45678875]
 [ 0.41060642  0.50661766]]


In [41]:
print(dw[0])

NameError: name 'dw' is not defined

In [82]:
print(1.2*np.array(0))

0.0
