In [None]:
import numpy as np
import pandas as pd
import progressbar
import os
import matplotlib.pyplot as pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold 
from urllib.request import urlopen


# For reproducibility, seed NumPy's global RNG once from a single named
# constant, so changing `seed` actually changes the seed in use.
seed = 7
np.random.seed(seed)
def load_dataset(dataset_fp, delimiter=",",chunksize=1000):
    """Load a headerless CSV file as a NumPy array, downloading it if absent.

    Parameters
    ----------
    dataset_fp : str
        Local path of the CSV file. When no file exists at this path, the
        same string is appended to the hard-coded base URL and downloaded.
    delimiter : str
        Field separator handed to ``pd.read_csv``.
    chunksize : int
        Number of CSV rows parsed per chunk. The download read size is also
        derived from it (``16 * chunksize`` bytes per read).

    Returns
    -------
    numpy.ndarray
        The concatenated contents of the file.
    """
    if not os.path.isfile(dataset_fp):
        # Download under a temporary name: an interrupted transfer must not
        # leave a truncated file that the isfile() check accepts next time.
        partial_fp = dataset_fp + ".part"
        response = urlopen("http://cs.mcgill.ca/~ksinha4/datasets/kaggle/" + dataset_fp)
        try:
            read_size = 16 * chunksize
            with open(partial_fp, 'wb') as f:
                while True:
                    chunk = response.read(read_size)
                    if not chunk:
                        break
                    f.write(chunk)
        finally:
            # Release the connection whether or not the transfer succeeded.
            response.close()
        os.rename(partial_fp, dataset_fp)

    # Parse the CSV in chunks so the progress bar advances while reading.
    chunks = []
    pb = progressbar.ProgressBar()
    for chunk in pb(pd.read_csv(dataset_fp, delimiter=delimiter, chunksize=chunksize, header=None)):
        chunks.append(chunk)

    dataset = pd.concat(chunks)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and new pandas releases.
    return dataset.values


In [None]:
# Doing some preprocessing 

import math
import skimage 
import cv2
from skimage.filters import threshold_otsu
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.util import pad
from skimage.transform import warp, AffineTransform
from scipy import ndimage
from PIL import Image, ImageOps

def get_regions_otsu_method(image):
    """Return the `image` attribute of every labelled region in `image`.

    Despite the name, no Otsu threshold is computed here: pixels are kept
    when their value is strictly greater than the fixed cut-off 0.99.
    """
    foreground = image > 0.99
    closed_mask = closing(foreground, square(1))

    # Label the thresholded mask and collect one entry per labelled region
    labelled = label(closed_mask)
    return [props.image for props in regionprops(labelled)]

def max_region_by_area(regions):
    """Return the region whose longest side is largest.

    The ranking key is the longest side squared (the area of the smallest
    square enclosing the region), not height * width. On ties the earliest
    region wins. An empty `regions` makes `max` raise ValueError.
    """
    def enclosing_square_area(region):
        longest_side = max(region.shape[0], region.shape[1])
        return longest_side * longest_side

    return max(regions, key=enclosing_square_area)


def to_squre(region):
    """Pad a 2-D region mask to a 32x32 canvas and pass it through resize.

    The region is cast to uint8 and multiplied by 255, wrapped in a PIL
    image, and given a border so that width and height both total 32. When
    the required padding is odd, the extra pixel goes to the right/bottom.

    NOTE(review): for a region wider or taller than 32 the border values
    are negative; confirm how the installed Pillow handles that case.
    """
    target_side = 32
    (rows, cols) = region.shape

    pad_w = target_side - cols
    pad_h = target_side - rows
    left, top = pad_w // 2, pad_h // 2
    # ImageOps.expand takes the border as (left, top, right, bottom)
    border = (left, top, pad_w - left, pad_h - top)

    grey = Image.fromarray(region.astype('uint8') * 255)
    padded = np.array(ImageOps.expand(grey, border))
    return skimage.transform.resize(padded, (target_side, target_side))

def preprocess_image(image):
    """Extract the dominant region of a flattened 64x64 image as a 32x32 array.

    The input is reshaped to 64x64 and cast to float32, split into regions,
    and the region chosen by `max_region_by_area` is handed to `to_squre`.
    If no region is found, `max_region_by_area` raises ValueError.
    """
    grid = image.reshape(64, 64).astype('float32')
    candidates = get_regions_otsu_method(grid)
    return to_squre(max_region_by_area(candidates))
    

In [None]:
class Neural_Network(object):
    """Fully connected network: 1024 inputs, one sigmoid hidden layer, 10 sigmoid outputs.

    There are no bias terms. Training updates the weights one sample at a
    time (see `gradient`), using the learning rate and epoch count given to
    the constructor.
    """

    def __init__(self,size, learning_rate, epoch):
        """Create a network with randomly initialised weights.

        size          -- number of hidden units
        learning_rate -- step size applied to every weight update
        epoch         -- number of full passes over the training set in `predict`
        """
        self.input_size = 1024
        self.hidden_size = size
        self.output_size = 10
        self.lr = learning_rate
        self.epo = epoch

        # Weights: standard normal draws scaled by sqrt(2 / fan_in).
        # (1024 x hidden_size) matrix from input to hidden layer
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size) * np.sqrt(2.0 / self.input_size)
        # (hidden_size x 10) matrix from hidden to output layer
        self.weight_output = np.random.randn(self.hidden_size, self.output_size) * np.sqrt(2.0 / self.hidden_size)

    def sigmoid(self, z):
        """Logistic activation; the input is clipped to [-500, 500] so exp() cannot overflow."""
        z = np.clip(z, -500, 500)
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoidPrime(self, z):
        """Sigmoid derivative expressed in terms of the sigmoid's OUTPUT.

        `z` must already be an activation value sigmoid(x), not the
        pre-activation x; the result is z * (1 - z).
        """
        return z * (1 - z)

    def forward(self, X):
        """Propagate X (one sample or a batch of rows) through the network.

        Stores the intermediate values on the instance for `backward`:
        `self.z` (hidden pre-activation), `self.act_hidden` (hidden
        activation) and `self.act_output` (output PRE-activation, despite
        the name). Returns the sigmoid of `self.act_output`.
        """
        self.z = np.dot(X, self.weight_hidden)
        self.act_hidden = self.sigmoid(self.z)
        self.act_output = np.dot(self.act_hidden, self.weight_output)
        return self.sigmoid(self.act_output)

    def backward(self, X, y, output):
        """Apply one weight update for a SINGLE sample.

        X      -- one input vector with `input_size` elements
        y      -- its target vector
        output -- the value `forward(X)` returned for this sample

        Relies on `self.act_hidden` left behind by that `forward` call. The
        step follows the negative gradient of 0.5 * sum((y - output) ** 2),
        scaled by `self.lr`.
        """
        self.error_output = y - output
        self.delta_output = self.error_output * self.sigmoidPrime(output)

        # Back-propagate through the output weights *before* they are updated.
        self.error_hidden = self.delta_output.dot(self.weight_output.T)
        self.delta_hidden = self.error_hidden * self.sigmoidPrime(self.act_hidden)

        # Outer products of layer input and layer delta give the weight steps.
        self.weight_hidden += self.lr * X.reshape(self.input_size, 1).dot(self.delta_hidden.reshape(1, self.hidden_size))
        self.weight_output += self.lr * self.act_hidden.reshape(self.hidden_size, 1).dot(self.delta_output.reshape(1, self.output_size))

    def gradient(self, X, y):
        """Run one forward/backward pass (one update step) on a single sample."""
        output = self.forward(X)
        self.backward(X, y, output)

    def evaluate(self, X, original_y):
        """Return the fraction of rows of X whose arg-max output equals the label.

        `original_y[i][0]` must hold the digit label of row i (labels are
        NOT one-hot here). Raises ZeroDivisionError when X has no rows.
        """
        output = self.forward(X)
        size = X.shape[0]
        correct = sum(
            1 for i in range(size) if np.argmax(output[i]) == original_y[i][0]
        )
        return 1.0 * correct / size

    def predict(self, xtrainset, ytrainset, xvalidset, original_y):
        """Train on the training set, then return accuracy on the validation set.

        Despite its name, this method performs training: for each of
        `self.epo` epochs it applies `gradient` to every training sample in
        order. Progress is written to stdout as "epoch 0 1 2 ...".

        xtrainset, ytrainset -- training inputs and their target vectors
        xvalidset, original_y -- validation inputs and their digit labels,
                                 passed unchanged to `evaluate`
        """
        # sys.stdout.write behaves the same on Python 2 and 3, unlike the
        # Python 2-only `print x,` statement form.
        import sys

        sys.stdout.write("epoch")
        for j in range(self.epo):
            sys.stdout.write(" %d" % j)
            sys.stdout.flush()
            for i in range(xtrainset.shape[0]):
                self.gradient(xtrainset[i], ytrainset[i])
        return self.evaluate(xvalidset, original_y)


In [None]:
# Read the training inputs and labels; load_dataset downloads each file
# first when it is not already present on disk.
xtrainload = load_dataset("train_x.csv")
ytrainload = load_dataset("train_y.csv")

In [None]:
# Fresh progress bar for the per-image preprocessing pass
pb = progressbar.ProgressBar()

# Preprocess x: divide the raw values by 255, reduce every image with
# preprocess_image, and flatten each result to a 1024-element row.
xtrain = xtrainload / 255.0
xtrain_preprocessed = [preprocess_image(x).reshape(1024) for x in pb(xtrain)]
xtrain = np.asarray(xtrain_preprocessed)

# Preprocess y 
def one_hot(y):
    """Turn a column of digit labels into a (n_samples, 10) one-hot float array.

    `y[i][0]` is read as the label of row i and truncated with int(); that
    column of the output row is set to 1 and every other column is 0.
    """
    n_samples = y.shape[0]
    digits = np.array([int(y[row][0]) for row in range(n_samples)], dtype=int)
    encoded = np.zeros((n_samples, 10))
    # One fancy-index assignment sets the single hot column of each row
    encoded[np.arange(n_samples), digits] = 1
    return encoded
# One-hot targets used for training; the raw digit labels stay available
# in ytrainload.
ytrain = one_hot(ytrainload)

# Use 5-fold cross-validation, without shuffling the samples first
kf = KFold(n_splits=5,shuffle=False)


In [None]:

# 5-fold cross-validation: hidden nodes = 300, learning rate = 0.1, epochs = 12
record = []  # validation accuracy of each fold
i = 1        # 1-based fold counter, used only in the printed messages
for train_index, val_index in kf.split(xtrain):
    # Single-argument print(...) calls behave the same on Python 2 and 3;
    # the bare `print "..."` statement form is a SyntaxError on Python 3.
    print("creating NN")
    # A fresh network per fold, so no weights carry over between folds
    net1 = Neural_Network(300, 0.1, 12)
    print("spliting samples...")
    xtrainset, xvalidset = xtrain[train_index], xtrain[val_index]
    # Train on one-hot targets, validate against the original digit labels
    ytrainset, yvalidset = ytrain[train_index], ytrainload[val_index]
    print("Training model with fold " + str(i) + "...")
    # Despite its name, predict() trains on the training split and returns
    # the accuracy on the validation split.
    result = net1.predict(xtrainset, ytrainset, xvalidset, yvalidset)
    print("accuracy for fold {}:".format(i), result)
    record.append(result)
    i+=1
print("mean accuracy = ", np.mean(record))

In [None]:

# 5-fold cross-validation: hidden nodes = 500, learning rate = 0.1, epochs = 12
record = []  # validation accuracy of each fold
i = 1        # 1-based fold counter, used only in the printed messages
for train_index, val_index in kf.split(xtrain):
    # Single-argument print(...) calls behave the same on Python 2 and 3;
    # the bare `print "..."` statement form is a SyntaxError on Python 3.
    print("creating NN")
    # A fresh network per fold, so no weights carry over between folds
    net1 = Neural_Network(500, 0.1, 12)
    print("spliting samples...")
    xtrainset, xvalidset = xtrain[train_index], xtrain[val_index]
    # Train on one-hot targets, validate against the original digit labels
    ytrainset, yvalidset = ytrain[train_index], ytrainload[val_index]
    print("Training model with fold " + str(i) + "...")
    # Despite its name, predict() trains on the training split and returns
    # the accuracy on the validation split.
    result = net1.predict(xtrainset, ytrainset, xvalidset, yvalidset)
    print("accuracy for fold {}:".format(i), result)
    record.append(result)
    i+=1
print("mean accuracy = ", np.mean(record))

In [None]:
# 5-fold cross-validation: hidden nodes = 300, learning rate = 0.07, epochs = 12

# record collects the validation accuracy of each fold;
# i is a 1-based fold counter used only in the printed message.
record = []
i = 1
for train_index, val_index in kf.split(xtrain):

    # A fresh network per fold, so no weights carry over between folds
    net1 = Neural_Network(300, 0.07, 12)
    xtrainset, xvalidset = xtrain[train_index], xtrain[val_index]
    ytrainset, yvalidset = ytrain[train_index], ytrainload[val_index] # validate against the original y digit array
    # Despite its name, predict() trains on the training split and returns
    # the accuracy on the validation split.
    result = net1.predict(xtrainset, ytrainset, xvalidset, yvalidset)
    print("accuracy for fold {}:".format(i), result)
    record.append(result)
    i+=1
print("mean accuracy = ", np.mean(record))

In [None]:
# 5-fold cross-validation: hidden nodes = 300, learning rate = 0.085, epochs = 12

# record collects the validation accuracy of each fold;
# i is a 1-based fold counter used only in the printed message.
record = []
i = 1
for train_index, val_index in kf.split(xtrain):

    # A fresh network per fold, so no weights carry over between folds
    net3 = Neural_Network(300, 0.085, 12)
    xtrainset, xvalidset = xtrain[train_index], xtrain[val_index]
    ytrainset, yvalidset = ytrain[train_index], ytrainload[val_index] # validate against the original y digit array
    # Despite its name, predict() trains on the training split and returns
    # the accuracy on the validation split.
    result = net3.predict(xtrainset, ytrainset, xvalidset, yvalidset)
    print("accuracy for fold {}:".format(i), result)
    record.append(result)
    i+=1
print("mean accuracy = ", np.mean(record))

In [None]:

# 5-fold cross-validation: hidden nodes = 400, learning rate = 0.1, epochs = 12

# record collects the validation accuracy of each fold;
# i is a 1-based fold counter used only in the printed message.
record = []
i = 1
for train_index, val_index in kf.split(xtrain):

    # A fresh network per fold, so no weights carry over between folds
    net4 = Neural_Network(400, 0.1, 12)
    xtrainset, xvalidset = xtrain[train_index], xtrain[val_index]
    ytrainset, yvalidset = ytrain[train_index], ytrainload[val_index] # validate against the original y digit array
    # Despite its name, predict() trains on the training split and returns
    # the accuracy on the validation split.
    result = net4.predict(xtrainset, ytrainset, xvalidset, yvalidset)
    print("accuracy for fold {}:".format(i), result)
    record.append(result)
    i+=1
print("mean accuracy = ", np.mean(record))

In [None]:
# 5-fold cross-validation: hidden nodes = 300, learning rate = 0.5, epochs = 12

# record collects the validation accuracy of each fold;
# i is a 1-based fold counter used only in the printed message.
record = []
i = 1
for train_index, val_index in kf.split(xtrain):

    # A fresh network per fold, so no weights carry over between folds
    net5 = Neural_Network(300, 0.5, 12)
    xtrainset, xvalidset = xtrain[train_index], xtrain[val_index]
    ytrainset, yvalidset = ytrain[train_index], ytrainload[val_index] # validate against the original y digit array
    # Despite its name, predict() trains on the training split and returns
    # the accuracy on the validation split.
    result = net5.predict(xtrainset, ytrainset, xvalidset, yvalidset)
    print("accuracy for fold {}:".format(i), result)
    record.append(result)
    i+=1
print("mean accuracy = ", np.mean(record))