In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the whole '/kaggle/input' tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # os.walk yields bare file names; join them onto the directory being visited.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mnist-dataset/train-images.idx3-ubyte
/kaggle/input/mnist-dataset/t10k-labels.idx1-ubyte
/kaggle/input/mnist-dataset/t10k-images.idx3-ubyte
/kaggle/input/mnist-dataset/train-labels.idx1-ubyte
/kaggle/input/mnist-dataset/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte
/kaggle/input/mnist-dataset/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte
/kaggle/input/mnist-dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte
/kaggle/input/mnist-dataset/train-images-idx3-ubyte/train-images-idx3-ubyte


In [2]:
import struct
from array import array
from os.path  import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    """Load MNIST-style image/label pairs stored in the IDX binary format.

    The four file paths (training images/labels, test images/labels) are
    stored at construction time; nothing is read until ``load_data`` or
    ``read_images_labels`` is called.
    """

    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Args:
            images_filepath: path to an IDX image file (magic number 2051).
            labels_filepath: path to an IDX label file (magic number 2049).

        Returns:
            ``(images, labels)`` where ``images`` is a uint8 array of shape
            ``(count, rows, cols)`` taken from the image header and ``labels``
            is a 1-D uint8 array.

        Raises:
            ValueError: if a magic number is wrong, the image payload is
                shorter than the header announces, or the number of labels
                does not match the number of images.
        """
        with open(labels_filepath, 'rb') as file:
            # Header: two big-endian uint32 values (magic, item count).
            magic, _declared_labels = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            # copy() so the result is a writable array, not a view of the bytes object.
            labels_ar = np.frombuffer(file.read(), dtype=np.uint8).copy()

        with open(images_filepath, 'rb') as file:
            # Header: four big-endian uint32 values (magic, count, rows, cols).
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = np.frombuffer(file.read(), dtype=np.uint8)

        expected_bytes = size * rows * cols
        if image_data.size < expected_bytes:
            raise ValueError(
                'Image file is truncated: header announces {} bytes of pixels, found {}'.format(
                    expected_bytes, image_data.size))

        # Use the dimensions from the header instead of assuming 28x28, and
        # decode every image in one vectorised reshape.
        images_ar = image_data[:expected_bytes].reshape(size, rows, cols).copy()

        if labels_ar.shape[0] != size:
            raise ValueError(
                'Label/image count mismatch: {} labels for {} images'.format(
                    labels_ar.shape[0], size))

        return images_ar, labels_ar

    def load_data(self):
        """Read both splits and return ``(x_train, y_train), (x_test, y_test)``."""
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

In [3]:
#
# Verify Reading Dataset via MnistDataloader class
#
%matplotlib inline
import random
import matplotlib.pyplot as plt

#
# Set file paths based on added MNIST Datasets
#
# All four IDX files sit in the same dataset directory, so the directory is
# named once and each file name is joined onto it.
MNIST_DIR = '/kaggle/input/mnist-dataset'
training_images_filepath = join(MNIST_DIR, 'train-images.idx3-ubyte')
training_labels_filepath = join(MNIST_DIR, 'train-labels.idx1-ubyte')
test_images_filepath = join(MNIST_DIR, 't10k-images.idx3-ubyte')
test_labels_filepath = join(MNIST_DIR, 't10k-labels.idx1-ubyte')


#
# Load MNIST dataset
#
mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
(X_train, Y_train), (X_test, Y_test) = mnist_dataloader.load_data()

def extract_data(X):
    """Scale the input by 1/256 and flatten each sample into one column.

    The first axis of ``X`` indexes samples; all remaining axes are flattened.
    The result has shape ``(features, samples)``.
    """
    scaled = np.asarray(X) / 256
    n_samples = scaled.shape[0]
    per_sample_rows = scaled.reshape(n_samples, -1)
    # Transpose so that every sample becomes a column.
    return per_sample_rows.T
# Replace the raw image arrays with their scaled, flattened form
# (extract_data returns one column per image).
X_train = extract_data(X_train)
X_test = extract_data(X_test)
# Sanity check: feature matrix shape next to the label vector shape.
print(X_train.shape,Y_train.shape)



(784, 60000) (60000,)


In [4]:
# softmax
def softmax_stable(Z):
    """Column-wise softmax of ``Z``.

    The per-column maximum is subtracted before exponentiation so that
    ``np.exp`` never sees a positive argument and cannot overflow.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exps = np.exp(shifted)
    column_totals = exps.sum(axis=0)
    return exps / column_totals

# one-hot encoding of integer class labels
from scipy import sparse
def convert_label(y,C):
    """One-hot encode the labels ``y`` into a dense ``(C, len(y))`` array.

    Column ``i`` holds a single 1 in row ``y[i]``; every other entry is 0.
    """
    n_samples = len(y)
    column_idx = np.arange(n_samples)
    ones = np.ones_like(y)
    # Build the matrix sparsely from (value, (row, col)) triplets, then densify.
    one_hot = sparse.coo_matrix((ones, (y, column_idx)), shape=(C, n_samples))
    return one_hot.toarray()

# cost or loss function
def cost(Y, Yhat):
    """Average cross-entropy between targets ``Y`` and predictions ``Yhat``.

    Args:
        Y: target matrix with one column per sample (e.g. one-hot labels).
        Yhat: predicted probabilities, same shape as ``Y``.

    Returns:
        ``-sum(Y * log(Yhat)) / Y.shape[1]`` where only entries with a
        non-zero target contribute to the sum.
    """
    Y = np.asarray(Y)
    Yhat = np.asarray(Yhat)
    n_samples = Y.shape[1]
    targets, predictions = np.broadcast_arrays(Y, Yhat)
    # Entries with a zero target contribute nothing to the cross-entropy.
    # Skipping them avoids evaluating 0 * log(0) = nan when a non-target
    # probability has underflowed to exactly 0.
    active = targets != 0
    return -np.sum(targets[active] * np.log(predictions[active])) / n_samples


d0 = X_train.shape[0]  # input dimension (number of rows of X_train)
d1 = 100  # hidden layer width
d2 = 10  # number of classes

# Seeded generator so that a fresh "Restart & Run All" reproduces the same
# initial weights (and therefore the same training trajectory).
SEED = 42
rng = np.random.default_rng(SEED)

# Initialise the parameters: small Gaussian weights, zero biases.
w1 = 0.01*rng.standard_normal((d0, d1))
b1 = np.zeros((d1,1))
w2 = 0.01*rng.standard_normal((d1, d2))
b2 = np.zeros((d2,1))

N = X_train.shape[1]  # number of training data points
Y = convert_label(Y_train,d2)  # one-hot targets, shape (d2, N)
eta = 0.6  # learning rate

print(Y.shape)
#multi layer perceptron
def multi_LP(X,Y,eta,w1,w2,b1,b2, n_iters=1000, log_every=10):
    """Train a one-hidden-layer ReLU/softmax network with full-batch gradient descent.

    Args:
        X: input matrix with one column per sample.
        Y: target matrix with one column per sample (same column count as X).
        eta: learning rate.
        w1, b1: hidden-layer weights and bias.
        w2, b2: output-layer weights and bias.
        n_iters: number of gradient steps (default 1000, the original fixed value).
        log_every: print diagnostics every this many iterations; 0 or None
            disables logging (default 10, the original fixed value).

    Returns:
        The tuple ``(w1, w2, b1, b2)``.

    Note:
        The parameter arrays are updated in place, so the arrays passed in by
        the caller are modified and are the same objects that are returned.
    """
    # Derive the sample count from the data actually passed in rather than
    # from a notebook-level global.
    n_samples = X.shape[1]

    for count in range(n_iters):
        # feedforward
        Z1 = np.dot(w1.T,X) + b1
        A1 = np.maximum(Z1,0)
        Z2 = np.dot(w2.T,A1) + b2
        Yhat = softmax_stable(Z2)

        # periodic diagnostics: norm of the predictions and the average
        # cross-entropy loss
        if log_every and count % log_every == 0:
            loss = cost(Y, Yhat)
            print(np.linalg.norm(Yhat))
            print("iter %d, loss: %f" %(count, loss))

        # backpropagation
        # Dividing by the sample count here makes every gradient below the
        # gradient of the *average* loss.
        E2 = (Yhat - Y)/n_samples
        dw2 = np.dot(A1,E2.T)
        db2 = np.sum(E2, axis = 1, keepdims = True)
        E1 = np.dot(w2,E2)
        E1[Z1 <= 0] = 0  # ReLU derivative: no gradient through inactive units
        dw1 = np.dot(X,E1.T)
        db1 = np.sum(E1, axis = 1, keepdims = True)

        # gradient-descent update on the full batch (in place)
        w1 += -eta*dw1
        b1 += -eta*db1
        w2 += -eta*dw2
        b2 += -eta*db2

    return w1,w2,b1,b2

# Train on the full training set; the returned parameters replace the initial ones.
(w1,w2,b1,b2) = multi_LP(X_train,Y,eta,w1,w2,b1,b2)

(10, 60000)
(10, 60000)
77.46089051183256
iter 0, loss: 2.302269
82.53275485043918
iter 10, loss: 1.925252
155.07781388166202
iter 20, loss: 0.830688
173.64855168184724
iter 30, loss: 0.677673
196.68992320024668
iter 40, loss: 0.496658
207.08043760484276
iter 50, loss: 0.545617
211.75019673481236
iter 60, loss: 0.395026
214.60735722440273
iter 70, loss: 0.393553
217.40233178137805
iter 80, loss: 0.342942
219.16951156095146
iter 90, loss: 0.323145
220.4368580058433
iter 100, loss: 0.309664
221.40710463385219
iter 110, loss: 0.299578
222.19149854125757
iter 120, loss: 0.291542
222.95784424642304
iter 130, loss: 0.281855
223.72590361436872
iter 140, loss: 0.271060
224.39258558176618
iter 150, loss: 0.262179
224.97291545308912
iter 160, loss: 0.254793
225.49132090349838
iter 170, loss: 0.248191
225.96895229198194
iter 180, loss: 0.242108
226.40720933746286
iter 190, loss: 0.236414
226.81015278348517
iter 200, loss: 0.231030
227.18789331072077
iter 210, loss: 0.225945
227.5467154643051
iter

In [5]:
# Forward pass of the trained network over the test set.
z1 = w1.T @ X_test + b1
A1 = np.maximum(z1, 0)  # ReLU
z2 = w2.T @ A1 + b2
from sklearn.metrics import accuracy_score
def pred(Z):
    """Return, for each column of the score matrix ``Z``, the index of its largest entry.

    Softmax is strictly increasing in every score, so the winning class can be
    read directly from the raw scores. Skipping the softmax also avoids
    rounding near-equal scores into exact ties, which would make ``argmax``
    fall back to the first index instead of the true maximum.
    """
    return np.argmax(Z, axis=0)

# Predicted class for every test column, then the share of matches against Y_test as a percentage.
y_pred = pred(z2)
print( "Accuracy: %.2f %%" %(100*accuracy_score(Y_test, y_pred.tolist())))

Accuracy: 97.12 %
