In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import sys
import warnings
from keras.datasets import mnist 
from keras.utils import np_utils

# Load the MNIST train/test split, flatten every image into one row of
# `input_dim` values, cast to float32 and divide by 255 (presumably to map
# 8-bit pixel intensities into [0, 1] -- confirm against the dataset docs).
(X_train, y_train), (X_test, y_test) = mnist.load_data()
input_dim = 784  # 28 * 28 values per flattened image
X_train = X_train.reshape(60000, input_dim).astype('float32') / 255
X_test = X_test.reshape(10000, input_dim).astype('float32') / 255
nb_classes = 10
m = X_train.shape[0]  # number of training rows

Using TensorFlow backend.


In [2]:
# One-hot encode the integer label vectors of both splits into
# `nb_classes` columns.
Y_train, Y_test = [
    np_utils.to_categorical(labels, nb_classes)
    for labels in (y_train, y_test)
]

In [3]:
def softmax(z):
    """Return the softmax of ``z`` taken along its last axis.

    The maximum of each slice is subtracted before exponentiating so that
    large inputs do not overflow; the result is unchanged by this shift.
    """
    row_max = np.max(z, axis=-1, keepdims=True)
    exponentials = np.exp(z - row_max)
    normaliser = np.sum(exponentials, axis=-1, keepdims=True)
    return exponentials / normaliser

In [4]:
def cce(a, y, eps=1e-12):
    """Categorical cross-entropy between predictions ``a`` and targets ``y``.

    Parameters
    ----------
    a : array-like
        Predicted probabilities; classes lie along the last axis.
    y : array-like
        Target distribution (e.g. one-hot rows) broadcastable against ``a``.
    eps : float, optional
        Lower bound applied to ``a`` before taking the logarithm.

    Returns
    -------
    numpy.ndarray
        ``-sum(y * log(a))`` over the last axis, i.e. one loss per sample.

    Notes
    -----
    Without the lower bound a probability of exactly 0 gives ``log(0) = -inf``
    and ``0 * -inf = nan``, which turns the whole sample loss (and any mean
    taken over it) into ``nan`` even when the prediction is perfect. As a
    consequence a fully wrong prediction is reported as ``-log(eps)`` rather
    than ``inf``.
    """
    safe_a = np.maximum(a, eps)
    return -np.sum(y * np.log(safe_a), axis=-1)

In [5]:
def predict(w,b,x):
    """Return ``softmax(x.dot(w) + b)`` for inputs ``x``.

    ``w`` and ``b`` are the weights and bias of the linear layer whose
    outputs are normalised by ``softmax`` along the last axis.
    """
    logits = x.dot(w) + b
    return softmax(logits)

In [6]:
def get_acc(y_predic,y_origin):
    """Return the classification accuracy as a percentage.

    Parameters
    ----------
    y_predic : array-like
        One row of class scores per sample; the predicted class of a row is
        the index of its largest entry.
    y_origin : array-like
        Integer class label of each sample.

    Returns
    -------
    float
        Percentage of the samples in ``y_origin`` whose predicted class
        equals the label. ``nan`` when ``y_origin`` is empty.

    Raises
    ------
    IndexError
        If ``y_predic`` has fewer rows than ``y_origin`` has labels.
    """
    labels = np.asarray(y_origin).reshape(-1)
    n_samples = labels.shape[0]
    if n_samples == 0:
        # No samples to score: the accuracy is undefined.
        return float("nan")

    scores = np.asarray(y_predic)
    if scores.shape[0] < n_samples:
        raise IndexError("y_predic has fewer rows than y_origin has labels")

    # Only the first n_samples rows are scored; each row is flattened so the
    # arg-max is taken over all of its entries.
    predicted = scores[:n_samples].reshape(n_samples, -1).argmax(axis=1)
    misses = predicted != labels
    return (100.0 - np.mean(misses * 100.0))

In [7]:
def init(epochs=20, n_class=10, verbose=1, lr=0.01, batch_size=4, seed=None):
    """Fit a softmax-regression classifier with mini-batch SGD and print its accuracy.

    The model is ``softmax(x.dot(weights) + bias)``. Training uses the
    module-level ``X_train`` (feature rows) and ``Y_train`` (one-hot targets).
    After the last epoch the accuracy on ``X_train``/``y_train`` and on
    ``X_test``/``y_test`` is printed. Nothing is returned.

    Parameters
    ----------
    epochs : int
        Number of passes over the training set.
    n_class : int
        Number of output classes; must match the number of columns of
        ``Y_train``.
    verbose : int
        When greater than 0, print the epoch counter and the mean training
        loss after every epoch.
    lr : float
        Learning rate of each gradient step.
    batch_size : int
        Rows per mini-batch; the last batch of an epoch may be smaller.
    seed : int or None
        If given, seeds NumPy's global random generator so that the bias
        initialisation and the per-epoch shuffles are repeatable.
    """
    if seed is not None:
        np.random.seed(seed)

    # Read the sample count from the array itself rather than from the
    # module-level `m`, which can go stale if X_train is redefined.
    n_samples = X_train.shape[0]
    weights = np.zeros((X_train.shape[1], n_class))
    bias = np.random.randn(1, n_class)
    history = {
            'loss': []
    }
    for epoch in range(epochs):
        if verbose > 0:
            print("Epoch {}/{}".format(epoch+1,epochs))

        # Reshuffle every epoch so mini-batches differ between passes.
        shuffled_indices = np.random.permutation(n_samples)
        X_train_shuffled = X_train[shuffled_indices]
        Y_train_shuffled = Y_train[shuffled_indices]
        for i in range(0, n_samples, batch_size):
            xi = X_train_shuffled[i:i+batch_size]
            yi = Y_train_shuffled[i:i+batch_size]
            a = predict(weights, bias, xi)

            # For softmax + cross-entropy the gradient w.r.t. the logits is
            # simply (prediction - target).
            gradient = a - yi

            # Average over the rows actually in this batch so a short final
            # batch is scaled the same way as the bias update below.
            weights = weights - lr * (xi.T @ gradient) / xi.shape[0]
            bias = bias - lr * gradient.mean(axis=0)

        # Mean loss over the whole training set after this epoch's updates.
        history['loss'].append(
            cce(predict(weights, bias, X_train_shuffled), Y_train_shuffled).mean()
        )
        if verbose > 0:
            print("Loss= {}\n".format(history['loss'][-1]))

    pred_train = predict(weights, bias, X_train)
    print("Accuracy Train: ",get_acc(pred_train,y_train))
    pred_test = predict(weights, bias,X_test)
    print("Accuarcy Test:  ",get_acc(pred_test,y_test))

In [8]:
# Train the classifier; prints the per-epoch loss and the final train/test accuracy.
init()

Epoch 1/20
Loss: 0.3282673598661668

Epoch 2/20
Loss: 0.3026128813594132

Epoch 3/20
Loss: 0.2916717070436777

Epoch 4/20
Loss: 0.28170635733801136

Epoch 5/20
Loss: 0.2786477171044581

Epoch 6/20
Loss: 0.27501129857686357

Epoch 7/20
Loss: 0.26905595875307864

Epoch 8/20
Loss: 0.26764513725620165

Epoch 9/20
Loss: 0.2655952849205713

Epoch 10/20
Loss: 0.2645897471439178

Epoch 11/20
Loss: 0.2634790229531355

Epoch 12/20
Loss: 0.2590799333643018

Epoch 13/20
Loss: 0.2585442041744945

Epoch 14/20
Loss: 0.2611932342212328

Epoch 15/20
Loss: 0.25599514796275824

Epoch 16/20
Loss: 0.2541437667535881

Epoch 17/20
Loss: 0.2549452116944405

Epoch 18/20
Loss: 0.25635915076919824

Epoch 19/20
Loss: 0.25262894375922923

Epoch 20/20
Loss: 0.25064496158231453

Accuracy Train:  93.09166666666667
Accuarcy Test:   92.51
