In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import sys
import warnings
from keras.datasets import mnist 
from keras.utils import np_utils

# Load the MNIST data, already shuffled and split between train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

input_dim = 784  # 28 * 28 pixels per flattened image
nb_classes = 10  # number of classes used when encoding the labels

# Flatten every image into one row of `input_dim` values, convert to float32 and
# divide by 255. The -1 lets NumPy infer the number of samples from the array
# itself instead of relying on hard-coded dataset sizes.
X_train = X_train.reshape(-1, input_dim).astype('float32') / 255
X_test = X_test.reshape(-1, input_dim).astype('float32') / 255

m = X_train.shape[0]  # number of training samples

Using TensorFlow backend.


In [2]:
# Convert both label vectors to categorical (one-hot) matrices with
# `nb_classes` columns, applying the same encoding to train and test labels.
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes)
    for labels in (y_train, y_test)
)

In [3]:
def softmax(z):
    """Return the softmax of `z`, normalised along its last axis.

    The maximum of each slice along the last axis is subtracted before
    exponentiating, so the largest exponent evaluated is exp(0). This leaves
    the result mathematically unchanged while keeping `np.exp` from overflowing.

    Parameters
    ----------
    z : array-like
        Scores; the last axis is the one that is normalised.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as `z` whose entries along the last axis sum to 1.
    """
    shifted = z - np.max(z, axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    normaliser = np.sum(exponentials, axis=-1, keepdims=True)
    return exponentials / normaliser

In [4]:
def cce(a, y, eps=1e-12):
    """Categorical cross-entropy of predictions `a` against targets `y`.

    Computes ``-sum(y * log(a))`` over the last axis.

    Parameters
    ----------
    a : array-like
        Predicted probabilities; the last axis indexes the classes.
    y : array-like
        Target distribution with the same shape as `a` (for example one-hot rows).
    eps : float, optional
        Lower bound applied to `a` before taking the logarithm.

    Returns
    -------
    numpy.ndarray
        The loss for each sample, i.e. `a`'s shape with the last axis removed.
    """
    # A probability that underflows to exactly 0 would give log(0) = -inf, and
    # 0 * -inf evaluates to nan, which would poison any mean taken over the
    # losses. Flooring at `eps` keeps every term finite.
    safe_a = np.maximum(a, eps)
    return -np.sum(y * np.log(safe_a), axis=-1)

In [5]:
def predict(w, b, x):
    """Return the softmax output of the linear model for inputs `x`.

    Parameters
    ----------
    w : weight matrix multiplied on the right of `x`.
    b : bias added to the product `x.dot(w)`.
    x : input samples.

    Returns
    -------
    The result of applying `softmax` to the scores ``x.dot(w) + b``.
    """
    scores = x.dot(w) + b
    return softmax(scores)

In [6]:
def get_acc(y_predic, y_origin):
    """Return the classification accuracy as a percentage.

    Parameters
    ----------
    y_predic : array-like
        One row of class scores per sample; the predicted class of a sample is
        the index of the largest value in its row.
    y_origin : array-like
        The true class index of every sample (not one-hot encoded).

    Returns
    -------
    float
        ``100.0`` minus the percentage of misclassified samples.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of labels.
    """
    predicted = np.argmax(np.asarray(y_predic), axis=-1)
    labels = np.ravel(y_origin)
    if predicted.shape != labels.shape:
        # Comparing arrays of different lengths would either broadcast or
        # silently score only part of the data, so fail loudly instead.
        raise ValueError(
            "got {} predictions for {} labels".format(predicted.shape, labels.shape)
        )

    # One vectorised comparison replaces a Python loop with an argmax call per
    # sample; the arithmetic below is the same as scoring each sample 0 or 100.
    misclassified = predicted != labels
    return 100.0 - np.mean(misclassified * 100.0)

In [7]:
def init(epochs=100, n_class=10, lr=0.01, batch_size=4, seed=None):
    """Train a softmax-regression classifier with mini-batch SGD and print results.

    The function reads the module-level arrays `X_train`, `Y_train` (one-hot
    targets), `y_train`, `X_test` and `y_test`, and uses the helpers `softmax`,
    `predict`, `cce` and `get_acc`. After every epoch it prints the mean
    cross-entropy over the training set; after the last epoch it prints the
    train and test accuracy.

    Parameters
    ----------
    epochs : int
        Number of passes over the training set.
    n_class : int
        Number of output classes; must match the number of columns of `Y_train`.
    lr : float
        Learning rate of the gradient step.
    batch_size : int
        Number of samples per mini-batch (the last batch may be shorter).
    seed : int or None
        When given, the bias initialisation and the per-epoch shuffling use a
        private generator seeded with this value, making the run repeatable.
        When None, the global `np.random` state is used.

    Returns
    -------
    None
        Results are only printed; the trained parameters are not returned.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # The np.random module and a RandomState instance both provide `randn` and
    # `permutation`, so either can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    n_samples = X_train.shape[0]
    weights = np.zeros((X_train.shape[1], n_class))
    bias = rng.randn(1, n_class)
    history = {
            'loss': []
    }

    for epoch in range(epochs):
        print("Epoch {}/{}".format(epoch+1,epochs))

        # Visit the samples in a fresh random order every epoch.
        shuffled_indices = rng.permutation(n_samples)
        X_train_shuffled = X_train[shuffled_indices]
        Y_train_shuffled = Y_train[shuffled_indices]

        for i in range(0, n_samples, batch_size):
            xi = X_train_shuffled[i:i+batch_size]
            yi = Y_train_shuffled[i:i+batch_size]

            # For softmax combined with cross-entropy, the gradient with
            # respect to the scores is simply (probabilities - targets).
            gradient = softmax(xi.dot(weights) + bias) - yi

            # Average over the samples actually in this batch so that a short
            # final batch is scaled the same way as the bias update below.
            weights = weights - lr * (xi.T @ gradient) / len(xi)
            bias = bias - lr * gradient.mean(axis=0)

        # Mean loss over the whole training set with the end-of-epoch parameters.
        epoch_loss = cce(predict(weights, bias, X_train_shuffled), Y_train_shuffled).mean()
        history['loss'].append(epoch_loss)
        print("Loss= {}\n".format(epoch_loss))

    pred_train = predict(weights, bias, X_train)
    print("Accuracy Train: ",get_acc(pred_train,y_train))
    pred_test = predict(weights, bias,X_test)
    print("Accuracy Test:  ",get_acc(pred_test,y_test))

In [8]:
# Run the training defined in init(): it prints the loss after every epoch,
# followed by the accuracy on the training and test sets.
init()

Epoch 1/100
Loss= 0.33469641581631737

Epoch 2/100
Loss= 0.3026566000810552

Epoch 3/100
Loss= 0.29065051110726187

Epoch 4/100
Loss= 0.28199820262472464

Epoch 5/100
Loss= 0.2804839954764624

Epoch 6/100
Loss= 0.27331166766207576

Epoch 7/100
Loss= 0.2695757272348435

Epoch 8/100
Loss= 0.26816170285301055

Epoch 9/100
Loss= 0.26558396775467963

Epoch 10/100
Loss= 0.2642129736351226

Epoch 11/100
Loss= 0.26155659642147194

Epoch 12/100
Loss= 0.25877276673306887

Epoch 13/100
Loss= 0.25868075635791526

Epoch 14/100
Loss= 0.2603023905456453

Epoch 15/100
Loss= 0.25467450981265516

Epoch 16/100
Loss= 0.25798103567752523

Epoch 17/100
Loss= 0.25641988132970345

Epoch 18/100
Loss= 0.2521800716054014

Epoch 19/100
Loss= 0.25147873119678543

Epoch 20/100
Loss= 0.2522317748352751

Epoch 21/100
Loss= 0.25127037880805836

Epoch 22/100
Loss= 0.25074281509737256

Epoch 23/100
Loss= 0.24865767105828337

Epoch 24/100
Loss= 0.2499747234650532

Epoch 25/100
Loss= 0.24915614687315635

Epoch 26/100
Loss