In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import sys
import warnings
from keras.datasets import mnist 
from keras.utils import np_utils

# Load MNIST. The data comes back already shuffled and split between
# train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a single row. The number of rows and the row
# width are taken from the loaded arrays instead of being hard-coded, so the
# cell keeps working if a subset or another image size is loaded.
# Dividing by 255 presumably maps 8-bit pixel intensities into [0, 1].
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32') / 255

input_dim = X_train.shape[1]  # 28*28 = 784 for MNIST
nb_classes = 10
m = X_train.shape[0]  # number of training samples

Using TensorFlow backend.


In [2]:
# One-hot encode the integer labels of both splits (nb_classes columns each).
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes)
    for labels in (y_train, y_test)
)

In [3]:
class MulticlassModel:
    """Softmax regression classifier trained with mini-batch gradient descent.

    The model is a single linear layer (``x @ weights + bias``) followed by a
    softmax. Parameters are plain NumPy arrays and are created lazily in
    ``fit`` when they were not supplied to the constructor.
    """

    def __init__(self, n_class=None, weights=None, bias=None):
        """Store optional pre-set parameters.

        Args:
            n_class: Number of classes; inferred from ``y`` in ``fit`` if None.
            weights: Optional initial weight matrix, used as
                ``x.dot(weights)``.
            bias: Optional initial bias, added to ``x.dot(weights)``.
        """
        self.n_class = n_class
        self.weights = weights
        self.bias = bias
        # Snapshots of softmax(weights) appended periodically during fit().
        self.weights_history = []

    # softmax activation
    def softmax(self, z):
        """Return the softmax of ``z`` along its last axis.

        The per-row maximum is subtracted before exponentiating so that
        ``np.exp`` cannot overflow; this does not change the result.
        """
        z_exp = np.exp(z - np.max(z, -1, keepdims=True))
        return z_exp / np.sum(z_exp, -1, keepdims=True)

    # categorical cross entropy
    def cce(self, a, y):
        """Return the categorical cross entropy of each row.

        Args:
            a: Predicted probabilities, same shape as ``y``.
            y: Target distribution (one-hot in this notebook).

        Returns:
            ``-sum(y * log(a))`` over the last axis.
        """
        # A probability that underflowed to exactly 0 would give
        # 0 * log(0) = NaN (or inf for the true class); floor it instead.
        eps = 1e-12
        return -np.sum(y * np.log(np.maximum(a, eps)), axis=-1)

    def fit(self, x, y, epochs=1, test_x=None, test_y=None, lr=0.01, batch_size=4, verbose=0):
        """Train the model with mini-batch gradient descent.

        Args:
            x: Training inputs, one sample per row.
            y: Training targets, one row per sample and one column per class.
            epochs: Number of passes over the training data.
            test_x: Accepted for interface compatibility; currently unused.
            test_y: Accepted for interface compatibility; currently unused.
            lr: Learning rate.
            batch_size: Number of samples per gradient step (the last batch
                of an epoch may be smaller).
            verbose: When > 0, print the epoch number and the epoch loss.

        Returns:
            dict with key ``'loss'``: the mean cross entropy over the whole
            training set, measured once at the end of every epoch.
        """
        self.lr = lr
        self.batch_size = batch_size

        if self.n_class is None:
            self.n_class = y.shape[1]
        if self.weights is None:
            self.weights = np.zeros((x.shape[1], self.n_class))
        if self.bias is None:
            self.bias = np.random.randn(1, self.n_class)

        history = {
            'loss': []
        }

        n_samples = x.shape[0]
        # Record a weight snapshot roughly 30 times per epoch. An integer
        # stride keeps the modulo test exact; for sample counts divisible by
        # 30 it selects the same batches as a stride of n_samples / 30.
        snapshot_stride = max(1, n_samples // 30)

        # for each epoch/iteration
        for epoch in range(epochs):
            if verbose > 0:
                print("Epoch {}/{}".format(epoch+1,epochs))

            # Shuffle using the size of the data actually passed in, not a
            # notebook-level global.
            shuffled_indices = np.random.permutation(n_samples)
            x_shuffled = x[shuffled_indices]
            y_shuffled = y[shuffled_indices]

            for i in range(0, n_samples, batch_size):
                xi = x_shuffled[i:i+batch_size]
                yi = y_shuffled[i:i+batch_size]
                z = xi.dot(self.weights) + self.bias
                a = self.softmax(z)

                # Gradient of the cross entropy w.r.t. the logits.
                gradient = a - yi

                # Average over the real batch length so a short final batch
                # is scaled the same way as the bias update below.
                self.weights = self.weights - lr * (gradient.T @ xi).T / xi.shape[0]
                self.bias = self.bias - lr * gradient.mean(axis=0)

                if i % snapshot_stride == 0:
                    self.weights_history += [self.softmax(self.weights)]

            history['loss'] += [self.cce(self.predict(x_shuffled), y_shuffled).mean()]

            if verbose > 0:
                print("Loss: {}\n".format(history['loss'][-1]))

        return history

    def predict(self, x):
        """Return class probabilities (softmax of the linear layer) for ``x``."""
        return self.softmax(x.dot(self.weights) + self.bias)

In [4]:
# fit() draws its initial bias and its per-epoch shuffling from NumPy's
# global random state, so seed it to make the training run repeatable.
np.random.seed(42)

model = MulticlassModel()
history = model.fit(X_train, Y_train, epochs=10, verbose=1, lr=0.01, batch_size=4)

Epoch 1/10
Loss: 0.33018503051402487

Epoch 2/10
Loss: 0.3078837961499815

Epoch 3/10
Loss: 0.2926140724086701

Epoch 4/10
Loss: 0.28232766804313464

Epoch 5/10
Loss: 0.2813970795655232

Epoch 6/10
Loss: 0.2742913880393783

Epoch 7/10
Loss: 0.27113971251835284

Epoch 8/10
Loss: 0.2704659185363247

Epoch 9/10
Loss: 0.2655948539326872

Epoch 10/10
Loss: 0.2629228773805314



In [5]:
def get_acc(y_predic, y_origin):
    """Return the classification accuracy in percent.

    Args:
        y_predic: Per-class scores, one row per sample; the predicted class
            of a sample is the index of the largest value in its row.
        y_origin: Integer class labels, one per sample (array or sequence;
            an (n, 1) column is flattened).

    Returns:
        Percentage of samples whose predicted class equals the label.

    Raises:
        ValueError: If the number of predictions and labels differ.
    """
    labels = np.asarray(y_origin).reshape(-1)
    predicted = np.argmax(np.asarray(y_predic), axis=-1).reshape(-1)
    if predicted.shape[0] != labels.shape[0]:
        raise ValueError(
            "got {} predictions for {} labels".format(predicted.shape[0], labels.shape[0])
        )

    # Vectorised error indicator: True where the prediction is wrong.
    wrong = predicted != labels
    return 100.0 - np.mean(wrong * 100.0)


In [6]:
# Class probabilities for both splits; accuracy is scored against the
# integer labels (y_*), not the one-hot targets (Y_*).
pred_train = model.predict(X_train)
pred_test = model.predict(X_test)

print("Accuracy Train: ", get_acc(pred_train, y_train))
print("Accuracy Test:  ", get_acc(pred_test, y_test))

Accuracy Train:  92.78666666666666
Accuarcy Test:   92.43
