In [46]:
import numpy as np
import pandas as pd
from keras.datasets import mnist
%matplotlib inline
import matplotlib.pyplot as plt
from keras.utils import to_categorical
#define useful function
def sigmoid(x):
    return 1/(1+np.exp(-x))

def cross_entropy(actual, predict, eps=1e-15):
    actual = np.array(actual)
    predict = np.array(predict)
    
    clipped_predict = np.minimum(np.maximum(predict,eps),1-eps)
    loss = actual*np.log(clipped_predict)+(1-actual)*np.log(1-clipped_predict)
    return -1.0*loss.mean()

# parameter load and declare
((x_train,y_train),(x_test,y_test)) = mnist.load_data()
x_train = x_train.reshape(60000,28*28)
x_test = x_test.reshape(10000,28*28)
print(x_test.shape)
y_train_hot = to_categorical(y_train)
y_test_hot = to_categorical(y_test)

(10000, 784)


### solving Mnist by Simple Neural NetWork MLP

In [47]:
num_epoch = 100
learning_rate = 0.01
w=np.random.uniform(low=-1.0,high=1.0,size=(784,10))
b=np.random.uniform(low=-1.0,high=1.0,size=10)

for epoch in range(num_epoch):
    y_predict_hot = x_train.dot(w)+b
    y_predict_hot = sigmoid(y_predict_hot)
    y_predict = np.argmax(y_predict_hot,axis=1)
    accuracy = (y_predict==y_train).mean()
    if accuracy >0.8:
        break
    loss = cross_entropy(y_train_hot,y_predict_hot)
    
    if epoch%10 ==0:
        print("{:2} accuracy = {:.3f}, loss = {:.5f}".format(epoch,accuracy,loss))
    
    w = w -learning_rate*x_train.T.dot(y_predict_hot-y_train_hot)
    b = b - learning_rate*(y_predict_hot-y_train_hot).mean(axis=0)
print('learning finished')
print("{0:2} accuracy = {1:.5f}, loss = {2:.5f}".format(epoch, accuracy, loss))
    

  if __name__ == '__main__':


 0 accuracy = 0.065, loss = 18.60329
10 accuracy = 0.695, loss = 1.72690
20 accuracy = 0.711, loss = 1.39411
30 accuracy = 0.612, loss = 1.99717
40 accuracy = 0.778, loss = 1.66784
learning finished
44 accuracy = 0.84118, loss = 1.35094


In [48]:
## Evaluate

In [49]:
y_predict_hot = x_train.dot(w)+b
y_predict_hot = sigmoid(y_predict_hot)
y_predict = np.argmax(y_predict_hot,axis=1)
correct = (y_predict == y_train)
train_result = pd.DataFrame({'actual':y_train,'predict':y_predict,'x correct?': correct})
train_accuracy = correct.astype('int').mean()
print(train_accuracy)
train_result.head(10)

0.8411833333333333


  if __name__ == '__main__':


Unnamed: 0,actual,predict,x correct?
0,5,5,True
1,0,0,True
2,4,4,True
3,1,1,True
4,9,9,True
5,2,2,True
6,1,1,True
7,3,3,True
8,1,1,True
9,4,4,True


In [50]:
## test

In [51]:

y_predict_hot = x_test.dot(w)+b
y_predict_hot = sigmoid(y_predict_hot)
y_predict = np.argmax(y_predict_hot,axis=1)

accuracy = (y_test==y_predict).mean()
print('accuracy',accuracy)
test_result = pd.DataFrame({'test_data':y_test,'predict':y_predict,'xxx is correct?':(y_test==y_predict)})
test_result.head(10)

accuracy 0.8469


  if __name__ == '__main__':


Unnamed: 0,predict,test_data,xxx is correct?
0,7,7,True
1,2,2,True
2,1,1,True
3,0,0,True
4,4,4,True
5,1,1,True
6,4,4,True
7,9,9,True
8,0,5,False
9,9,9,True


In [52]:
### Solving MNIST problem using Multi-layer Neural Network 2-hidden layer 

In [53]:
num_epoch = 300
learning_rate = 0.01

w2 = np.random.uniform(low=-1.0,high=1.0,size=(784,10))
w1 = np.random.uniform(low=-1.0,high=1.0,size=(784,784))
b2 = np.random.uniform(low=-1.0,high=1.0,size=10)
b1 = np.random.uniform(low=-1.0,high=1.0,size=784)

for epoch in range(num_epoch):
    
    z1 = x_train.dot(w1)+b1
    a1 = sigmoid(z1)
    z2 = a1.dot(w2)+b2
    a2 = sigmoid(z2)
    
    d2 = a2-y_train_hot  #(60000,10)
    d1 = (a1*(1-a1)).T.dot(x_train).dot(w2) # (784,10)
    
    y_predict = np.argmax(a2,axis=1)
    accuracy = (y_predict==y_train).mean()
    loss = cross_entropy(y_train_hot,a2)
    if(accuracy >0.99) :
        break
    if(epoch %1 == 0 ):
        print("{} accuracy = {:.3f}, loss = {:.3f}".format(epoch,accuracy,loss))
    
    w2 = w2-learning_rate*(d2.T.dot(a1)).T
    b2 = b2-learning_rate*d2.mean(axis=0)
    w1 = w1-learning_rate*d1.dot(d2.T).dot(x_train)
    b1 = b1-learning_rate*d1.dot(d2.T).mean(axis=1)
    

print('----' * 10)
print("{0:2} accuracy = {1:.5f}".format(epoch, accuracy))    
    

  if __name__ == '__main__':


0 accuracy = 0.107, loss = 4.152
1 accuracy = 0.097, loss = 11.798
2 accuracy = 0.102, loss = 0.844
3 accuracy = 0.102, loss = 0.842
4 accuracy = 0.102, loss = 0.840
5 accuracy = 0.102, loss = 0.838
6 accuracy = 0.102, loss = 0.835
7 accuracy = 0.102, loss = 0.833
8 accuracy = 0.102, loss = 0.831
9 accuracy = 0.102, loss = 0.828
10 accuracy = 0.102, loss = 0.826
11 accuracy = 0.102, loss = 0.824
12 accuracy = 0.102, loss = 0.822
13 accuracy = 0.102, loss = 0.820
14 accuracy = 0.102, loss = 0.817
15 accuracy = 0.102, loss = 0.815
16 accuracy = 0.102, loss = 0.813
17 accuracy = 0.102, loss = 0.811
18 accuracy = 0.102, loss = 0.809


KeyboardInterrupt: 

In [None]:
##Result

In [None]:
z1 = x_train.dot(w1)+b1
a1 = sigmoid(z1)
z2 = a1.dot(w2)+b2
a2 = sigmoid(z2)
y_predict = np.argmax(a2,axis=1)
correct = (y_predict == y_train)
train_result = pd.DataFrame({'actual':y_train,'predict':y_predict,'x correct?': correct})
train_accuracy = correct.astype('int').mean()
print(train_accuracy)
train_result.head(10)

In [None]:
## test data

In [None]:

z1 = x_test.dot(w1)+b1
a1 = sigmoid(z1)
z2 = a1.dot(w2)+b2
a2 = sigmoid(z2)
y_predict = np.argmax(a2,axis=1)

accuracy = (y_test==y_predict).mean()
print('accuracy',accuracy)
test_result = pd.DataFrame({'test_data':y_test,'predict':y_predict,'xxx is correct?':(y_test==y_predict)})
test_result.head(10)