In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.utils import np_utils


In [2]:
class Layer:
  def __init__(self):
    self.input = None
    self.output = None

    def forward(self, input):
      #TODO: return output
      pass
    
    def backward(self, output_gradient, learning_rate):
      #TODO: update parameters and return input gradient
      pass

In [3]:
class Dense(Layer):
  def __init__(self, input_size, output_size):
    self.weights = np.random.randn(output_size, input_size)
    self.bias = np.random.randn(output_size, 1)
  
  def forward(self, input):
    self.input = input
    return np.dot(self.weights, self.input) + self.bias

  def backward(self, output_gradient, learning_rate):
    weights_gradient = np.dot(output_gradient, self.input.T)
    input_gradient = np.dot(self.weights.T, output_gradient)
    self.weights -= learning_rate * weights_gradient
    self.bias -= learning_rate * output_gradient
    return input_gradient

In [4]:
def mse(y_true, y_pred):
  return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
  return 2*(y_pred-y_true)

In [5]:
class Activation(Layer):
  def __init__(self, activation, activation_prime):
    self.activation = activation
    self.activation_prime = activation_prime
  
  def forward(self, input):
    self.input = input
    return self.activation(input)

  def backward(self, output_gradient, learning_rate):
    return np.multiply(output_gradient, self.activation_prime(self.input))

In [6]:
class Tanh(Activation):
  def __init__(self):
    def tanh(x):
      return np.tanh(x)

    def tanh_prime(x):
      return 1 - np.tanh(x) ** 2

    super().__init__(tanh, tanh_prime)


class Sigmoid(Activation):
  def __init__(self):
    def sigmoid(x):
      return 1 / (1 + np.exp(-x))
    
    def sigmoid_prime(x):
      s = sigmoid(x)
      return s * (1 - s)

      super().init(sigmoid,sigmoid_prime)
        
class Linear(Activation):
    def __init__(self):
        def linear(x):
            return x
        def linear_prime(x):
            return 1
        super().__init__(linear,linear_prime)

In [7]:

#X = np.reshape([[0,0],[0,1],[1,0],[1,1]],(4,2,1))
#Y = np.reshape([[0],[1],[1],[0]],(4,1,1))
#network = [Dense(2,3),Tanh(),Dense(3,1),Tanh()]

#epochs=500
#learning_rate=0.1
#errorList=[]

def predict(network,input):
    output=input
    for layer in network:
        output=layer.forward(output)
    return output


def train(network,loss,loss_prime,x_train,y_train,epoches,learning_rate=0.1,verbose=False):
    for e in range(epoches):
        error=0
        for x,y in zip(x_train,y_train):
            output=predict(network,x)
            
            error+=loss(y,output)
            grad=loss_prime(y,output)

            for layer in reversed(network):
                grad=layer.backward(grad,learning_rate)
    
        error/=len(x_train)
        if verbose:
            print(f"{e+1}/{epoches},error={error}")


4b

In [14]:
def preprocess_data(x,y,limit):
    x=x.reshape(x.shape[0],28*28,1)
    x=x.astype("float32")/255
    y=np_utils.to_categorical(y)
    y=y.reshape(y.shape[0],10,1)
    return x[:limit],y[:limit]

In [15]:
#Train for MNIST

#load MNIST from server
(x_train,y_train),(x_test,y_test)=mnist.load_data()
x_train,y_train=preprocess_data(x_train,y_train,1000)
x_test,y_test=preprocess_data(x_test,y_test,100)

network = [Dense(28*28,40),Tanh(),Dense(40,10),Tanh()]

train(network,mse,mse_prime,x_train,y_train,epoches=100,learning_rate=0.1,verbose=True)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
1/100,error=0.9161084293563976
2/100,error=0.8952523054391339
3/100,error=0.8777150081140463
4/100,error=0.8711780058974848
5/100,error=0.8902060959150276
6/100,error=0.8727149437545304
7/100,error=0.8704584507341042
8/100,error=0.8701658412169201
9/100,error=0.8786363970938902
10/100,error=0.8789965779063593
11/100,error=0.8841395341147794
12/100,error=0.8858067685485116
13/100,error=0.874726458794852
14/100,error=0.8796933440491796
15/100,error=0.8760759911753184
16/100,error=0.8814098922019783
17/100,error=0.8850066600357619
18/100,error=0.8947364504406403
19/100,error=0.8966763555982356
20/100,error=0.8807898567923736
21/100,error=0.8956857900025631
22/100,error=0.8875497388312678
23/100,error=0.8863406577977675
24/100,error=0.885822779784817
25/100,error=0.8747144473554391
26/100,error=0.8827086868271974
27/100,error=0.8744533233572028
28/100,error=0.8778264633660134
29/100,error=0.88190623

In [16]:
#test
correct=0
for x ,y in zip(x_test,y_test):
    output=predict(network,x)
    if np.argmax(output)==np.argmax(y):        
        correct=correct+1   
    print('pred:',np.argmax(output),'\ttrue:',np.argmax(y))
    #print('pred:',type(np.argmax(output)),'\ttrue:',type(np.argmax(y)))
accuracy=correct/len(y_test)*100
print("accuracy=",accuracy,"%")
print('correct',correct)

pred: 0 	true: 7
pred: 0 	true: 2
pred: 1 	true: 1
pred: 0 	true: 0
pred: 4 	true: 4
pred: 0 	true: 1
pred: 4 	true: 4
pred: 0 	true: 9
pred: 0 	true: 5
pred: 0 	true: 9
pred: 0 	true: 0
pred: 0 	true: 6
pred: 4 	true: 9
pred: 0 	true: 0
pred: 2 	true: 1
pred: 0 	true: 5
pred: 4 	true: 9
pred: 0 	true: 7
pred: 0 	true: 3
pred: 4 	true: 4
pred: 0 	true: 9
pred: 0 	true: 6
pred: 0 	true: 6
pred: 0 	true: 5
pred: 4 	true: 4
pred: 0 	true: 0
pred: 0 	true: 7
pred: 4 	true: 4
pred: 0 	true: 0
pred: 1 	true: 1
pred: 0 	true: 3
pred: 8 	true: 1
pred: 0 	true: 3
pred: 0 	true: 4
pred: 0 	true: 7
pred: 0 	true: 2
pred: 0 	true: 7
pred: 1 	true: 1
pred: 0 	true: 2
pred: 2 	true: 1
pred: 1 	true: 1
pred: 4 	true: 7
pred: 4 	true: 4
pred: 0 	true: 2
pred: 0 	true: 3
pred: 0 	true: 5
pred: 0 	true: 1
pred: 4 	true: 2
pred: 4 	true: 4
pred: 4 	true: 4
pred: 0 	true: 6
pred: 0 	true: 3
pred: 0 	true: 5
pred: 0 	true: 5
pred: 0 	true: 6
pred: 0 	true: 0
pred: 4 	true: 4
pred: 1 	true: 1
pred: 4 	true: