In [None]:
import numpy as np

class Activation:
    """Pair an activation function with its gradient, selected by name.

    Supported names are "TANH", "SIGMOID", "RELU", "LINEAR" and "SOFTMAX".
    After construction ``self.func`` is the forward function and
    ``self.grad`` is the matching gradient function.

    The ``g_*`` functions are expressed in terms of the activation's *output*
    (for example ``g_sigmoid(y) = (1 - y) * y``), so callers should pass the
    activated value, not the pre-activation signal.
    """

    def relu(self, x):
        """Element-wise ``max(x, 0)``."""
        return np.maximum(x, 0)

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        """Element-wise hyperbolic tangent."""
        return np.tanh(x)

    def linear(self, x):
        """Identity activation; returns ``x`` unchanged."""
        return x

    def softmax(self, x):
        """Softmax normalised over *all* elements of ``x``.

        The global maximum is subtracted before exponentiating so that the
        largest exponent is ``exp(0)``.
        """
        exps = np.exp(x - x.max())
        return exps / np.sum(exps)

    def g_relu(self, x):
        """1 where ``x > 0`` and 0 elsewhere."""
        return 1 * (x > 0)

    def g_sigmoid(self, x):
        """Sigmoid derivative written in terms of the sigmoid output ``x``."""
        return (1 - x) * x

    def g_tanh(self, x):
        """Tanh derivative written in terms of the tanh output ``x``."""
        return 1 - x*x

    def g_linear(self, x):
        """Array of ones shaped like ``x`` (0 wherever ``x`` is NaN)."""
        return 1 * (x==x)

    def g_softmax(self, x):
        """Column sums of the softmax Jacobian ``diag(x) - x x^T``.

        Args:
            x: softmax output, either 1-D with n entries or an (n, 1) column.

        Returns:
            An (n, 1) array whose j-th entry is ``sum_i J[i, j]``.

        Note:
            Each column sum equals ``x_j * (1 - sum(x))``, so for an input
            whose entries sum to 1 the result is zero up to rounding.
            SoftMaxLayer.backward in this file does not call this method.
        """
        s = np.ravel(x)
        # np.diag on an (n, 1) array *extracts* its one-element diagonal
        # rather than building an (n, n) matrix, so work on the flat vector.
        jacobian = np.diag(s) - np.outer(s, s)
        return jacobian.sum(axis=0).reshape(-1, 1)

    def __init__(self, acti):
        """Select the forward/gradient pair named by ``acti``.

        Args:
            acti: one of "TANH", "SIGMOID", "RELU", "LINEAR", "SOFTMAX".

        Raises:
            KeyError: if ``acti`` is not one of the supported names.
        """
        funcs = {
            "TANH" : self.tanh,
            "SIGMOID" : self.sigmoid,
            "RELU" : self.relu,
            "LINEAR" : self.linear,
            "SOFTMAX" : self.softmax
        }

        grads = {
            "TANH" : self.g_tanh,
            "SIGMOID" : self.g_sigmoid,
            "RELU" : self.g_relu,
            "LINEAR" : self.g_linear,
            "SOFTMAX" : self.g_softmax
        }
        self.acti = acti
        self.func = funcs[acti]
        self.grad = grads[acti]

    def __str__(self):
        """Return a one-line description naming the selected activation."""
        return "\nActivation:" + self.acti

In [None]:
import random

class LinearLayer:
    """Fully connected affine layer computing ``y = W x + b``.

    ``W`` has shape ``(n_y, n_x)`` and ``b`` has shape ``(n_y, 1)`` when
    created by default. The layer caches its last input and output on
    ``forward`` and its gradients on ``backward`` so that ``update`` can
    apply a gradient-descent step afterwards.
    """

    def __init__(self, n_x, n_y, W=None, b=None):
        """Create the layer, optionally with caller-supplied parameters.

        Args:
            n_x: number of inputs.
            n_y: number of outputs.
            W: optional weight array of shape ``(n_y, n_x)``. When omitted,
                weights are drawn uniformly from [-1, 1) and scaled by
                ``sqrt(2 / (n_x * n_y))``.
            b: optional bias array. When omitted, a zero ``(n_y, 1)`` column.

        Raises:
            ValueError: if ``W`` is given and its shape is not ``(n_y, n_x)``.
        """
        self.n_y = n_y
        self.n_x = n_x

        # Identity checks are required here: `W == None` on an ndarray is an
        # element-wise comparison and cannot be used as an `if` condition.
        if W is None:
            size = n_x*n_y
            weights = np.random.uniform(-1, 1, size=size).reshape(n_y, n_x)
            scale = np.sqrt(2./size)
            self.W = weights * scale
        else:
            if W.shape != (n_y, n_x):
                raise ValueError("Incorrect weights input")
            self.W = W

        self.b = np.zeros(shape=(n_y, 1)) if b is None else b

        # for debugging purpose
        #self.W = np.ones(shape=(n_y, n_x))
        #self.b = np.ones(shape=(n_y, 1))

        # Values cached by forward().
        self.x = None
        self.signal = None
        self.y = None

        # Gradients cached by backward(); g_y is filled in by subclasses.
        self.g_y = None
        self.g_W = None
        self.g_b = None
        self.g_x = None

    def forward(self, x):
        """Compute and cache ``W x + b``; the result is also kept as ``signal``."""
        self.x = x
        self.y = np.dot(self.W, x) + self.b
        self.signal = self.y
        return self.y

    def backward(self, node_grad):
        """Back-propagate the gradient with respect to this layer's output.

        Args:
            node_grad: gradient of the loss with respect to ``W x + b``.

        Returns:
            ``W^T node_grad``, the gradient with respect to the layer input.
            ``g_W`` (outer product of ``node_grad`` and the cached input) and
            ``g_b`` are stored for ``update``.
        """
        self.g_W = np.outer(node_grad, self.x)
        self.g_b = node_grad
        self.g_x = np.dot(self.W.T, node_grad)
        return self.g_x

    def update(self, learning):
        """Apply one gradient-descent step with step size ``learning``."""
        self.W = self.W - self.g_W * learning
        self.b = self.b - self.g_b * learning

    def __str__(self):
        """Dump the cached input, parameters, output and gradients."""
        s = "\nx is:\n"+str(self.x)
        s += "\nW is:\n" + str(self.W)
        s += "\nb is:\n" + str(self.b)
        s += "\ny is:\n"+str(self.y)
        s += "\ng_y is:\n"+str(self.g_y)
        s += "\ng_W is:\n"+str(self.g_W)
        s += "\ng_b is:\n"+str(self.g_b)
        s += "\ng_x is:\n"+str(self.g_x)
        return s
        

In [None]:
class PercepLayer(LinearLayer):
    """Affine layer followed by an element-wise activation."""

    def __init__(self, n_x, n_y, W=None, b=None, acti="RELU"):
        """Create the layer.

        Args:
            n_x: number of inputs.
            n_y: number of outputs.
            W: optional weight array of shape ``(n_y, n_x)``.
            b: optional bias array.
            acti: activation name understood by ``Activation``.

        Raises:
            ValueError: if ``W`` is given and its shape is not ``(n_y, n_x)``.
        """
        super().__init__(n_x, n_y)
        # Apply caller-supplied parameters after base initialisation,
        # validating the weight shape here.
        if W is not None:
            if W.shape != (n_y, n_x):
                raise ValueError("Incorrect weights input")
            self.W = W
        if b is not None:
            self.b = b
        self.activation = Activation(acti)

    def forward(self, x):
        """Return ``activation(W x + b)`` and cache it as ``self.y``.

        Raises:
            ValueError: if ``x`` does not contain exactly ``n_x`` elements.
        """
        if x.size != self.n_x:
            raise ValueError("Incorrect data input", x.size, self.n_x)
        # linear part first
        signal = super().forward(x)
        # nonlinear
        self.y = self.activation.func(signal)
        return self.y

    def backward(self, g_y):
        """Back-propagate ``g_y`` through the activation and the affine part.

        Args:
            g_y: gradient of the loss with respect to this layer's output.

        Returns:
            Gradient with respect to the layer input.

        Raises:
            ValueError: if ``g_y`` does not contain exactly ``n_y`` elements.
        """
        if g_y.size != self.n_y:
            raise ValueError("Incorrect data input")
        # nonlinear: the activation gradient is evaluated on the cached
        # output and multiplied element-wise with the incoming gradient
        self.g_y = g_y
        node_grad = self.activation.grad(self.y)
        node_grad = node_grad * g_y
        # linear
        self.g_x = super().backward(node_grad)
        return self.g_x

    def update(self, learning):
        """Apply one gradient-descent step with step size ``learning``."""
        super().update(learning)

    def __str__(self):
        """Base-layer dump followed by the activation description."""
        s = super().__str__()
        s += str(self.activation)
        return s
            

In [None]:
class SoftMaxLayer(PercepLayer):
    """Output layer: affine transform, softmax, and arg-max prediction.

    ``backward`` takes the true class index and uses ``y - one_hot(truth)``
    as the gradient with respect to the pre-softmax signal, i.e. the loss
    and the softmax are differentiated together.
    """

    def __init__(self, n_x, n_y, W=None, b=None):
        """Create the layer; ``W`` and ``b`` are passed on to the parent."""
        super().__init__(n_x, n_y, W=W, b=b, acti="SOFTMAX")
        self.predict = None
        self.truth = None

    def forward(self, x):
        """Return the index of the largest softmax output.

        The softmax output itself is cached in ``self.y``.
        """
        # hidden part first, computing softmax
        self.y = super().forward(x)
        self.predict = self.y.argmax()
        return self.predict

    def backward(self, in_truth):
        """Back-propagate from the true class index ``in_truth``.

        Returns:
            Gradient with respect to the layer input.
        """
        # softmax gradient, dL/ds (combining the cost and activation layer)
        self.truth = np.zeros(shape=(self.n_y,1))
        self.truth[in_truth] = 1
        node_grad = self.y - self.truth
        # then linear layer; PercepLayer.backward is skipped on purpose
        # because the activation gradient is already folded into node_grad
        self.g_x = LinearLayer.backward(self, node_grad)
        return self.g_x

    def update(self, learning):
        """Apply one gradient-descent step with step size ``learning``."""
        super().update(learning)

    def __str__(self):
        """Parent dump followed by the last prediction and one-hot truth."""
        s = super().__str__()
        s += "\nPrediction:\n" + str(self.predict)
        s += "\nTruth:\n" + str(self.truth)
        return s
            

In [None]:
class MSELayer(PercepLayer):
    """Output layer with a linear activation for regression targets.

    ``backward`` uses ``y - truth`` as the gradient with respect to the
    layer output, which is the derivative of ``0.5 * sum((y - truth)**2)``.
    """

    def __init__(self, n_x, n_y, W=None, b=None):
        """Create the layer; ``W`` and ``b`` are passed on to the parent."""
        super().__init__(n_x, n_y, W=W, b=b, acti="LINEAR")
        self.predict = None
        self.truth = None

    def forward(self, x):
        """Return the layer output, which is also stored as ``self.predict``."""
        # affine part followed by the identity activation
        self.y = super().forward(x)
        self.predict = self.y
        return self.predict

    def backward(self, label):
        """Back-propagate from the target value ``label``.

        Returns:
            Gradient with respect to the layer input.
        """
        # error gradient with respect to the (linear) output
        self.truth = label
        node_grad = self.y - self.truth
        # then linear layer; the identity activation contributes nothing,
        # so PercepLayer.backward is bypassed
        self.g_x = LinearLayer.backward(self, node_grad)
        return self.g_x

    def update(self, learning):
        """Apply one gradient-descent step with step size ``learning``."""
        super().update(learning)

    def __str__(self):
        """Parent dump followed by the last prediction and target."""
        s = super().__str__()
        s += "\nPrediction:\n" + str(self.predict)
        s += "\nTruth:\n" + str(self.truth)
        return s
            

In [None]:
class MLP:
    """Multi-layer perceptron: one hidden PercepLayer and a SoftMaxLayer.

    Training is done one sample at a time via ``train_1sample``.
    """

    def __init__(self, n_x=28*28, n_y=28*28+1, n_class=10, learning=0.1, acti="RELU"):
        """Build the network.

        Args:
            n_x: number of input features.
            n_y: hidden-layer width; ``None`` selects ``n_x + 1``.
            n_class: number of output classes.
            learning: step size passed to each layer's ``update``.
            acti: activation name for the hidden layer.
        """
        self.type = "MLP"

        self.n_x = n_x
        self.n_y = n_y if n_y is not None else n_x + 1
        # Build both layers from the resolved width so that n_y=None works.
        self.hidden_layer1 = PercepLayer(n_x, self.n_y, acti=acti)
        #self.hidden_layer2 = PercepLayer(n_y, n_y)
        self.output_layer = SoftMaxLayer(self.n_y, n_class)

        self.learning = learning

    def forward(self, x):
        """Run a forward pass and return the output layer's prediction."""
        y = self.hidden_layer1.forward(x)
        #y = self.hidden_layer2.forward(y)
        return self.output_layer.forward(y)

    def backward(self, label):
        """Back-propagate from the true class ``label`` through both layers."""
        g_x = self.output_layer.backward(label)
        #g_x = self.hidden_layer2.backward(g_x)
        self.hidden_layer1.backward(g_x)

    def update(self):
        """Apply the cached gradients to every layer using ``self.learning``."""
        self.hidden_layer1.update(self.learning)
        #self.hidden_layer2.update(self.learning)
        self.output_layer.update(self.learning)

    def train_1sample(self, x, label):
        """Perform forward, backward and update for a single sample."""
        self.forward(x)
        self.backward(label)
        self.update()

    def predict_1sample(self, x):
        """Return the predicted class for ``x`` without updating weights."""
        return self.forward(x)
 

In [None]:
%%capture
from IPython.core.debugger import set_trace
def is_main_module():
    """Tell whether this code runs as ``__main__`` with no ``__file__`` global.

    Returns:
        True only when ``__name__`` is ``'__main__'`` and the module globals
        contain no ``'__file__'`` entry; False otherwise.
    """
    if __name__ != '__main__':
        return False
    return '__file__' not in globals()

In [None]:
def run_mlp_test():
    """Train a tiny 3-4-3 MLP on six toy samples and print three predictions.

    Each training sample is a permutation of (1, 2, 3) with a class label in
    {0, 1, 2}. The network is trained for 40 passes over the six samples,
    one sample at a time, and then queried on three of the training inputs.
    """
    net = MLP(3, 4, 3, acti="RELU")

    train_x = np.array([
        [1, 2, 3],
        [2, 1, 3],
        [3, 1, 2],
        [3, 2, 1],
        [1, 3, 2],
        [2, 3, 1],
    ])
    train_y = np.array([2, 2, 0, 0, 1, 1])

    n_passes = 40
    for _ in range(n_passes):
        # Uncomment to inspect the layers while training:
        #    print(net.hidden_layer1)
        #    print(net.output_layer)
        for sample, label in zip(train_x, train_y):
            net.train_1sample(sample.reshape(-1, 1), label)

    probes = np.array([[1, 2, 3], [2, 3, 1], [3, 1, 2]])
    for probe in probes:
        guess = net.predict_1sample(probe.reshape(-1, 1))
        print("\nPredict: ", probe, guess)

# Run the toy-data check only when is_main_module() reports that this code
# is executing as the main module.
if is_main_module():
    run_mlp_test()

In [None]:
%run 'mnist.ipynb'
def run_mlp_mnist():
    """Train a sigmoid MLP through the MNIST helper for five rounds.

    After every round the accuracy returned by the helper's ``test`` is
    printed. ``MNIST`` is not defined in this cell; presumably it comes from
    the ``%run 'mnist.ipynb'`` line above (verify). The meaning of the ``-1``
    argument to ``train`` / ``test`` is defined by that helper.

    Returns:
        The ``MNIST`` helper object wrapping the trained network.
    """
    network = MLP(28*28, 28*28+1, acti="SIGMOID")
    runner = MNIST(network, folder="../convolution-network")

    n_rounds = 5
    for round_no in range(n_rounds):
        runner.train(-1)
        score = runner.test(-1)
        print("\nAccuracy of epoch {} is {}".format(round_no, score))

    return runner

# `mnist` is bound to None first, so the name exists even when the MNIST run
# is skipped; it is replaced by the helper object only when
# is_main_module() is true.
mnist = None
if is_main_module():
    mnist = run_mlp_mnist()

In [None]:
%run 'cifar-10.ipynb'
def run_mlp_cifar10():
    """Train a sigmoid MLP through the CIFAR10 helper for five rounds.

    The network takes 32*32*3 inputs and has 32*32*2+1 hidden units. After
    every round the accuracy returned by the helper's ``test`` is printed.
    ``CIFAR10`` is not defined in this cell; presumably it comes from the
    ``%run 'cifar-10.ipynb'`` line above (verify). The meaning of the ``-1``
    argument to ``train`` / ``test`` is defined by that helper.

    Returns:
        The ``CIFAR10`` helper object wrapping the trained network.
    """
    network = MLP(32*32*3, 32*32*2+1, learning=0.1, acti="SIGMOID")
    runner = CIFAR10(network, folder="../convolution-network")

    n_rounds = 5
    for round_no in range(n_rounds):
        runner.train(-1)
        score = runner.test(-1)
        print("\nAccuracy of epoch {} is {}".format(round_no, score))

    return runner

# `cifar` is bound to None first, so the name exists even when the CIFAR-10
# run is skipped; it is replaced by the helper object only when
# is_main_module() is true.
cifar = None
if is_main_module():
    cifar = run_mlp_cifar10()