In [147]:
import numpy as np
import matplotlib.pyplot as plt

def linear(x):
    return x

def sigmoid(x):
    return 1/(1+np.exp(-x))

def relu(x):
    return np.maximum(0,x)

def softmax(x):
    new_x = np.zeros_like(x)
    for idx in range(x.shape[0]):
        tmp = x[idx,:]
        m = np.max(tmp)
        tmp -= m
        new_x[idx] = np.exp(tmp)/np.sum(np.exp(tmp))
    return new_x

def tanh(x):
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))

In [115]:
def mean_squared_error(t,y):
    return np.mean(np.square(t-y))

def binary_crossentropy(t,y):
    return -np.mean(t*np.log(y) + (1-t)*np.log(1-y))

def categorical_crossentropy(t,y):
    eps = 1e-7
    return -np.mean(t*np.log(y+eps))

In [116]:
# import numpy as np
# import matplotlib.pyplot as plt

# def linear(x):
#     return x

# def sigmoid(x):
#     return 1/(np.exp(-x)+1)

# def relu(x):
#     return np.maximum(x, 0)

# def softmax(x):
#         new_x = np.zeros_like(x)
#         for idx in range(x.shape[0]):
#             tmp = x[idx,:]
#             m = np.max(tmp)
#             tmp -= m
#             new_x[idx] = np.exp(tmp)/np.sum(np.exp(tmp))
#         return new_x
    
# def tanh(x):
#     return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

# def mean_squared_error(t,y):
#     return np.mean(np.square(t-y))

# def binary_crossentropy(t,y):
#     return -np.mean(t*np.log(y) + (1-t)*np.log(1-y))

# def categorical_crossentropy(t,y):
#     eps = 1e-7
#     return -np.mean(t*np.log(y+eps))

In [134]:
class ReLU:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        out = np.maximum(0,x)
        self.out = np.where(out > 0,1, 0)
        return out 
    
    def backward(self,out):
        dout = self.out*out
        return dout


class Sigmoid:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        return sigmoid(x)
    
    def backward(self,out):
        dout = sigmoid(out)*(1-sigmoid(out))
        return dout

class Affine:
    def __init__(self,w,b):
        self.x = None
        self.w = w
        self.b = b
   
    def forward(self,x):
        self.x = x  
        self.out = np.dot(self.x,self.w) + self.b
        return self.out
   
    def backward(self,out):
        dout = np.dot(out,self.w.T) # 100,10
        dW = np.dot(self.x.T,out)
        db = np.sum(out,axis=0)
        return dout

class Tanh:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        self.out = tanh(x)
        return self.out
    
    def backward(self,out):
        dout = 1-tanh(out)**2
        return dout

class SoftmaxWithLogit:
    def __init__(self):
        self.out = None
        self.y = None
        self.y_pred = None
    
    def forward(self,x):
        self.y_pred = softmax(x)
        return self.y_pred
    
    def backward(self,out=1):
        out = 1
        self.dout = out*(self.y - self.y_pred)
        return self.dout

In [163]:
class Net:
    def __init__(self,input_shape):
        self.x = None
        self.y = None
        self.input_shape=input_shape
        self.layers = []
        self.activation_dic = {
            'relu':ReLU,
            'sigmoid':Sigmoid,
            'tanh':Tanh,
            'softmax':SoftmaxWithLogit,
        }
        
    def add(self,output_shape,activation):
        if len(self.layers) == 0:
            w = np.random.randn(self.input_shape,output_shape)
            b = np.zeros(output_shape)
            activation = activation
            self.layers.append([w,b,activation])
        else:
            input_shape = self.layers[-1][0].shape[1]
            w = np.random.randn(input_shape,output_shape)
            b = np.zeros(output_shape)
            activation = activation
            self.layers.append([w,b,activation])
    
    def _build(self):
        self.W = {}
        for i,layer in enumerate(self.layers):
            w = layer[0]
            b = layer[1]
            activation = layer[2]
            self.W['Affine_'+str(i+1)] = Affine(w,b)
            self.W['Activation_'+str(i+1)] = self.activation_dic[activation]
        return f'Building Success !!'
    
    def predict(self,x):
        if self.grad is None:
            self._build()
        self.y_pred = x
        for k,v in self.W.items():
            if 'Affine' in k:
                self.y_pred = v.forward(self.y_pred)
            else:
                self.y_pred = v.forward(self.y_pred)
        return self.y_pred

    def gradient(self,x,y):
        self.y_pred = self.predict(x)
        self.y = y
        self.grad = {}
        out = 1
        last_layer = list(self.W.keys())[-1] 
        out = self.W.get(last_layer).backward(self.y,out)
        for key in list(self.W.keys())[::-1]:
            out = self.W.get(key).backward(out)
        cnt = 1
        for key in self.W.keys():
            if 'Affine' in key:
                self.grad['W'+str(cnt)] = self.W.get(key).dW
                self.grad['b'+str(cnt)] = self.W.get(key).db
                cnt += 1
        return self.grad
        
        
    def loss(self,x,y):
        y_pred = self.predict(x)
        loss_ = categorical_crossentropy(y,y_pred)
        return loss_
    
    def descent_gradient(self,x,y):
        loss_func = lambda W: self.loss(x,y)
        for idx ,layer in enumerate(self.layers):
            w = layer[0]
            b = layer[1]
            self.layers[idx][0] -= 0.001*grad(loss_func,w,b)[0]
            self.layers[idx][1] -= 0.001*grad(loss_func,w,b)[1]
        
    
    def summary(self):
        total_params = 0
        print('===========================================================')
        print('------------------------Output Shape-------------params----')
        for layer in self.layers:
            print('------------------------ (None,',layer[0].shape[1],')------------',np.prod(layer[0].shape) + len(layer[1]),'----------')
            total_params += np.prod(layer[0].shape) + len(layer[1])
        print('===========================================================')
        print('Trainable params ------',total_params)
        
        
    def fit(self, x,y, epochs, lr=1e-3):
        self.grad = self.gradient(x,y)
        for epoch in range(epochs):
            for idx in range(len(self.layers)):
                self.W.get('Affine_'+str(idx+1)).w -= lr*self.grad('W'+str(idx+1))
                self.W.get('Affine_'+str(idx+1)).b -= lr*self.grad('b'+str(idx+1))
                

In [164]:
model = Net(input_shape=784)

In [165]:
model.add(256,activation='relu')
model.add(256,activation='relu')
model.add(256,activation='relu')
model.add(128,activation='relu')
model.add(10,activation='softmax')

In [162]:
model.gradient(np.random.randn(100,784),np.random.randn(100,10))

TypeError: forward() missing 1 required positional argument: 'x'

In [132]:
model.backward(np.random.randn(10,30),np.random.randn(10,30))

AttributeError: 'Net' object has no attribute 'backward'

In [None]:
model.W.get('Affine_3').dW.shape

AttributeError: 'Affine' object has no attribute 'dW'

In [None]:
x = np.random.randn(10,30)
model.forward(x).shape

(10, 5)

In [None]:
model.W

{'Affine_1': <__main__.Affine at 0x7f6072ff5a90>,
 'Activation_1': __main__.ReLU,
 'Affine_2': <__main__.Affine at 0x7f607348a610>,
 'Activation_2': __main__.ReLU,
 'Affine_3': <__main__.Affine at 0x7f607348ac70>,
 'Activation_3': __main__.SoftmaxWithLogit}

In [126]:
model.W.keys()

dict_keys(['Affine_1', 'Activation_1', 'Affine_2', 'Activation_2', 'Affine_3', 'Activation_3'])

In [127]:
model.activation_dic

{'relu': __main__.ReLU,
 'sigmoid': __main__.Sigmoid,
 'tanh': __main__.Tanh,
 'softmax': __main__.SoftmaxWithLogit}

In [128]:
x = np.random.randn(10,30)
for layer in model.layers[:-1]:
    w = layer[0]
    b = layer[1]
    activation = model.activation_dic.get(layer[2])
    x = activation().forward(np.dot(x,w) + b)
# w, b, activation = model.layers[-1]


In [129]:
x = np.random.randn(10,30)
for key in model.W.keys():
    if 'W' in key :
        x = np.dot(x,model.W[key])
    if 'b' in key:
        x = x + model.W[key]
    if 'Acti' in key:
        x = model.W[key]().forward(x)

In [130]:
x.shape

(10, 30)