In [19]:
import numpy as np
import matplotlib.pyplot as plt

In [29]:
def linear(x):
    return x

def sigmoid(x):
    return 1/(np.exp(-x)+1)

def relu(x):
    return np.maximum(x, 0)

def softmax(x):
        new_x = np.zeros_like(x)
        for idx in range(x.shape[0]):
            tmp = x[idx,:]
            m = np.max(tmp)
            tmp -= m
            new_x[idx] = np.exp(tmp)/np.sum(np.exp(tmp))
        return new_x
    
def tanh(x):
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

In [30]:
# x = np.array([1,2,1000])
# softmax(x)

In [31]:
def mean_squared_error(t,y):
    return np.mean(np.square(t-y))

def binary_crossentropy(t,y):
    return -np.mean(t*np.log(y) + (1-t)*np.log(1-y))

def categorical_crossentropy(t,y):
    eps = 1e-7
    return -t*np.log(y+eps)

In [32]:
def grad(func, *args):
    h = 1e-4
    w = args[0]
    b = args[1]
    grad_w = np.zeros_like(w)
    grad_b = np.zeros_like(b)
    rows, cols = w.shape[0], w.shape[1]
    
    for row in range(rows):
        for col in range(cols):
            tmp = w[row, col]
            fx = func(args)
            w[row, col] += h
            fxh = func(args)
            dw = (fxh - fx)/h
            grad_w[row,col] = dw
            w[row,col] = tmp
    for i in range(b.size):
        tmp = b[i]
        fx = func(args)
        b[i] += h
        fxh = func(args)
        db = (fxh - fx)/h
        grad_b[i] =db
    return grad_w, grad_b

In [33]:
class Activation:
    def __init__(self, activation):
        self.activation = activation
        
    def __call__(self, x):
        if self.activation == 'sigmoid':
            return sigmoid(x)
        elif self.activation == 'relu':
            return relu(x)
        elif self.activation == 'softmax':
            return softmax(x)
        elif self.activation == 'linear':
            return linear(x)
        else:
            f'Activation Function Not Exist'

In [429]:
# Activation('sigmoid')(10)

In [430]:
# np.max(np.random.randn(10,3),1)

In [431]:
# np.sum(Activation('softmax')(np.random.randn(10,3)))

In [538]:
# class Net:
#     def __init__(self):
#         self.x = None
#         self.y = None
#         self.layers = []
#         self.count = 0
        
#     def add(self, output_shape, activation):
#         if len(self.layers) == 0:
#             w = np.random.randn(0, output_shape)
#             b = np.zeros(output_shape)
#             activation = activation
#             self.layers.append([w,b,activation])
#         else:
#             input_shape = self.layers[-1][0].shape[1]
#             w = np.random.randn(input_shape, output_shape) 
#             b = np.zeros(output_shape)
#             activation = activation
#             self.layers.append([w,b,activation])
    
#     def build(self, input_shape):
#         output_shape = self.layers[0][0].shape[1]
#         self.layers[0][0] = np.random.randn(input_shape, output_shape)
#         return f'Building Success !!'
        
#     def predict(self, x):
#         pred = x.copy()
#         for layer in self.layers:
#             w = layer[0]
#             b = layer[1]
#             activation = layer[2]
#             pred = np.dot(pred, w) + b
#             pred = Activation(activation)(pred)
#             self.pred = np.array(pred)
#         return self.pred
        
#     def loss(self,x,y):
#         y_pred = self.predict(x)
#         loss_ = categorical_crossentropy(y, y_pred)
#         return loss_
    
#     def descent_gradient(self,x,y):
#         loss_func = lambda W: self.loss(x,y)
#         for idx, layer in enumerate(self.layers):
#             w = layer[0]
#             b = layer[1]
#             self.layers[idx][0] -= 0.001*grad(loss_func,w,b)[0]
#             self.layers[idx][1] -= 0.001*grad(loss_func,w,b)[1]
    
#     def summary(self):
#         total_params = 0
#         print('================================================================')
#         print('---------------Output shape------------------params-------------')
#         for layer in self.layers:
#             print('---------------(None',layer[0].shape[1],')''-------------------',np.prod(layer[0].shape) + len(layer[1]),'----------')
#             total_params += np.prod(layer[0].shape) + len(layer[1])
#         print('================================================================')    
#         print('Trainable params ===',total_params)

In [433]:
# callable(model.loss)

In [434]:
# func = lambda w:model.loss(X_train, y_train)
# callable(func)

In [51]:
model = Net(input_shape=784)

In [52]:
model.add(256, activation='sigmoid')
model.add(128, activation='relu')
model.add(10, activation='softmax')

In [55]:
# model.build(784)

In [56]:
# model.summary()

In [438]:
# model.loss(X_train, y_train)

In [407]:
# layer = model.layers[0]

In [408]:
# tmp = model.layers[0][0][0,0]

In [409]:
# model.layers[0][0][0,0] += 0.0001

In [441]:
# model.layers[0][0][0,0]

In [439]:
# tmp

In [440]:
# model.loss(X_train, y_train)

In [451]:
# model.descent_gradient(X_train, y_train) ## 지나치게 오래걸림

In [442]:
%timeit model.predict(X_train)

2.12 s ± 31.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [443]:
# model.layers[2][0].shape

In [444]:
# model.layers[-1][0].shape[1]

In [445]:
# for layer in model.layers:
#     print(layer[2])

In [446]:
# X_train.shape

In [447]:
# model.layers[0][0].shape

In [448]:
# model.layers[-1][0].shape[1]

In [46]:
# class Net:
#     def __init__(self, input_shape):
#         self.x = None
#         self.y = None
#         self.layers = []
#         self.input_shape = input_shape
#         self.count = 0
#         self.activation_dic = {
#             'relu': ReLu,
#             'sigmoid':Sigmoid,
#             'tanh':Tanh,
#             'softmax':SoftmaxWithLogit
#         }
        
#     def add(self, output_shape, activation):
#         if len(self.layers) == 0:
#             w = np.random.randn(self.input_shape, output_shape)
#             b = np.zeros(output_shape)
#             activation = activation
#             self.layers.append([w,b,activation])
#         else:
#             input_shape = self.layers[-1][0].shape[1]
#             w = np.random.randn(input_shape, output_shape) 
#             b = np.zeros(output_shape)
#             activation = activation
#             self.layers.append([w,b,activation])
    
#     def _build(self):
#         self.W = {}
#         for i,layer in enumerate(self.layers):
#             w = layer[0]
#             b = layer[1]
#             activation = layer[2]
#             self.W['Affine_W' + str(i+1)]['W'] = w
#             self.W['Affine_b' + str(i+1)]['b'] = b
#             self.W['Activation' + str(i+1)]['activation'] = self.activation_dic[activation]
#         return f'Building Success !!'
        
#     def forward(self, x):
#         self._build()
#         # self.pred = x.copy()
#         # for layer in self.W.items():
#         #     self.pred = Affine().forward(self.pred, layer[0], layer[1])
#         #     self.pred = layer[2](self.pred)
        
#     def loss(self,x,y):
#         y_pred = self.predict(x)
#         loss_ = categorical_crossentropy(y, y_pred)
#         return loss_
    
#     def descent_gradient(self,x,y):
#         loss_func = lambda W: self.loss(x,y)
#         for idx, layer in enumerate(self.layers):
#             w = layer[0]
#             b = layer[1]
#             self.layers[idx][0] -= 0.001*grad(loss_func,w,b)[0]
#             self.layers[idx][1] -= 0.001*grad(loss_func,w,b)[1]
    
#     def summary(self):
#         total_params = 0
#         print('================================================================')
#         print('---------------Output shape------------------params-------------')
#         for layer in self.layers:
#             print('---------------(None',layer[0].shape[1],')''-------------------',np.prod(layer[0].shape) + len(layer[1]),'----------')
#             total_params += np.prod(layer[0].shape) + len(layer[1])
#         print('================================================================')    
#         print('Trainable params ===',total_params)

In [47]:
class Net:
    def __init__(self,input_shape):
        self.x = None
        self.y = None
        self.input_shape=input_shape
        self.layers = []
        self.activation_dic = {
            'relu':ReLU,
            'sigmoid':Sigmoid,
            'tanh':Tanh,
            'softmax':SoftmaxWithLogit,
        }
        
    def add(self,output_shape,activation):
        if len(self.layers) == 0:
            w = np.random.randn(self.input_shape,output_shape)
            b = np.zeros(output_shape)
            activation = activation
            self.layers.append([w,b,activation])
        else:
            input_shape = self.layers[-1][0].shape[1]
            w = np.random.randn(input_shape,output_shape)
            b = np.zeros(output_shape)
            activation = activation
            self.layers.append([w,b,activation])
    
    def _build(self):
        self.W = {}
        for i,layer in enumerate(self.layers):
            w = layer[0]
            b = layer[1]
            activation = layer[2]
            self.W['Affine_W'+str(i+1)] = w
            self.W['Affine_b'+str(i+1)] = b
            self.W['Activation'+str(i+1)] = self.activation_dic[activation]
        return f'Building Success !!'
    
    def forward(self,x):
        self._build()
        # self.pred = x.copy()
        # for key,value in self.W.items():
        #     self.pred = Affine().forward(self.pred,layer[0],layer[1])
        #     self.pred = layer[2](self.pred)

    def loss(self,x,y):
        y_pred = self.predict(x)
        loss_ = categorical_crossentropy(y,y_pred)
        return loss_
    
    def descent_gradient(self,x,y):
        loss_func = lambda W: self.loss(x,y)
        for idx ,layer in enumerate(self.layers):
            w = layer[0]
            b = layer[1]
            self.layers[idx][0] -= 0.001*grad(loss_func,w,b)[0]
            self.layers[idx][1] -= 0.001*grad(loss_func,w,b)[1]
        
    
    def summary(self):
        total_params = 0
        print('===========================================================')
        print('------------------------Output Shape-------------params----')
        for layer in self.layers:
            print('------------------------ (None,',layer[0].shape[1],')------------',np.prod(layer[0].shape) + len(layer[1]),'----------')
            total_params += np.prod(layer[0].shape) + len(layer[1])
        print('===========================================================')
        print('Trainable params ------',total_params)

In [48]:
model = Net(input_shape=784)

In [49]:
model.add(256, activation='relu')
model.add(128, activation='relu')
model.add(10, activation='softmax')

In [3]:
# x = X_train[:10, :]
# for k, v in model.W.items():
#     x = Affine().forward(x, v[0], v[1])
#     x = v[2](x)

In [1]:
# model.forward(X_train)

In [50]:
model.W.get('Affine_W1')

AttributeError: 'Net' object has no attribute 'W'

In [42]:
class ReLU:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        out = np.maximum(0,x)
        self.out = np.where(out > 0,1, 0)
        return out 
    
    def backward(self,out):
        dout = self.out*out
        return dout

class Sigmoid:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        return sigmoid(x)
    
    def backward(self,out):
        dout = sigmoid(out)*(1-sigmoid(out))
        return dout

class Affine:
    def __init__(self):
        self.x = None
        self.w = None
        self.b = None
        
    def forward(self,x,w,b):
        self.x = x # 100,5
        self.w = w # 5,10
        self.b = b # 10
        self.out = np.dot(self.x,self.w) + self.b
        return self.out

    def backward(self,out):
        dout = np.dot(out,self.w.T) # 100,10
        dW = np.dot(self.x.T,out)
        db = np.sum(out,axis=0)
        return dout, dW, db

class Tanh:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        self.out = tanh(x)
        return self.out
    
    def backward(self,out):
        dout = 1-tanh(out)**2
        return dout

class SoftmaxWithLogit:
    def __init__(self):
        self.out = None
        self.y = None
        self.y_pred = None
    
    def forward(self,x,y):
        self.y_pred = softmax(x)
        self.y = y
        self.out = categorical_crossentropy(self.y,self.y_pred)
        return self.out
    
    def backward(self,out=1):
        out = 1
        self.dout = out*(self.y - self.out)
        return self.dout

In [45]:
# class ReLu:
#     def __init(self):
#         self.out = None
    
#     def forward(self, x):
#         out = np.maximum(0,x)
#         self.out = np.where(out > 0, 1, 0)
#         return out
    
#     def backward(self, out):
#         dout = self.out * out
#         return dout
    
# class Sigmoid:
#     def __init__(self):
#         self.out = None
    
#     def forward(self, x):
#         return sigmoid(x)
    
#     def backward(self, out):
#         dout = sigmoid(out)*(1 - sigmoid(out))
#         return dout
    
# class Affine:
#     def __init__(self):
#         self.x = None
#         self.w = None
#         self.b = None
        
#     def forward(self,x,w,b):
#         self.x = x # 100, 5
#         self.w = w # 5, 10
#         self.b = b # 10
#         self.out = np.dot(self.x, self.w) + self.b
#         return self.out
    
#     def backward(self, out):
#         dout = np.dot(out, self.w.T) # 100,10
#         dW = np.dot(self.x.T, out)
#         db = np.sum(out, axis=0)
#         return dout, dW, db
    
# class Tanh:
#     def __init__(self):
#         self.out = None
        
#     def forward(self,x):
#         self.out = tahn(x)
#         return self.out
    
#     def backward(self, dout):
#         dout = 1 - tahn(out)**2
#         return dout
    
# class SoftmaxWithLogit:
#     def __init__(self):
#         self.out = None
#         self.y = None
#         self.y_pred = None
        
#     def forward(self,x,y):
#         self.y_pred = softmax(x)
#         self.y = y
#         self.out = categorical_crossentropy(self.y, self.y_pred)
#         return self.out
        
#     def backward(self, out=1):
#         out = 1
#         self.dout = out*(self.y - self.out)
#         return self.dout
        

In [515]:
# categorical_crossentropy()

In [535]:
soft = SoftmaxWithLogit()

In [536]:
x = np.random.randn(100,5)
y = np.random.randn(100,5)
soft.forward(x,y)

array([[-1.25503545e+00,  1.74715803e+00,  3.74863241e+00,
        -1.63258379e+00, -4.07501293e+00],
       [-1.40456935e+00,  2.63964338e+00, -6.12191798e+00,
        -5.66625377e-02, -2.25604199e-01],
       [ 4.95560999e-01, -5.53988434e-03, -4.63899109e-01,
         2.94631548e+00,  2.01226595e+00],
       [ 1.19477020e+00,  9.48798275e-02,  8.59561738e-01,
         8.29564029e-01,  1.22549686e+00],
       [-5.08173944e+00,  8.95197390e-01,  6.20937704e+00,
         1.22542824e+00,  1.05516414e+00],
       [ 7.85516178e-01,  2.16258640e-01, -3.89275133e+00,
         6.39713379e-01, -1.01103652e+00],
       [-3.35432667e+00,  2.11878153e-01, -6.73472737e-01,
         6.15079329e+00,  6.71405178e+00],
       [ 5.74709584e-01,  7.65236447e-01,  1.83258715e+00,
         3.16726995e-02,  3.40563871e+00],
       [-8.41863600e-02,  7.71170346e-02,  1.08525432e+00,
         1.86514144e-01,  3.86634236e+00],
       [ 8.79177804e-01,  6.89781929e-01, -6.03973650e-01,
        -1.29838554e+00

In [537]:
soft.backward()

array([[ 4.78365636e-01, -3.03022684e-01, -2.29654096e+00,
         4.73054981e-01,  1.67744787e+00],
       [ 4.74449274e-01, -1.63668037e+00,  4.46127222e+00,
        -6.82148116e-02,  1.51890992e-01],
       [-2.02812426e-01,  3.03171960e-03, -1.13699666e-01,
        -1.47912566e+00, -1.04828840e+00],
       [-6.62412839e-01, -5.99449652e-02,  2.09192955e+00,
        -5.94880611e-01, -8.12756196e-01],
       [ 2.77481026e+00, -1.63607506e-01, -4.41925354e+00,
         3.81741866e-01, -6.01533990e-01],
       [ 1.04172543e-01, -1.39611666e-01,  2.04463838e+00,
        -2.03063577e-01,  4.32573921e-01],
       [ 2.43329018e+00,  1.58371682e+00,  4.83170260e-01,
        -4.35233902e+00, -4.92712026e+00],
       [-4.82882408e-02, -4.36695374e-01, -6.97718621e-01,
        -7.79443477e-03, -1.90968535e+00],
       [ 5.62788543e-02,  2.67878795e-02, -6.74153942e-01,
        -1.23282740e-02, -2.48517481e+00],
       [ 4.92206143e-02, -2.09481108e-01,  1.26424181e-01,
         8.82626042e-01

In [387]:
act_relu = ReLu()

In [None]:
x = np.random.randn(3,2)

In [None]:
y = np.random.randn(3,2)

In [None]:
y

In [392]:
act_relu.forward(x)

array([[0.        , 1.89358101],
       [0.68323854, 0.        ],
       [0.        , 0.        ]])

In [393]:
act_relu.out

array([[0, 1],
       [1, 0],
       [0, 0]])

In [455]:
sig = Sigmoid()

In [456]:
x = np.random.randn(3,2)
y = np.random.randn(3,2)

In [457]:
sig.forward(x)

array([[0.4583564 , 0.61050831],
       [0.30612941, 0.38943397],
       [0.40652614, 0.54541952]])

In [458]:
sig.backward(y)

array([[0.12404108, 0.12531847],
       [0.24022841, 0.18711922],
       [0.15535924, 0.20512248]])

In [483]:
aff = Affine()

In [484]:
x = np.random.randn(100,5)
w = np.random.randn(5,10)
b = np.zeros(10)

In [485]:
aff.forward(x,w,b)

array([[-2.36578068e+00,  8.07075773e-02,  8.20550087e-01,
         2.60672452e+00, -1.98774094e+00,  1.22184815e+00,
         9.21973891e-01,  7.10385081e+00, -1.08070264e+00,
         8.93777599e-01],
       [ 2.87187288e+00,  1.06348750e+00, -3.56890909e+00,
        -9.11286492e-01,  4.40677924e+00,  6.13799781e-01,
        -2.01841799e+00, -3.34311883e+00,  2.78752154e+00,
         1.45167532e+00],
       [-3.40822456e+00, -2.60718917e+00,  2.13796438e+00,
        -5.58007598e+00, -5.06789972e+00, -1.42405357e+00,
        -3.31968030e-01, -2.37840062e-01, -3.03154357e+00,
        -4.38245237e+00],
       [ 2.35280063e+00,  3.13291143e-03, -1.97898616e+00,
        -4.58765758e+00, -6.45055263e-01,  4.01616266e+00,
         3.15636606e+00,  4.19801486e+00, -9.32986240e-02,
        -3.18665816e+00],
       [ 1.63407248e-01, -5.58610309e-01,  7.10144217e-02,
        -1.27033271e+00,  2.28446956e+00, -1.79706569e+00,
         1.39766461e-01, -2.60736284e+00,  1.94052194e+00,
        -8.

In [476]:
aff.x.shape

(100, 5)

In [477]:
aff.w.shape

(5, 10)

In [478]:
aff.out.shape

(100, 10)

In [486]:
out = np.random.randn(100,10)

In [487]:
aff.backward(out)

(array([[ 2.83279390e+00, -8.24010859e+00, -1.55092007e+00,
         -3.77438398e+00,  3.23781971e+00],
        [-4.50358719e+00,  7.52171474e-01, -6.25580747e+00,
          4.71562646e-02, -8.70599838e+00],
        [ 5.14165455e-01, -2.93820593e+00,  1.87183627e+00,
          1.99049706e+00,  7.80695668e-03],
        [ 4.02813656e+00, -1.91909422e+00, -1.05141175e+00,
         -1.90601679e+00,  5.18365351e+00],
        [-2.45882623e+00, -1.34438518e+00, -2.37176795e+00,
          3.06409578e+00,  3.85427219e+00],
        [ 3.25472358e+00,  6.73828366e+00,  2.39574207e+00,
         -6.60736444e+00,  4.72063193e+00],
        [-3.96846180e+00, -8.70112315e-01, -8.35930002e-01,
          6.56946287e+00,  2.93822769e+00],
        [-1.40634783e+00, -3.19133049e+00, -4.13169339e+00,
          3.18109709e+00, -4.46935638e+00],
        [-1.16401879e+00,  3.54963061e+00,  1.43783452e+00,
         -1.58436320e+00,  3.78866668e+00],
        [ 7.97488928e-01,  2.95570181e+00,  4.93109597e-01,
    

In [488]:
aff.dW.shape

(5, 10)

In [489]:
aff.dout.shape

(100, 5)

In [74]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.utils import to_categorical

In [59]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1,784)/255.
y_train = to_categorical(y_train)

In [60]:
model = Sequential()
model.add(Dense(256, activation='sigmoid', input_shape=(784,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

In [72]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               200960    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 10)                1290      
                                                                 
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
_________________________________________________________________


In [81]:
model.layers[0].get_weights()[0].shape ## weight
# model.layers[0].get_weights()[1].shape ## bias

(784, 256)

In [75]:
model = Sequential()
model.add(Dense(256,input_shape=(784,)))
model.add(Activation('sigmoid'))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))

In [76]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 256)               200960    
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 dense_7 (Dense)             (None, 128)               32896     
                                                                 
 activation_1 (Activation)   (None, 128)               0         
                                                                 
 dense_8 (Dense)             (None, 10)                1290      
                                                                 
 activation_2 (Activation)   (None, 10)                0         
                                                                 
Total params: 235,146
Trainable params: 235,146
Non-tr

In [52]:
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

In [54]:
model.fit(X_train, y_train, epochs=5, batch_size=256)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fac42cf4520>