## Summary

### function

In [387]:
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

def relu(x):
    return np.maximum(0,x)

def softmax(x):
    c = np.max(x,axis=1).reshape(-1,1)
    x = x-c
    return np.exp(x)/np.sum(np.exp(x),axis=1).reshape(-1,1)

def categorical_crossentropy(t,y):
    return np.mean(-t*np.log(y+0.0001))

def make_one(x):
    result = np.zeros((x.size, np.unique(x).size))
    for idx1,idx2 in enumerate(x):
        result[idx1,idx2] = 1
    return result

### Relu, Sigmoid, Affine, Loss

In [364]:
import numpy as np
class Relu:
    def __init__(self):
        self.mask = None
    def forward(self,x):
        self.mask = (x <=0)
        out = x.copy()
        out[self.mask] = 0
        return out
    def backward(self,dout):
        dout[self.mask] = 0
        dx = dout
        return dx
        
class Sigmoid:
    def __init__(self):
        self.out = None
    
    def forward(self,x):
        out = sigmoid(x)
        self.out = out
        return out
    
    def backward(self,dout):
        dx = ((1-self.out)*self.out)*dout
        return dx

class Affine:
    def __init__(self,W,b):
        self.W = W
        self.b = b
        self.x = None
        self.origin_shape = None
        self.dW = None
        self.db = None
    
    def forward(self,x):
        self.origin_shape = x.shape
        self.x = x
        out = np.dot(x,self.W) + self.b
        return out
    
    def backward(self,dout):
        dx = np.dot(dout,self.W.T)
        self.dW = np.dot(self.x.T,dout)
        self.db = np.sum(dout,axis=0)
        dx = dx.reshape(self.origin_shape)
        return dx

class Loss:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None
    
    def forward(self,t,y):
        self.y = softmax(y)
        self.t = t
        self.loss = categorical_crossentropy(self.t, self.y)
        return self.loss
    
    def backward(self,dout=1):
        dx = (self.y - self.t)*dout
        return dx

### Network => Layers

In [365]:
class Layers:
    def __init__(self):
        self.layers = {}
        
    def add(self,x1,x2,activation):
        activation_dict = {
            'sigmoid':Sigmoid,
            'relu':Relu,
            'softmax':Loss
        }
        w = np.random.randn(x1,x2)
        b = np.zeros(x2)
        activation_layer = 'activation'+str((int(len(self.layers)/2+1))) 
        Affine_layer = 'Affine'+str((int(len(self.layers)/2+1)))
        self.layers[Affine_layer] = Affine(w,b)
        self.layers[activation_layer] = activation_dict[activation]()
        
    
    def predict(self,x):
        out = x.copy()
        ind = 1 
        layer_len = len(self.layers)
        for key, layer in self.layers.items():
            if ind < layer_len :
                out = layer.forward(out)
            ind += 1
        return out
            
    
    def loss(self,x,t):
        y = self.predict(x)
        out = list(self.layers.values())[-1].forward(t,y)
        return out
    
    def accuracy(self,x,t):
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        t = np.argmax(t,axis=1)
        self.acc = np.sum(y==t)/t.size
        return self.acc
    
    def gradient(self,x,t):
        self.loss(x,t)
        lr = 1e-4
        dout = 1
        dout = list(self.layers.values())[-1].backward(dout)
        layers = list(self.layers.values())[::-1][1:]
        self.layers_key = list(self.layers.keys())[::-1][1:]
        for layer in layers:
            dout = layer.backward(dout)
        self.grads = {}
        for layer_key in self.layers_key:
            if 'Affine' in layer_key:
                self.grads[layer_key] = [self.layers[layer_key].dW, self.layers[layer_key].db]
        for layer_key in self.layers_key:
            if 'Affine' in layer_key:
                self.layers[layer_key].W -= lr*self.grads[layer_key][0]
                self.layers[layer_key].b -= lr*self.grads[layer_key][1]
        result = self.loss(x,t)       
        return result
    
    def fit(self,x,t,epochs,lr):
        self.lr = lr
        self.history = {}
        accuracy = []
        loss = []
        for epoch in range(epochs):
            self.gradient(x,t)
            loss.append(self.err)
            accuracy.append(self.accuracy(x,t))
            if epoch % 100 == 0:
                print(f'loss : {self.err} === accuracy : {self.accuracy(x,t)}')
        self.history['accuracy'] = accuracy
        self.history['loss'] = loss
                

In [366]:
network = Layers()

In [367]:
network.add(4,10,"relu")

In [368]:
network.add(10,3,"sigmoid")

In [369]:
network.add(3,4,"softmax")

In [370]:
x = np.random.randn(100,4)

In [371]:
network.predict(x).shape

(100, 4)

In [372]:
t = make_one(np.random.randint(0,4,100))
t.shape

(100, 4)

In [373]:
network.loss(x,t)

0.4697572029696209

In [374]:
network.gradient(x,t)

0.4684668781838815

In [375]:
epochs = 100
for epoch in range(epochs):
    print(network.gradient(x,t))

0.4671913282017486
0.46593037428593276
0.464683839970117
0.4634515510334303
0.46223333547382967
0.46102902348053315
0.45983844740564306
0.45866144173509676
0.45749776921769514
0.45634707622749815
0.45520946136908236
0.4540847676451251
0.45297284004211674
0.45187352549953674
0.4507866728791619
0.44971213293459655
0.44864975828110404
0.4475994033658153
0.44656092443838136
0.4455341795221274
0.4445190283857613
0.44351533251568126
0.4425229550889178
0.44154176094674347
0.44057161656897065
0.43961239004895697
0.43866395106932915
0.4377261708784301
0.43679892226749084
0.4358820795485218
0.43497551853291616
0.434079116510751
0.4331927522307708
0.432316693947785
0.43145176990072087
0.4305965340178686
0.4297508703991951
0.4289146645140157
0.42808843720868395
0.42727502122326777
0.42647065453614025
0.425675229314576
0.42488908280358456
0.42411262247003506
0.42334476670483256
0.42258541242844344
0.4218344578008103
0.42109180220826004
0.4203573462506353
0.4196309917286326
0.4189126416313278
0.4182

## tensorflow, keras

In [376]:
## 데이터 정리
## network 설계 X 데이터의 shape, y 데이터의 shape
## 모델학습

In [377]:
## 필요한 모듈 호출
from tensorflow import keras
from keras.datasets import mnist
from keras.utils import to_categorical
import matplotlib.pyplot as plt

In [378]:
## 데이터 정리
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [379]:
## 데이터 확인
print(X_train.shape)
print(X_test.shape)

(60000, 28, 28)
(10000, 28, 28)


In [380]:
## X 데이터 변환
X_train = X_train.reshape(-1,28*28)
X_train.shape

(60000, 784)

In [381]:
## y 데이터 변환
y_train.shape
# 정수 인코딩 되어 있음
np.unique(y_train).size
#  -> one-hot 인코딩으로 필요
y_train = to_categorical(y_train)
y_train.shape ##(60000,10)

(60000, 10)

In [382]:
model = Layers()

In [383]:
model.add(784,2048,'relu')
model.add(2048,1024,'relu')
model.add(1024,512,'relu')
model.add(512,10,'softmax')

In [384]:
model.layers["Affine1"].W.shape

(784, 2048)

In [385]:
# model.predict(X_train).shape

In [386]:
for i in range(100):
    print(model.gradient(X_train,y_train))

0.832045836293497


KeyboardInterrupt: 

In [None]:
pred = model.predict(X_test.reshape(-1,28*28))

In [None]:
pred = np.argmax(pred, axis=1)

In [None]:
np.sum(pred == y_test)/y_test.size

0.0963

In [None]:
y_test

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)