Chapter1. 신경망
====
### 1.1 수학과 파이썬 복습

In [1]:
import numpy as np
W = np.array([[1, 2, 3], 
              [4, 5, 6]])
X = np.array([[0, 1, 2], 
              [3, 4, 5]])

In [2]:
W + X

array([[ 1,  3,  5],
       [ 7,  9, 11]])

In [3]:
# element-wise(point-wise)
W * X

array([[ 0,  2,  6],
       [12, 20, 30]])

In [4]:
A = np.array([[1, 2], 
              [3, 4]])

A * 10

array([[10, 20],
       [30, 40]])

In [5]:
A = np.array([[1, 2], 
              [3, 4]])
b = np.array([10, 20])

A * b

array([[10, 40],
       [30, 80]])

### 벡터의 내적
- $\mathbf{x} = (x_1, \dots, x_n)$, $\mathbf{y} = (y_1, \dots, y_n)$ 에 대하여, 벡터의 내적은 두 벡터에서 대응하는 원소들의 곱을 모두 더한 것이다.

$$
\mathbf{x} \cdot \mathbf{y} = \mathbf{x}^{T} \mathbf{y} = x_1y_1 + x_2y_2 + \cdots + x_ny_n
$$

In [6]:
# 벡터의 내적
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

np.dot(a, b)

32

In [7]:
# 행렬의 곱
A = np.array([[1, 2], 
              [3, 4]])
B = np.array([[5, 6], 
              [7, 8]])

np.matmul(A, B)

array([[19, 22],
       [43, 50]])

In [8]:
np.dot(A, B)

array([[19, 22],
       [43, 50]])

### 1.2 신경망 추론

In [9]:
# Input -> hidden layer
W1 = np.random.randn(2, 4)  # weights: 2 input features -> 4 hidden units
b1 = np.random.randn(4)  # bias: one value per hidden unit
x = np.random.randn(10, 2)  # mini-batch of 10 samples with 2 features each
h = x @ W1 + b1  # b1 has shape (4,) and is broadcast over the 10 rows

In [10]:
h

array([[ 0.32252584,  0.39630236,  0.87746871,  2.08688293],
       [ 1.28048016,  1.46781086,  1.05987576,  1.25152984],
       [ 2.19520373,  0.90683189,  1.21129518, -1.43136482],
       [ 1.44273029,  2.305136  ,  1.10019143,  1.89054768],
       [ 0.28652034,  0.98473617,  0.87964412,  2.8664906 ],
       [ 0.44932949,  1.65183802,  0.91761201,  3.30169794],
       [ 0.03228135, -2.10361694,  0.79095479, -0.2487576 ],
       [ 2.19878991,  2.58477235,  1.23602389,  0.55761285],
       [ 1.47838752,  2.96643725,  1.11590764,  2.59898843],
       [ 0.67521401, -1.1717835 ,  0.91643284, -0.55629026]])

In [11]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

In [12]:
a = sigmoid(h)  # 활성화(activation)
a

array([[0.57993969, 0.59779894, 0.7062974 , 0.88962172],
       [0.7825315 , 0.81272442, 0.7426668 , 0.77756457],
       [0.89981798, 0.71235143, 0.77052804, 0.19288612],
       [0.8088771 , 0.90930151, 0.75029597, 0.86881796],
       [0.57114404, 0.72804696, 0.70674847, 0.94616487],
       [0.6104798 , 0.83913931, 0.71455529, 0.96448701],
       [0.50806964, 0.10874577, 0.68803631, 0.43812932],
       [0.90014079, 0.9298751 , 0.77487116, 0.63590002],
       [0.8143289 , 0.95103463, 0.75322884, 0.93079645],
       [0.66266968, 0.23653276, 0.71431472, 0.36440626]])

In [13]:
a.shape

(10, 4)

In [14]:
# Hidden -> output layer
W2 = np.random.randn(4, 3)  # weights: 4 hidden units -> 3 output scores
b2 = np.random.randn(3)  # bias: one value per output score

s = a @ W2 + b2  # scores computed from the activations `a`
s

array([[-0.45205347, -2.15724574, -0.43151579],
       [-0.15394501, -2.57704987, -0.73517741],
       [ 1.17077949, -2.15835009, -1.16100808],
       [-0.36533225, -2.80481378, -0.72611997],
       [-0.70688643, -2.40503284, -0.39805934],
       [-0.79419005, -2.62641842, -0.43980207],
       [ 0.73105732, -1.04187035, -0.54945961],
       [ 0.18457095, -2.76529502, -0.95300733],
       [-0.50343272, -2.91182817, -0.70435701],
       [ 0.97640391, -1.30671483, -0.77375747]])

In [15]:
# Sigmoid layer implementation (forward pass only)
class Sigmoid:
    """Sigmoid activation layer.

    The layer has nothing to learn, so the ``params`` instance attribute
    is initialised to an empty list.
    """

    def __init__(self):
        self.params = []

    def forward(self, x):
        """Run forward propagation.

        Args:
            x (ndarray): incoming values.
        Returns:
            The sigmoid activation ``1 / (1 + exp(-x))``, element-wise.
        """
        denominator = 1 + np.exp(-x)
        return 1 / denominator

In [16]:
# Fully connected (affine) layer implementation (forward pass only)
class Affine:
    """Fully connected layer computing ``x @ W + b``."""

    def __init__(self, W, b):
        """Store the layer parameters.

        Args:
            W (ndarray): weight matrix.
            b (ndarray): bias vector.
        """
        self.params = [W, b]

    def forward(self, x):
        """Run forward propagation.

        Args:
            x (ndarray): incoming values.
        Returns:
            ndarray: ``matmul(x, W) + b``; the bias is added by broadcasting.
        """
        weight, bias = self.params
        return np.matmul(x, weight) + bias

In [17]:
class TwoLayerNet:
    """Two-layer fully connected network: Affine -> Sigmoid -> Affine.

    Attributes:
        layers: the layer objects in forward order.
        params: flat list holding every layer's parameters, in layer order.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the layers with randomly initialised parameters.

        Args:
            input_size (int): number of input features.
            hidden_size (int): number of hidden units.
            output_size (int): number of output scores.
        """
        I, H, O = input_size, hidden_size, output_size
        # Draw weights and biases from a standard normal distribution.
        # input -> hidden
        W1 = np.random.randn(I, H)
        b1 = np.random.randn(H)
        # hidden -> output
        W2 = np.random.randn(H, O)
        b2 = np.random.randn(O)

        # Create the layers in forward order.
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2),
        ]

        # Gather every parameter into one flat list. The attribute used to be
        # misspelled ``parmas`` and held one nested list per layer.
        self.params = []
        for layer in self.layers:
            self.params += layer.params

    def predict(self, x):
        """Feed ``x`` through each layer in order and return the final output.

        Args:
            x (ndarray): input batch.
        Returns:
            The output of the last layer (the scores).
        """
        for layer in self.layers:
            x = layer.forward(x)
        return x

In [18]:
x = np.random.randn(10, 2)
model = TwoLayerNet(2, 4, 3)
s = model.predict(x)

In [19]:
s

array([[-2.24747399, -2.13935735,  0.41854382],
       [-2.00206159, -2.15990205,  0.38258516],
       [-2.42973118, -3.31998456, -0.32856543],
       [-1.77611146, -1.96541305,  0.4040273 ],
       [-2.11082283, -2.28744723,  0.34541667],
       [-2.00345416, -2.27525013,  0.33360981],
       [-2.32356771, -2.4952179 ,  0.24778931],
       [-1.35765624, -2.60006665, -0.02934648],
       [-1.30931871, -2.80577934, -0.2073413 ],
       [-1.26268666, -1.70854025,  0.37410496]])

### 1.3 신경망 학습

In [20]:
# Repeat node example
D = 8
N = 7
x = np.random.randn(1, D)  # input: a single row of D values
# np.repeat() plays the role of the repeat (copy) node
y = np.repeat(x, N, axis=0)  # forward pass: the row is replicated N times

print(f'x.shape: {x.shape}')
print(f'y.shape: {y.shape}')

x.shape: (1, 8)
y.shape: (7, 8)


In [21]:
x

array([[ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953]])

In [22]:
y

array([[ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953],
       [ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953],
       [ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953],
       [ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953],
       [ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953],
       [ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953],
       [ 0.12884531,  0.36058465, -0.70570528,  1.34323151, -0.06046834,
         0.35621279, -0.2311544 , -0.82609953]])

In [23]:
dy = np.random.randn(N, D)  # random upstream gradient
# Backward pass of the repeat node: add up the gradients of all N copies.
# keepdims=True keeps the number of dimensions, so the result is (1, D).
dx = dy.sum(axis=0, keepdims=True)

In [24]:
dx.shape

(1, 8)

In [25]:
# Sum node backpropagation example

D = 8
N = 7
x = np.random.randn(N, D)  # input: N rows of D values
y = x.sum(axis=0, keepdims=True)  # forward pass: sum over the rows, shape (1, D)

print(f'x.shape: {x.shape}')
print(f'y.shape: {y.shape}')

x.shape: (7, 8)
y.shape: (1, 8)


In [26]:
dy = np.random.randn(1, D)  # random upstream gradient
# Backward pass of the sum node: every one of the N rows receives a copy of dy.
dx = dy.repeat(N, axis=0)

In [27]:
dx

array([[-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076],
       [-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076],
       [-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076],
       [-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076],
       [-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076],
       [-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076],
       [-1.02253303, -0.43988526, -0.26428666, -0.9610439 ,  0.3894802 ,
        -0.48280064,  0.40698727,  1.83965076]])

In [28]:
# MatMul 클래스 구현
# common/layers.py
class MatMul:
    """Matrix-multiplication layer computing ``matmul(x, W)``.

    ``params`` holds the weight, ``grads`` holds a same-shaped gradient
    buffer, and ``x`` caches the most recent forward input for ``backward``.
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        """Return ``matmul(x, W)`` and remember ``x`` for the backward pass."""
        (weight,) = self.params
        product = np.matmul(x, weight)
        self.x = x
        return product

    def backward(self, dout):
        """Store the weight gradient and return the gradient w.r.t. the input.

        Args:
            dout (ndarray): gradient flowing in from the next layer.
        Returns:
            ndarray: ``matmul(dout, W.T)``.
        """
        (weight,) = self.params
        grad_x = np.matmul(dout, weight.T)
        grad_w = np.matmul(self.x.T, dout)
        # Overwrite the existing buffer in place (element copy) so the array
        # object held in ``grads`` stays the same one.
        self.grads[0][...] = grad_w
        return grad_x

In [29]:
# Check the difference between a shallow copy and a deep copy.
# The cells that follow contrast `a = b` (afterwards id(a) == id(b) is True)
# with `a[...] = b` (afterwards id(a) == id(b) is False).
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

In [30]:
a = b

In [31]:
id(a) == id(b)

True

In [32]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

In [33]:
a[...] = b

In [34]:
id(a) == id(b)

False

In [35]:
# Sigmoid layer 클래스 구현
# common/layers.py
class Sigmoid:
    """Sigmoid activation layer with forward and backward passes.

    The layer has no parameters, so ``params`` and ``grads`` are empty lists.
    ``out`` caches the latest forward result, which ``backward`` reuses.
    """

    def __init__(self):
        self.params = []
        self.grads = []
        self.out = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and cache it in ``self.out``."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output."""
        y = self.out
        return dout * (1.0 - y) * y

In [36]:
class Affine:
    """Fully connected layer computing ``matmul(x, W) + b``, with backprop.

    ``params`` holds ``[W, b]``, ``grads`` holds same-shaped gradient buffers,
    and ``x`` caches the most recent forward input.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.x = None

    def forward(self, x):
        """Return ``matmul(x, W) + b`` and remember ``x`` for the backward pass."""
        weight, bias = self.params
        result = np.matmul(x, weight) + bias
        self.x = x
        return result

    def backward(self, dout):
        """Store the parameter gradients and return the gradient w.r.t. the input.

        Args:
            dout (ndarray): gradient flowing in from the next layer.
        Returns:
            ndarray: ``matmul(dout, W.T)``.
        """
        weight, _ = self.params
        grad_x = np.matmul(dout, weight.T)
        grad_w = np.matmul(self.x.T, dout)
        grad_b = np.sum(dout, axis=0)  # the bias was broadcast over axis 0

        # Write into the existing buffers so the arrays in ``grads`` keep
        # their identity.
        weight_buf, bias_buf = self.grads
        weight_buf[...] = grad_w
        bias_buf[...] = grad_b
        return grad_x

In [39]:
import os
os.getcwd()

'/home/aiffel42/project/Python_Study/밑바닥/Ch01-NeuralNetworksReview'

In [42]:
import sys
sys.path.append('/home/aiffel42/project/Python_Study/밑바닥')  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from common.functions import softmax, cross_entropy_error

class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, with the combined backward pass."""

    def __init__(self):
        self.params = []
        self.grads = []
        self.y = None  # output of softmax
        self.t = None  # ground-truth labels

    def forward(self, x, t):
        """Return the loss of scores ``x`` against labels ``t``.

        Args:
            x (ndarray): scores passed to ``softmax``.
            t (ndarray): labels, given either as class indices or one-hot rows.
        Returns:
            The value returned by ``cross_entropy_error``.
        """
        self.t = t
        self.y = softmax(x)

        # Labels with as many elements as the softmax output are treated as
        # one-hot and converted to class indices; backward() indexes with them.
        if self.t.size == self.y.size:
            self.t = self.t.argmax(axis=1)

        return cross_entropy_error(self.y, self.t)

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            dout: upstream gradient (defaults to 1).
        Returns:
            ndarray: ``(y - onehot(t)) * dout / batch_size``.
        """
        n_samples = self.t.shape[0]

        grad = self.y.copy()
        rows = np.arange(n_samples)
        grad[rows, self.t] -= 1  # subtract 1 at each sample's true class
        grad *= dout
        grad /= n_samples

        return grad

ModuleNotFoundError: No module named 'common.functions'

In [None]:
class SGD:
    """Stochastic gradient descent (SGD).

    Update rule: W <- W - lr * (dL/dW)
    """

    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate

    def update(self, params, grads):
        """Subtract ``lr * grads[i]`` from ``params[i]`` for every index.

        Args:
            params (list): parameters, updated through ``params[i] -= ...``.
            grads (list): gradients, one per entry of ``params``.
        """
        step = self.lr
        for idx in range(len(params)):
            params[idx] -= step * grads[idx]

### 1.4 신경망으로 문제 해결

In [None]:
import sys
sys.path.append('..')  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import matplotlib.pyplot as plt

from dataset import spiral

# Load the spiral dataset; presumably x holds the inputs and t the labels (TODO confirm).
x, t = spiral.load_data()
print('x', x.shape)  # expected: (300, 2)
print('t', t.shape)

In [None]:
# Plot the data points, one marker style per class
N = 100  # rows per class slice
CLS_NUM = 3  # number of class slices
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    # Class i is taken to be rows [i*N, (i+1)*N) of x.
    pts = x[i*N:(i+1)*N]
    plt.scatter(pts[:, 0], pts[:, 1], s=40, marker=markers[i])
plt.show()

In [None]:
# ch01/two_layer_net.py
import sys
sys.path.append('..')
from common.np import *
from common.layers import Affine, Sigmoid, SoftmaxWithLoss


class TwoLayerNet:
    """Affine -> Sigmoid -> Affine network with a SoftmaxWithLoss head.

    ``params`` and ``grads`` are flat lists gathered from all layers, in
    layer order.
    """

    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialise weights with small random values and biases with zeros.
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        # Create the layers in forward order.
        hidden = Affine(W1, b1)
        activation = Sigmoid()
        output = Affine(W2, b2)
        self.layers = [hidden, activation, output]
        self.loss_layer = SoftmaxWithLoss()

        # Collect every parameter and gradient into flat lists.
        self.params = [p for layer in self.layers for p in layer.params]
        self.grads = [g for layer in self.layers for g in layer.grads]

    def predict(self, x):
        """Feed ``x`` through each layer in order and return the scores."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        """Return the loss of the network's scores for ``x`` against labels ``t``."""
        return self.loss_layer.forward(self.predict(x), t)

    def backward(self, dout=1):
        """Backpropagate from the loss layer through the layers in reverse order."""
        grad = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            grad = layer.backward(grad)
        return grad

In [None]:
# ch01/train_custom_loop.py
import sys
sys.path.append('..')
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm  # pip install tqdm
from common.optimizer import SGD
from dataset import spiral
from two_layer_net import TwoLayerNet

# Select the plot font family.
# NOTE(review): the original comment labels this font "linux"; confirm that
# 'Malgun Gothic' is actually installed on the target machine.
matplotlib.rc('font', family='Malgun Gothic')  # linux
# matplotlib.rc('font', family='AppleGothic')  # Mac

# 1. Hyperparameters
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

# 2. Load the data, then create the model and the optimizer
x, t = spiral.load_data()
model = TwoLayerNet(input_size=2,
                    hidden_size=hidden_size,
                    output_size=3)
optimizer = SGD(lr=learning_rate)

# Variables used during training
data_size = len(x)
max_iters = data_size // batch_size  # full mini-batches per epoch (remainder is dropped)
total_loss = 0  # sum of losses since the last report
loss_count = 0  # number of losses added to total_loss
loss_list = []  # averaged losses, one entry per report

for epoch in tqdm(range(max_epoch)):
    # 3. Shuffle the data (inputs and labels with the same permutation)
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
        # Slice out the current mini-batch.
        batch_x = x[iters*batch_size:(iters+1)*batch_size]
        batch_t = t[iters*batch_size:(iters+1)*batch_size]

        # Compute the gradients and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)

        total_loss += loss
        loss_count += 1

        # Periodically report training progress (every 10 iterations)
        if (iters+1) % 10 == 0:
            avg_loss = total_loss / loss_count
            print(f'| 에폭 {epoch+1} | 반복{iters+1}/{max_iters} | 손실 {avg_loss:.2f}')
            loss_list.append(avg_loss)
            # Reset the accumulators for the next reporting window.
            total_loss, loss_count = 0, 0