# Training Logic

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
np.random.seed(7777)
tf.random.set_seed(7777)

In [3]:
class Cifar10DataLoader():
    def __init__(self):
        # data load 
        (self.train_x, self.train_y), \
            (self.test_x, self.test_y) = tf.keras.datasets.cifar10.load_data()
        self.input_shape = self.train_x.shape[1:]

    def scale(self, x):

        return (x / 255.0).astype(np.float32)

    def preprocess_dataset(self, dataset):

        (feature, target) = dataset

        # scaling #
        scaled_x = np.array([self.scale(x) for x in feature])

        # label encoding #
        ohe_y = np.array([tf.keras.utils.to_categorical(
            y, num_classes=10) for y in target])
        
        return scaled_x, ohe_y.squeeze(1)

    def get_train_dataset(self):
        return self.preprocess_dataset((self.train_x, self.train_y))

    def get_test_dataset(self):
        return self.preprocess_dataset((self.test_x, self.test_y))

cifar10_loader = Cifar10DataLoader()
train_x, train_y = cifar10_loader.get_train_dataset()

print(train_x.shape, train_x.dtype)
print(train_y.shape, train_y.dtype)

test_x, test_y = cifar10_loader.get_test_dataset()

print(test_x.shape, test_x.dtype)
print(test_y.shape, test_y.dtype)

(50000, 32, 32, 3) float32
(50000, 10) float32
(10000, 32, 32, 3) float32
(10000, 10) float32


In [4]:
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, Add

def build_resnet(input_shape):
    inputs = Input(input_shape)

    net = Conv2D(32, kernel_size=3, strides=2,
                 padding='same', activation='relu')(inputs)
    net = MaxPool2D()(net)
    
    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)
    
    net1_1 = Conv2D(64, kernel_size=1, padding='same')(net)
    net = Add()([net1_1, net3])
    
    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)
    
    net = Add()([net, net3])
    
    net = MaxPool2D()(net)
    
    net = Flatten()(net)
    net = Dense(10, activation="softmax")(net)

    model = tf.keras.Model(inputs=inputs, outputs=net, name='resnet')
    
    return model

model = build_resnet((32, 32, 3))
model.summary()

Model: "resnet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 16, 16, 32)   896         ['input_1[0][0]']                
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 8, 8, 32)     0           ['conv2d[0][0]']                 
                                                                                                  
 conv2d_1 (Conv2D)              (None, 8, 8, 64)     2112        ['max_pooling2d[0][0]']          
                                                                                             

## 학습하는 과정을 직접 만들어보자!

```
for e in epochs:                                  매 epoch마다
    
    for batch_x, batch_y in dataset:              데이터셋의 배치셋 마다
        
        pred = model(batch_x)                     예측하고
        loss = loss_fn(batch_y, pred)             손실을 구하고
        gradients                                 기울기를 구하고
        weight updates                            가중치를 업데이트 한다.
        process print                             위 과정을 출력한다.
```

In [5]:
learning_rate = 0.03
opt = tf.keras.optimizers.Adam(learning_rate)

loss_fn = tf.keras.losses.categorical_crossentropy # 이건 함수


In [8]:
train_loss = tf.keras.metrics.Mean(name='train_loss') # 이건 객체
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy') # 이건 객체

In [9]:
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)
        
    gradients = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables)) # zip 
    
    train_loss(loss)# 객체라 변수 선언 안해도 된다. 값이 알아서 누적되있음(tf기능)
    train_accuracy(y, pred)

In [10]:
batch_size = 64

# 실제 학습하는 루프

for epoch in range(1):
    
    for i in range(train_x.shape[0] // batch_size):
        
        # 데이터 배치 나누기
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size]
        
        # 학습 및 가중치 업데이트
        train_step(x, y)
        
        # 학습 진행상황 출력
        print("\r {} / {}".format(i, train_x.shape[0] // batch_size), end='\r')
    
    # epoch마다 결과 출력
    fmt = 'epoch {} loss: {}, accuracy: {}'
    print(fmt.format(epoch+1, 
                          train_loss.result(),
                          train_accuracy.result() * 100)
         )
    
    # loss와 acc 객체는 값을 계속 누적시킨다. 
    # 그럼 각 epoch의 값이 아니라 계속 누적시킨 평균값이 나오게 된다.
    # 그래서 reset 시켜줘야한다.
    train_loss.reset_states()
    train_accuracy.reset_states()

epoch 1 loss: 2.4192771911621094, accuracy: 9.85515308380127


## 데코레이션 : @tf.function
보통 함수 처럼 함수가 끝나면 결과가 소멸되는 것이 아니라 관련 작업물들의 실행결과들을 기억해놓는다?
-> 연산속도 향상?

그래프 최적화(Graph Optimization)는 만약 동일한 연산이 여기저기서 반복되는 경우 해당 연산 결과를 캐쉬(cache)로 저장해서 사용함으로써 동일 연산이 반복적으로 일어나지 않도록 한다거나, 복잡한 연산의 경우 다수의 장비에서 병렬처리(parallel on multiple devices)를 하여 연산을 빠르게 수행할 수 있도록 하여 성능을 최적화해줍니다. 

In [11]:
# 데코레이터 @ : tf에 관련된 하부 함수들을 미리 다 실행 시켜놓음? -> 연산이 빨라짐?

@tf.function
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)
        
    gradients = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
    
    train_loss(loss)
    train_accuracy(y, pred)


In [12]:
# 위 코드를 정리한 실사용 코드

batch_size = 64
num_of_batch_train = train_x.shape[0] // batch_size

for epoch in range(1):
    for i in range(num_of_batch_train):
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size]
        train_step(x, y)
        print("\r {} / {}".format(i, num_of_batch_train), end='\r')
    
    fmt = 'epoch {} loss: {}, accuracy: {}'
    print(fmt.format(epoch+1, 
                          train_loss.result(),
                          train_accuracy.result() * 100)
         )
      # Reset metrics every epoch
    train_loss.reset_states()
    train_accuracy.reset_states()

epoch 1 loss: 2.3281641006469727, accuracy: 9.943181991577148
