<a href="https://colab.research.google.com/github/ElaYJ/Study_Deep_Learning/blob/main/Framework/TensorFlow/13_Training_Logic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training Logic

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
np.random.seed(7777)
tf.random.set_seed(7777)

In [3]:
class Cifar10DataLoader():
    def __init__(self):
        # data load
        (self.train_x, self.train_y), \
            (self.test_x, self.test_y) = tf.keras.datasets.cifar10.load_data()
        self.input_shape = self.train_x.shape[1:]

    def scale(self, x):

        return (x / 255.0).astype(np.float32)

    def preprocess_dataset(self, dataset):

        (feature, target) = dataset

        # scaling #
        scaled_x = np.array([self.scale(x) for x in feature])

        # label encoding #
        ohe_y = np.array([tf.keras.utils.to_categorical(y, num_classes=10) for y in target])

        return scaled_x, ohe_y.squeeze(1)

    def get_train_dataset(self):
        return self.preprocess_dataset((self.train_x, self.train_y))

    def get_test_dataset(self):
        return self.preprocess_dataset((self.test_x, self.test_y))

cifar10_loader = Cifar10DataLoader()
train_x, train_y = cifar10_loader.get_train_dataset()

print(train_x.shape, train_x.dtype)
print(train_y.shape, train_y.dtype)

test_x, test_y = cifar10_loader.get_test_dataset()

print(test_x.shape, test_x.dtype)
print(test_y.shape, test_y.dtype)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
(50000, 32, 32, 3) float32
(50000, 10) float32
(10000, 32, 32, 3) float32
(10000, 10) float32


In [4]:
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, Add

def build_resnet(input_shape):
    inputs = Input(input_shape)

    net = Conv2D(32, kernel_size=3, strides=2, padding='same', activation='relu')(inputs)
    net = MaxPool2D()(net)

    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)

    net1_1 = Conv2D(64, kernel_size=1, padding='same')(net)
    net = Add()([net1_1, net3])

    net1 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net)
    net2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(net1)
    net3 = Conv2D(64, kernel_size=1, padding='same', activation='relu')(net2)

    net = Add()([net, net3])

    net = MaxPool2D()(net)

    net = Flatten()(net)
    net = Dense(10, activation="softmax")(net)

    model = tf.keras.Model(inputs=inputs, outputs=net, name='resnet')

    return model

model = build_resnet((32, 32, 3))
model.summary()

Model: "resnet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 32, 32, 3)]          0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 16, 16, 32)           896       ['input_1[0][0]']             
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 8, 8, 32)             0         ['conv2d[0][0]']              
 D)                                                                                               
                                                                                                  
 conv2d_1 (Conv2D)           (None, 8, 8, 64)             2112      ['max_pooling2d[0][0]']  

## 학습하는 과정을 직접 만들어보자!

In [5]:
learning_rate = 0.03

# functions
opt = tf.keras.optimizers.Adam(learning_rate)
loss_fn = tf.keras.losses.categorical_crossentropy

In [6]:
loss_fn([1], [0.8])

  return dispatch_target(*args, **kwargs)


<tf.Tensor: shape=(), dtype=float32, numpy=1.192093e-07>

In [7]:
loss_fn([1, 0], [0.8, 0.2])

<tf.Tensor: shape=(), dtype=float32, numpy=0.22314353>

In [8]:
# Class instances

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

In [9]:
# 학습 과정

# for e in epochs:
#     for batch_x, batch_y in dataset:
#         pred = model(batch_x)
#         loss_fn(batch_y, pred)
#         gradients
#         weight_update
#         print()

In [10]:
# model weights
model.trainable_variables

[<tf.Variable 'conv2d/kernel:0' shape=(3, 3, 3, 32) dtype=float32, numpy=
 array([[[[-0.11084427,  0.03928088, -0.05376609,  0.13756134,
            0.05854   , -0.13327944,  0.09214586, -0.0399617 ,
           -0.03771443, -0.00206238, -0.05982277,  0.08267742,
            0.02665983,  0.03711253, -0.09328039, -0.09757478,
            0.08227219, -0.00288881, -0.12957191,  0.0380526 ,
           -0.07334447,  0.01605259,  0.10326956, -0.06639638,
           -0.12319067, -0.00394771,  0.03608139, -0.05662191,
           -0.07553163,  0.12169133,  0.0244844 , -0.07790901],
          [-0.03987869,  0.00471959,  0.12795974,  0.03110974,
           -0.01952283,  0.10317141, -0.04165187,  0.07382426,
            0.03891641,  0.10432673,  0.05927378,  0.0525088 ,
           -0.00508989,  0.05182461, -0.03061026, -0.00317618,
            0.11179258,  0.04121706, -0.05368476, -0.12449716,
           -0.01069814, -0.0240184 ,  0.09021829,  0.05069377,
            0.0940222 , -0.12291901,  0.119

In [11]:
def train_step(x, y) :
    # 자동 미분
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)

    gradients = tape.gradient(loss, model.trainable_variables)

    # weights update
    opt.apply_gradients(zip(gradients, model.trainable_variables))

    # 결과가 Object에 속성값으로 누적
    train_loss(loss)
    train_accuracy(y, pred)

In [12]:
batch_size = 64

# 학습 Loop
for epoch in range(1):

    for i in range(train_x.shape[0] // batch_size):
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size]

        train_step(x, y)
        print("\r {} / {}".format(i, train_x.shape[0] // batch_size), end='\r')

    fmt = 'epoch {} --> loss: {:.5f}, accuracy: {:.2f}%'
    print(fmt.format(epoch+1,
                     train_loss.result(),
                     train_accuracy.result() * 100)
    ) #--> epoch를 돌 때마다 누적된 metrics 값의 평균이 출력된다.



epoch 1 --> loss: 2.42690, accuracy: 10.05%


- `@tf.function`이라는 Decorator를 사용한다.

- 연산이 이루어지는 함수를 따로 만들고 decorator 선언을 한다.

- for문을 돌 때마다 함수가 생성되고 지워지고를 반복하지 않고,

    선언되는 순간 함수가 테이블로 세팅되어 속도를 향상시킬 수 있다.

In [13]:
@tf.function
def train_step(x, y) :
    with tf.GradientTape() as tape:
        pred = model(x)
        loss = loss_fn(y, pred)

    gradients = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(y, pred)


In [15]:
batch_size = 64
num_of_batch_train = train_x.shape[0] // batch_size

for epoch in range(1):
    for i in range(num_of_batch_train):
        idx = i * batch_size
        x, y = train_x[idx:idx+batch_size], train_y[idx:idx+batch_size]

        train_step(x, y)
        print("\r {} / {}".format(i, num_of_batch_train), end='\r')

    fmt = 'epoch {} loss: {}, accuracy: {}'
    print(fmt.format(epoch+1,
                     train_loss.result(),
                     train_accuracy.result() * 100)
    )
    # Reset metrics every epoch
    # 각 epoch 마다의 metrics 값을 확인하기 위해 reset이 필요
    # reset하지 않으면 epoch를 돌 때마다 누적되는 metrics 값의 평균이 출력됨
    train_loss.reset_states()
    train_accuracy.reset_states()

epoch 1 loss: 2.32004451751709, accuracy: 9.913172721862793
