In [1]:
import os
import numpy as np
import tensorflow as tf

In [2]:
(X_train, Y_train), (X_test, Y_test) = tf.keras.datasets.mnist.load_data()
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

size_height = X_train.shape[1]
size_width = X_train.shape[2]
num_data_train = X_train.shape[0]
num_data_test = X_test.shape[0]

X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)

X_train = np.expand_dims(X_train, axis=3)
X_test = np.expand_dims(X_test, axis=3)

X_train /= 255.0
X_test /= 255.0

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [3]:
num_epochs = 10
size_batch = 64
num_classes = np.unique(Y_train).shape[0]

rate_learning = 1e-3 ## 매우 중요
rate_dropout = 0.5

In [4]:
dataset_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
dataset_train = dataset_train.shuffle(10000).batch(size_batch)

dataset_test = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
dataset_test = dataset_test.batch(size_batch)

In [6]:
class CNN(tf.keras.Model):
    def __init__(self,
                 num_classes, size_kernel,
                 num_filters, num_denses, rate_dropout):
        super(CNN, self).__init__()

        self.num_classes = num_classes #int
        self.size_kernel = size_kernel #int
        self.num_filters = num_filters #list
        self.num_denses = num_denses #list
        self.rate_dropout = rate_dropout #float

        ### layers
        self.layer_conv_1 = tf.keras.layers.Conv2D(
            num_filters[0], #output channel
            (size_kernel, size_kernel), #kernel size 
            padding = 'same' #same size로 들어가면 same size로 만들어줌 / cf: valid 옵션을 주면 output이 줄어듬
        ) # 3 * 3 * 1 * 64 (kernels) + 64(bias) = 10 * 64 = 640
        self.layer_act_1 = tf.keras.layers.ReLU()
        self.layer_pool_1 = tf.keras.layers.MaxPool2D(
            strides = (2, 2),
            pool_size = (2, 2),
            padding = 'same'
        )
        
        self.layer_conv_2 = tf.keras.layers.Conv2D(
            num_filters[1], #output channel
            (size_kernel, size_kernel), #kernel size 
            padding = 'same' #same size로 들어가면 same size로 만들어줌 / cf: valid 옵션을 주면 output이 줄어듬
        ) # 3 * 3 * 64 * 128 (kernels) + 128(bias) =  * 128 = 73856
        self.layer_act_2 = tf.keras.layers.ReLU()
        self.layer_pool_2 = tf.keras.layers.MaxPool2D(
            strides = (2, 2),
            pool_size = (2, 2),
            padding = 'same'
        )
        
        self.layer_conv_3 = tf.keras.layers.Conv2D(
            num_filters[2], #output channel
            (size_kernel, size_kernel), #kernel size 
            padding = 'same' #same size로 들어가면 same size로 만들어줌 / cf: valid 옵션을 주면 output이 줄어듬
        ) #3 * 3 * 128 * 256 (kernels) + 256(bias) =  * 256 = 73856
        self.layer_act_3 = tf.keras.layers.ReLU()
        self.layer_pool_3 = tf.keras.layers.MaxPool2D(
            strides = (2, 2),
            pool_size = (2, 2),
            padding = 'same'
        )
        self.layer_flatten = tf.keras.layers.Flatten()
        
        self.layer_fc_1 = tf.keras.layers.Dense(
            num_denses[0], activation = tf.nn.relu
        ) # 4 * 4 * 256 * 256 + 256(bias) = 1048832
        
        self.layer_fc_2 = tf.keras.layers.Dense(
            num_denses[1], activation = tf.nn.relu
        ) # 256 * 128 + 128 = 32896
            
        self.layer_fc_3 = tf.keras.layers.Dense(
            num_classes, activation = tf.nn.softmax
        ) # 128 * 10 + 10 = 1290  
        
        ###

    def call(self, inputs, training):
        ###
        #inputs: (batch size, 28, 28, 1) , 
        outputs = inputs
        # (batch size, 28, 28, 1)
        
        outputs = self.layer_conv_1(outputs)
        # (batch size, 28, 28, 64)
        outputs = self.layer_act_1(outputs)
        # (batch size, 28, 28, 64)
        outputs = self.layer_pool_1(outputs)
        # (batch size, 14, 14, 64)
        
        outputs = self.layer_conv_2(outputs)
        # (batch size, 14, 14, 128)
        outputs = self.layer_act_2(outputs)
        # (batch size, 14, 14, 128)
        outputs = self.layer_pool_2(outputs)
        # (batch size, 7, 7, 128)
        
        outputs = self.layer_conv_3(outputs)
        # (batch size, 7, 7, 256)
        outputs = self.layer_act_3(outputs)
        # (batch size, 7, 7, 256)
        outputs = self.layer_pool_3(outputs)
        # (batch size, 4, 4, 256): 4차원 텐서
        
        outputs = self.layer_flatten(outputs)
        # (batch size, 4* 4 * 256)
        
        outputs = self.layer_fc_1(outputs)
        # (batch size, 256)
        outputs = self.layer_fc_2(outputs)
        # (batch size, 128)
        outputs = self.layer_fc_3(outputs)
        # (batch size, num_classes = 10)
        # 숫자 0 ~ 9 분류
        ###

        return outputs

In [7]:
###
model = CNN(num_classes, 
            3, [64, 128, 256], 
            [256, 128], rate_dropout)
optimizer = tf.keras.optimizers.Adam(rate_learning)
loss = tf.keras.losses.SparseCategoricalCrossentropy()
###

model.build((None, size_height, size_width, 1))
model.summary()

Model: "cnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              multiple                  640       
_________________________________________________________________
re_lu (ReLU)                 multiple                  0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
conv2d_1 (Conv2D)            multiple                  73856     
_________________________________________________________________
re_lu_1 (ReLU)               multiple                  0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
_________________________________________________________________
conv2d_2 (Conv2D)            multiple                  295168  

In [10]:
# decorator
@tf.function
def step_train(X, by):
    ###
    with tf.GradientTape() as tape:
        preds_ = model(X, True)
        loss_ = loss(by, preds_)
        
    grads_ = tape.gradient(loss_, model.trainable_weights)
    # dL/dw = gradient of w
    # grads_ -> 1452682
    optimizer.apply_gradients(
        zip(grads_, model.trainable_weights)
    )
    
    ###

    return loss_

@tf.function
def step_test(X, by):
    ###
    preds_ = model(X, False)
    loss_ = loss(by, preds_)
    ###

    return loss_

In [None]:
for ind_epoch in range(0, num_epochs):
    loss_train = 0.0
    loss_test = 0.0

    num_train = 0.0
    num_test = 0.0

    for ind_iter, (X_batch, by_batch) in enumerate(dataset_train):
        # batch size = 64
        ###
        loss_ = step_train(X_batch, by_batch)
        loss_train += loss_ * X_batch.shape[0]
        num_train += X_batch.shape[0]
        ###

    for X_batch, by_batch in dataset_test:
        # batch size = 63
        ###
        loss_ = step_test(X_batch, by_batch)
        loss_test += loss_ * X_batch.shape[0]
        num_test += X_batch.shape[0]
        ###

    loss_train /= num_train
    loss_test /= num_test
    
    acc_train = 0.0
    acc_test = 0.0

    print('{} EPOCH: loss_train {:.4f} acc_train {:.4f} loss_test {:.4f} acc_test {:.4f}'.format(
        ind_epoch + 1, loss_train, acc_train, loss_test, acc_test))   

1 EPOCH: loss_train 0.0400 acc_train 0.0000 loss_test 0.0366 acc_test 0.0000
2 EPOCH: loss_train 0.0286 acc_train 0.0000 loss_test 0.0240 acc_test 0.0000
3 EPOCH: loss_train 0.0218 acc_train 0.0000 loss_test 0.0225 acc_test 0.0000
4 EPOCH: loss_train 0.0177 acc_train 0.0000 loss_test 0.0260 acc_test 0.0000
5 EPOCH: loss_train 0.0152 acc_train 0.0000 loss_test 0.0310 acc_test 0.0000
6 EPOCH: loss_train 0.0119 acc_train 0.0000 loss_test 0.0316 acc_test 0.0000
7 EPOCH: loss_train 0.0111 acc_train 0.0000 loss_test 0.0303 acc_test 0.0000
8 EPOCH: loss_train 0.0091 acc_train 0.0000 loss_test 0.0391 acc_test 0.0000
9 EPOCH: loss_train 0.0090 acc_train 0.0000 loss_test 0.0374 acc_test 0.0000
