# ResNet 개요
- VGG이후 Network를 깊게 하는 것에 대한 연구가 진행. 하지만, Network의 깊이가 깊어질수록 성능이 저하되는 문제점이 존재.
    - Vanishing Gradient
    - 최적으로 loss 감소가 이루어지지 않음(수렴이 아닌 발산)

- ResNet의 주요 특징
    - Shortcut
        - 이전 레이어의 출력값을 conv layer를 거치지 않고 전달
    - Identity block

In [1]:
from tensorflow.keras.layers import Input, Conv2D, Dense, BatchNormalization, Activation, ZeroPadding2D, MaxPooling2D, Dropout, GlobalAveragePooling2D, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam , RMSprop
from tensorflow.keras.utils import to_categorical, Sequence
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10
import sklearn
import numpy as np
import cv2
import os
import pandas as pd

## Identity_block_34 구현하기

In [2]:
# ResNet34 identity block 구현하기

def identity_block_34(input_tensor, middle_kernel_size, filters, stage, block):
    
    filter1, filter2 = filters

    # 레이어 이름설정
    conv_name = 'res_s' + str(stage) + '_b' + str(block) + '_branch_'
    bn_name = 'bn_s'+ str(stage) + '_b' + str(block) + '_branch_'

    # 1번째 conv
    x = Conv2D(filters=filter1, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'a')(input_tensor)
    x = BatchNormalization(axis=3, name=bn_name+'a')(x)
    x = Activation('relu')(x)

    # 2번째 conv
    x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'b')(x)
    x = BatchNormalization(axis=3, name=bn_name+'b')(x)
    x = Activation('relu')(x)

    # Add
    x = Add()([input_tensor, x])
    x = Activation('relu')(x)

    return x


In [3]:
# identity_block_34 실습

input_tensor = Input(shape=(56,56,64), name='test_input')
x = identity_block_34(input_tensor, middle_kernel_size=3, filters = [64,64], stage=2, block=1)
output = identity_block_34(x, middle_kernel_size=3, filters = [64,64], stage=2, block=2)
identity_layers = Model(inputs=input_tensor, outputs=output)
identity_layers.summary()

# __init__() missing 1 required positional argument: 'filters' => filters를 잘못씀

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 test_input (InputLayer)        [(None, 56, 56, 64)  0           []                               
                                ]                                                                 
                                                                                                  
 res_s2_b1_branch_a (Conv2D)    (None, 56, 56, 64)   36928       ['test_input[0][0]']             
                                                                                                  
 bn_s2_b1_branch_a (BatchNormal  (None, 56, 56, 64)  256         ['res_s2_b1_branch_a[0][0]']     
 ization)                                                                                         
                                                                                              

## identity_block_50 구현하기

In [4]:
# ResNet50 identity block 구현하기

def identity_block_50(input_tensor, middle_kernel_size, filters, stage, block):

    filter1, filter2, filter3 = filters
    
    # 레이어 이름설정
    conv_name = 'res_s' + str(stage) + '_b' + str(block) + '_branch_'
    bn_name = 'bn_s'+ str(stage) + '_b' + str(block) + '_branch_'

    # 1번째 Conv
    x = Conv2D(filters=filter1, kernel_size=(1,1), kernel_initializer='he_normal', name=conv_name+'a')(input_tensor)
    x = BatchNormalization(axis=3, name=bn_name+'a')(x)
    x = Activation('relu')(x)

    # 2번째 Conv
    x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'b')(x)
    x = BatchNormalization(axis=3, name=bn_name+'b')(x)
    x = Activation('relu')(x)

    # 3번째 Conv
    x = Conv2D(filters=filter3, kernel_size=(1,1), kernel_initializer='he_normal', name=conv_name+'c')(x)
    x = BatchNormalization(axis=3, name=bn_name+'c')(x)
    
    # Add
    x = Add()([input_tensor, x])
    x = Activation('relu')(x)

    return x   


In [5]:
# identity_block_50 실습
input_tensor = Input(shape=(56,56,256), name='test_input')
x = identity_block_50(input_tensor, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block='1')
x = identity_block_50(x, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block='2')
output = identity_block_50(x, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block='3')
identity_layers = Model(inputs=input_tensor, outputs=output)
identity_layers.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 test_input (InputLayer)        [(None, 56, 56, 256  0           []                               
                                )]                                                                
                                                                                                  
 res_s2_b1_branch_a (Conv2D)    (None, 56, 56, 64)   16448       ['test_input[0][0]']             
                                                                                                  
 bn_s2_b1_branch_a (BatchNormal  (None, 56, 56, 64)  256         ['res_s2_b1_branch_a[0][0]']     
 ization)                                                                                         
                                                                                            

## reduce_block_34 구현하기
- feature map의 크기를 절반으로 줄이는 block 구현
- feature map의 사이즈를 맞추기 위해 zeropadding 설정

In [6]:
def reduce_block_34(input_tensor, middle_kernel_size, filters, stage, block, strides=(2,2)):

    filter1, filter2 = filters

    conv_name = 'res_s' + str(stage) + '_b' + str(block) + '_branch_'
    bn_name = 'bn_s'+ str(stage) + '_b' + str(block) + '_branch_'

    # 첫번째 conv(feature map의 사이즈를 절반으로 줄임.)
    x = ZeroPadding2D(padding=(1,1), name=conv_name+'padding')(input_tensor) # zeropadding을 하지 않을 경우 사이즈를 줄인 결과값이 (27,27)로 나온다
    x = Conv2D(filters=filter1, kernel_size=(3,3), strides=(2,2), kernel_initializer='he_normal', name=conv_name+'a')(x)
    x = BatchNormalization(axis=3, name=bn_name+'a')(x)
    x = Activation('relu')(x)

    # 두번째 conv
    x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'b')(x)
    x = BatchNormalization(axis=3, name=bn_name+'b')(x)
    
    # shortcut
    shortcut = Conv2D(filters=filter2, kernel_size=(1,1), strides=strides, kernel_initializer='he_normal', name=conv_name+'shortcut')(input_tensor)
    shortcut = BatchNormalization(axis=3, name=bn_name+'shortcut')(x)

    # ADD
    shortcut = Add()([x, shortcut])
    x = Activation('relu')(x)

    return x

In [7]:
# reduce_block_34 실습

input_tensor = Input(shape=[56,56,128], name='input_test')
x = reduce_block_34(input_tensor, middle_kernel_size=3, filters=[128,128], stage=2, block=1)
output = identity_block_34(x, middle_kernel_size=3, filters=[128,128], stage=2, block=2)
identity_layers = Model(inputs=input_tensor, outputs=output)
identity_layers.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_test (InputLayer)        [(None, 56, 56, 128  0           []                               
                                )]                                                                
                                                                                                  
 res_s2_b1_branch_padding (Zero  (None, 58, 58, 128)  0          ['input_test[0][0]']             
 Padding2D)                                                                                       
                                                                                                  
 res_s2_b1_branch_a (Conv2D)    (None, 28, 28, 128)  147584      ['res_s2_b1_branch_padding[0][0]'
                                                                 ]                          

## reduce_block_50 구현하기
- feature map의 사이즈를 절반으로 줄이는 block 구현
- feature map의 사이즈가 절반으로 줄어드는 stage의 첫번째 block으로 사용

In [8]:
# feature map의 크기를 절반으로 줄이는 reduce_block()만들기

def reduce_block_50(input_tensor, middle_kernel_size, filters, stage, block, strides=(2,2)):
    
    filter1, filter2, filter3 = filters

    conv_name = 'res_s' + str(stage) + '_b' + str(block) + '_branch_'
    bn_name = 'bn_s'+ str(stage) + '_b' + str(block) + '_branch_'

    # 첫번째 conv (feature map의 크기를 절반으로 줄임. 첫번째 stage에서는 예외.)
    x = Conv2D(filters=filter1, kernel_size=(1,1), strides=strides, kernel_initializer='he_normal', name=conv_name+'1')(input_tensor)
    x = BatchNormalization(axis=3, name=bn_name+'a')(x)
    x = Activation('relu')(x)

    # 두번째 conv
    x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'2')(x)
    x = BatchNormalization(axis=3, name=bn_name+'b')(x)
    x = Activation('relu')(x)

    # 세번째 conv
    x = Conv2D(filters=filter3, kernel_size=(1,1), kernel_initializer='he_normal', name=conv_name+'3')(x)
    x = BatchNormalization(axis=3, name=bn_name+'c')(x)
    
    # shortcut 
    shortcut = Conv2D(filters=filter3, kernel_size=(1,1), strides=strides, kernel_initializer='he_normal', name = conv_name+'shortcut')(input_tensor)
    shortcut = BatchNormalization(axis=3, name=bn_name+'shortcut')(shortcut)

    # ADD
    x = Add()([x, shortcut])
    x = Activation('relu')(x)

    return x



In [9]:
# reduce_block 실습

input_tensor = Input(shape=(56,56,256), name='test_input')
x = reduce_block_50(input_tensor, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block=1, strides=(2,2))
x = identity_block_50(x, middle_kernel_size=3, filters=[64,64,256], stage=2, block=2)
output = identity_block_50(x, middle_kernel_size=3, filters=[64,64,256], stage=2, block=3)
identity_layers = Model(inputs=input_tensor, outputs=output)
identity_layers.summary()



Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 test_input (InputLayer)        [(None, 56, 56, 256  0           []                               
                                )]                                                                
                                                                                                  
 res_s2_b1_branch_1 (Conv2D)    (None, 28, 28, 64)   16448       ['test_input[0][0]']             
                                                                                                  
 bn_s2_b1_branch_a (BatchNormal  (None, 28, 28, 64)  256         ['res_s2_b1_branch_1[0][0]']     
 ization)                                                                                         
                                                                                            

## 첫번째 stage 구현

In [10]:
# stage1 구현
# input image에 7x7 conv, strides = 2, filters = 64
# 3x3 maxpooling, strides = 2


def first_conv(input_tensor):

    x = ZeroPadding2D(padding=(3,3), name='conv1_pad')(input_tensor)
    x = Conv2D(filters=64, kernel_size=(7,7), strides=2, padding = 'valid', kernel_initializer='he_normal', name='conv1')(x)
    x = BatchNormalization(axis=3, name='conv1_nm')(x)
    x = Activation('relu')(x)

    x = ZeroPadding2D(padding=(1,1), name='pool1')(x)
    x = MaxPooling2D(pool_size=(3,3), strides=2)(x)

    return x


In [11]:
input_tensor=Input(shape=(224,224,3), name='test_input')
output = first_conv(input_tensor)

model = Model(inputs=input_tensor, outputs = output)
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 test_input (InputLayer)     [(None, 224, 224, 3)]     0         
                                                                 
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)       0         
                                                                 
 conv1 (Conv2D)              (None, 112, 112, 64)      9472      
                                                                 
 conv1_nm (BatchNormalizatio  (None, 112, 112, 64)     256       
 n)                                                              
                                                                 
 activation_29 (Activation)  (None, 112, 112, 64)      0         
                                                                 
 pool1 (ZeroPadding2D)       (None, 114, 114, 64)      0         
                                                           

In [12]:
# first_conv 실습
# 주의할 점! 
# first_conv의 결과값(= (56,56,64)), identity_block의 결과값(= (56,56,256))
# 채널수가 다르기 때문에 연산을 할 수 없다. 
# first_conv의 결과값을 (56,56,64) -> (56,56,256)으로 변환하여 더해야 한다.=> 따라서, 첫번째 블록으로는 identity_block이 아닌 reduce_block을 사용.

input_tensor=Input(shape=(224,224,3), name='test_input')
x = first_conv(input_tensor)
# output = identity_block_50(x, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block=1)
output = reduce_block_50(x, 3, filters=[64,64,256], stage=2, block=1, strides=(1,1))


model = Model(inputs=input_tensor, outputs = output)
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 test_input (InputLayer)        [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['test_input[0][0]']             
                                                                                                  
 conv1 (Conv2D)                 (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                            

## ResNet34 구현하기

In [13]:
def create_resnet_34(shape=(224,224,3)):
    input_tensor = Input(shape = shape)

    # 첫번째 stage
    x = first_conv(input_tensor)

    # 두번째 stage
    x = identity_block_34(x, middle_kernel_size=3, filters=[64,64], stage=2, block=1)
    x = identity_block_34(x, middle_kernel_size=3, filters=[64,64], stage=2, block=2)
    x = identity_block_34(x, middle_kernel_size=3, filters=[64,64], stage=2, block=3)

    # 세번째 stage
    x = reduce_block_34(x, middle_kernel_size=3, filters=[128,128], stage=3, block=1)
    x = identity_block_34(x, middle_kernel_size=3, filters=[128,128], stage=3, block=2)
    x = identity_block_34(x, middle_kernel_size=3, filters=[128,128], stage=3, block=3)
    x = identity_block_34(x, middle_kernel_size=3, filters=[128,128], stage=3, block=4)

    # 네번째 stage
    x = reduce_block_34(x, middle_kernel_size=3, filters=[256,256], stage=4, block=1)
    x = identity_block_34(x, middle_kernel_size=3, filters=[256,256], stage=4, block=2)
    x = identity_block_34(x, middle_kernel_size=3, filters=[256,256], stage=4, block=3)
    x = identity_block_34(x, middle_kernel_size=3, filters=[256,256], stage=4, block=4)
    x = identity_block_34(x, middle_kernel_size=3, filters=[256,256], stage=4, block=5)
    x = identity_block_34(x, middle_kernel_size=3, filters=[256,256], stage=4, block=6)

    # 다섯번째 stage
    x = reduce_block_34(x, middle_kernel_size=3, filters=[512,512], stage=5, block=1)
    x = identity_block_34(x, middle_kernel_size=3, filters=[512,512], stage=5, block=2)
    x = identity_block_34(x, middle_kernel_size=3, filters=[512,512], stage=5, block=3)    

    # fc layer
    x = GlobalAveragePooling2D(name='GAP')(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(200, activation='relu', name='fc1')(x)
    x = Dropout(rate=0.5)(x)
    output = Dense(10, activation='softmax', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)
    model.summary()

    return model



In [14]:
model_34 = create_resnet_34(shape=(224,224,3))

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1 (Conv2D)                 (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                            

## ResNet50 구현하기

In [15]:
# resnet 구현
# cifar10 기준

def create_resnet_50(shape=(224,224,3)):
    input_tensor=Input(shape=shape)

    # 첫번째 stage.
    # Conv, pool
    x = first_conv(input_tensor)

    # 두번째 stage.
    x = reduce_block_50(x, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block=1, strides=(1,1))    
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block=2)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[64,64,256], stage=2, block=3)

    # 세번째 stage
    x = reduce_block_50(x, middle_kernel_size=(3,3), filters=[128,128,512], stage=3, block=1, strides=(2,2))    
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[128,128,512], stage=3, block=2)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[128,128,512], stage=3, block=3)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[128,128,512], stage=3, block=4)

    # 네번째 stage
    x = reduce_block_50(x, middle_kernel_size=(3,3), filters=[256,256,1024], stage=4, block=1, strides=(2,2))    
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[256,256,1024], stage=4, block=2)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[256,256,1024], stage=4, block=3)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[256,256,1024], stage=4, block=4)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[256,256,1024], stage=4, block=5)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[256,256,1024], stage=4, block=6)

    # 다섯번째 stage
    x = reduce_block_50(x, middle_kernel_size=(3,3), filters=[512,512,2048], stage=5, block=1, strides=(2,2))
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[512,512,2048], stage=5, block=2)
    x = identity_block_50(x, middle_kernel_size=(3,3), filters=[512,512,2048], stage=5, block=3)


    # fc layer
    x = GlobalAveragePooling2D(name='GAP')(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(200, activation='relu', name='fc_1')(x)
    x = Dropout(rate=0.5)(x)
    output = Dense(10, activation='softmax', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)
    model.summary()

    return model




In [16]:
model_50 = create_resnet_50(shape=(224,224,3))

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_2[0][0]']                
                                                                                                  
 conv1 (Conv2D)                 (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                            

## Cifar10 데이터 셋 가져오기

In [17]:
IMAGE_SIZE = 128
BATCH_SIZE = 64



def get_preprocessed_ohe(images, labels, pre_func=None):
    if pre_func is not None:
        images = pre_func(labels)
    
    oh_labels = to_categorical(labels)
    return images, oh_labels

def get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.5, random_state=2021):

    train_images, train_oh_labels = get_preprocessed_ohe(train_images, train_labels)
    test_images, test_oh_labels = get_preprocessed_ohe(test_images, test_labels)

    tr_images, val_images, tr_oh_labels, val_oh_labels = train_test_split(train_images, train_oh_labels, test_size=valid_size, random_state=2021)

    return (tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels)

class CIFAR_Dataset(Sequence):

    def __init__(self, images_array, labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=None):
        self.images_array = images_array
        self.labels = labels
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.pre_func = pre_func
        self.shuffle = shuffle
        if self.shuffle:
            self.on_epoch_end()

    def __len__(self):
        
        return int(np.ceil(len(self.labels)/self.batch_size))


    def __getitem__(self, index):

        images_fetch = self.images_array[index*self.batch_size:(index+1)*self.batch_size]
        if self.labels is not None:
            label_batch = self.labels[index*self.batch_size:(index+1)*self.batch_size]
        
        image_batch = np.zeros((images_fetch.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype='float32')

        for image_index in range(images_fetch.shape[0]):
            image = cv2.resize(images_fetch[image_index], (IMAGE_SIZE, IMAGE_SIZE))

            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']

            if self.pre_func is not None:
                image = self.pre_func(image)

            image_batch[image_index] = image

        return image_batch, label_batch

    def on_epoch_end(self):
        if(self.shuffle):
            self.images_array, self.labels = sklearn.utils.shuffle(self.images_array, self.labels)
        else:
            pass


In [18]:
# cifar10 데이터 가져오기

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

(tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_image, test_oh_labels) = \
    get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.5, random_state=2021)
print(tr_images.shape, tr_oh_labels.shape, val_images.shape, val_oh_labels.shape, test_images.shape, test_oh_labels.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
(25000, 32, 32, 3) (25000, 10) (25000, 32, 32, 3) (25000, 10) (10000, 32, 32, 3) (10000, 10)


In [19]:
# 학습용, 검증용 데이터셋 제너레이터 생성

from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess

tr_ds = CIFAR_Dataset(tr_images, tr_oh_labels, batch_size = BATCH_SIZE, augmentor = None, shuffle = True, pre_func=resnet_preprocess)
val_ds = CIFAR_Dataset(val_images, val_oh_labels, batch_size=BATCH_SIZE, augmentor = None, shuffle = False, pre_func=resnet_preprocess)

print(next(iter(tr_ds))[0].shape, next(iter(val_ds))[0].shape)
print(next(iter(tr_ds))[1].shape, next(iter(val_ds))[1].shape)

(64, 128, 128, 3) (64, 128, 128, 3)
(64, 10) (64, 10)


## ResNet34를 이용한 모델 평가(cifar10)

In [22]:
from tensorflow.python.keras.callbacks import ReduceLROnPlateau, EarlyStopping
model_34 = create_resnet_34(shape=(128,128,3))

ely_stop = EarlyStopping(monitor = 'val_loss', patience=5, mode = 'min', verbose=1)

model_34.compile(optimizer = Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy']) 

history = model_34.fit(tr_ds, epochs=20, validation_data = val_ds,callbacks=[ely_stop])

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 134, 134, 3)  0           ['input_5[0][0]']                
                                                                                                  
 conv1 (Conv2D)                 (None, 64, 64, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_nm (BatchNormalization)  (None, 64, 64, 64)   256         ['conv1[0][0]']           

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 00019: early stopping


In [23]:
test_ds = CIFAR_Dataset(test_images, test_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=resnet_preprocess)
model_34.evaluate(test_ds)



[1.470077395439148, 0.6388000249862671]

## ResNet50을 이용한 모델 평가(cifar10)

In [24]:
model_50 = create_resnet_34(shape=(128,128,3))

ely_stop = EarlyStopping(monitor = 'val_loss', patience=5, mode = 'min', verbose=1)

model_50.compile(optimizer = Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy']) 

history = model_50.fit(tr_ds, epochs=20, validation_data = val_ds,callbacks=[ely_stop])

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 128, 128, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 134, 134, 3)  0           ['input_6[0][0]']                
                                                                                                  
 conv1 (Conv2D)                 (None, 64, 64, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_nm (BatchNormalization)  (None, 64, 64, 64)   256         ['conv1[0][0]']           

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 00020: early stopping


In [25]:
test_ds = CIFAR_Dataset(test_images, test_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=resnet_preprocess)
model_50.evaluate(test_ds)



[0.8663582801818848, 0.7573000192642212]

## 회고

- 모델을 만들면서 주로 오류가 났던 부분은 shortcut과 residual block의 결과값을 합하는 부분이었다. stage2부터 shortcut으로 들어오는 inputdata의 채널값과 residual block의 결과값이 다른 경우가 있다. ex) input_data = (56,56,64), residual_ouput = (56,56,256). 이럴 경우, shortcut에 1x1 conv 연산을 통해 채널수를 256으로 늘려줘야 한다. CNN에서는 feature map의 사이즈를 계산하는 것이 중요하다는 것을 다시한번 깨닫는다.

- 가장 허탈했던 오류는 'Shapes (None, None) and (None, 56, 56, 256) are incompatible'였다. 분명 모델 summary를 봤을 때는 문제가 없었는데 왜 계속 저 오류가 나오는지 몰라서 헤매다가 발견한 것이 모델변수명었다. 내가 만든 모델의 변수명은 model_34인데 model.fit으로 돌리고 있었으니 오류가 나는 것이었다. 변수명에 유의하자.

- ResNet34와 ResNet50의 성능을 cifar10을 이용해 비교해 본 결과, layer가 더 깊은 50에서 좀 더 좋은 성능을 보였다. 시간관계상 이미지 사이즈를 128x128로 바꾸고 epoch를 20까지만 해서 돌렸다.