In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
%config Completer.use_jedi = False

In [3]:
import tensorflow as tf

print(tf.__version__)

2024-03-05 13:59:46.299179: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-05 13:59:46.299285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-05 13:59:46.408955: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


2.15.0


### Pretrained 된 VGG16 모델을 로드하여 VGG의 구조 확인

In [4]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

input_tensor = Input(shape=(224, 224, 3))
base_model = VGG16(input_tensor=input_tensor, include_top=True, weights='imagenet')
model = Model(inputs=input_tensor, outputs=base_model.output)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 0us/step


In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, Flatten, Activation, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler

def create_vggnet(in_shape=(224,224,3), n_classes=10):
    input_tensor = Input(shape=in_shape)
    
    # block1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(input_tensor)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2,2), strides=(2,2), name='block1_pool')(x)
    
    # block2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
    
    # block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
    
    x = GlobalAveragePooling2D()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dense(1000, activation='relu')(x)
    
    output = Dense(units=n_classes, activation='softmax')(x)
    
    model = Model(inputs=input_tensor, outputs=output)
    model.summary()
    
    return model

In [6]:
model = create_vggnet(in_shape=(224,224,3), n_classes=10)

### VGG16의 연속된 conv를 하나의 block으로 보고 이를 conv_block()함수로 묶는다.
* conv_block() 함수는 인자로 입력 feature map과 conv연산에 사용될 커널의 필터 개수와 사이즈(3x3), 그리고 출력 feature map 크기를 줄이기 위한 strides를 받는다.
* 또한 repeats 인자를 통해서 연속된 conv 연산의 횟수를 정한다.

In [7]:
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model

# 인자로 입력된 input_tensor에 kernel 크기 3x3(default), 필터 개수 filters로 conv 연산을 정해진 횟수만큼 적용해 출력 feature map을 생성
# repeats인자를 통해 연속으로 conv 연산 수행 횟수를 정함
# 마지막에 MaxPooling(2x2), strides=2 로 출력 feature map의 크기를 절반으로 줄임. 인자로 들어온 strides는 MaxPooling에 사용되는 strides임.
def conv_block(tensor_in, filters, kernel_size, repeats=2, pool_strides=(2,2), block_id=1):
    '''
    파라미터 설명
    tensor_in: 입력 이미지 tensor 또는 입력 feature map tensor
    filters: conv 연산 filter개수
    kernel_size: conv 연산 filter 개수
    repeats: conv 연산 적용 횟수(Conv2D Layer 수)
    pool_strides: maxpooling의 strides이다. conv의 strides는 (1,1)이다.
    ''' 
    x = tensor_in
    
    # 인자로 들어온 repeats만큼 동일한 conv연산을 수행함.
    for i in range(repeats):
        # Conv 이름
        conv_name = 'block' + str(block_id) + '_conv' + str(i+1)
        x = Conv2D(filters=filters, kernel_size=kernel_size, activation='relu', padding='same', name=conv_name)(x)
    
    # maxpooling을 적용해 출력 feature map의 크기를 절반으로 줄임. 함수인자로 들어온 strides를 MaxPooling2D()에 인자로 입력.
    x = MaxPooling2D((2,2), strides=pool_strides, name='block'+str(block_id)+'_pool')(x)
    
    return x

### 생성한 conv_block()을 이용해서 convolution block을 생성

In [8]:
input_tensor = Input(shape=(224,224,3), name='test_input')
x = conv_block(tensor_in=input_tensor, filters=64, kernel_size=(3,3), repeats=3, pool_strides=(2,2), block_id=1)

conv_layers = Model(inputs=input_tensor, outputs=x)
conv_layers.summary()

### VGG16 모델을 생성
* 앞에서 만든 conv_block()을 이용하여 block별로 생성
* 1번에서 4번 block까지는 입력 feature map 대비 출력 feature map의 필터수는 2배, 크기는 절반으로 줄인다. 5번 block은 filter수는 그대로 두고 크기만 절반으로 줄인다.
* 논문대로 네트워크 구성 시 fully connected layer에서 많은 파라미터가 필요하므로 globalaveragepooling 사용

In [9]:
def create_vggnet_by_block(in_shape=(224,224,3), n_classes=10):
    input_tensor = Input(shape=in_shape, name='Input_tensor')
    # (입력 image tensor 또는 feature map) -> Conv -> Relu를 순차적을 2번 실행, 출력 Feature map의 filter 수는 64개. 크기는 MaxPooling으로 절반.
    x = conv_block(input_tensor, filters=64, kernel_size=(3,3), repeats=2, pool_strides=(2,2), block_id=1)
    
    # Conv 연산 2번 반복, 입력 feature map의 filter 수를 2배로(128개), 크기는 절반으로 출력 feature map 생성
    x = conv_block(x, filters=128, kernel_size=(3,3), repeats=2, pool_strides=(2,2), block_id=2)
    
    # Conv 연산 3번 반복, 입력 feature map의 filter 수를 2배로(256개), 크기는 절반으로 출력 feature map 생성
    x = conv_block(x, filters=256, kernel_size=(3, 3), repeats=3, pool_strides=(2, 2), block_id=3)
    
    # Conv연산 3번 반복, 입력 feature map의 filter 수를 2배로(512개), 크기는 절반으로 출력 feature map 생성.  
    x = conv_block(x, filters=512, kernel_size=(3, 3), repeats=3, pool_strides=(2, 2), block_id=4)
    
    # Conv 연산 3번 반복, 입력 feature map의 filter 수 그대로(512), 크기는 절반으로 출력 feature map 생성.  
    x = conv_block(x, filters=512, kernel_size=(3, 3), repeats=3, pool_strides=(2, 2), block_id=5)
    
    # GlobalAveragePooling으로 Flatten적용. 
    x = GlobalAveragePooling2D()(x)
    x = Dense(4096, activation='relu')(x)
    x = Dense(1000, activation='relu')(x)
    
    output = Dense(units=n_classes, activation='softmax')(x)
    
    model = Model(inputs=input_tensor, outputs=output, name='vgg_by_block')
    model.summary()
    
    return model
    

In [10]:
model =  create_vggnet_by_block(in_shape=(224, 224, 3), n_classes=10)

### CIFAR10 데이터 세트로 VGG16 모델 학습 및 성능 테스트

In [11]:
IMAGE_SIZE = 128
BATCH_SIZE = 64

### 데이터 전처리/인코딩/스케일링 함수 및 CIAFAR_dataset

In [12]:
import tensorflow as tf
import numpy as np
import pandas as pd

import random as python_random
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import Sequence
import cv2
import sklearn

def zero_one_scaler(image):
    return image/255.0

def get_preprocessed_ohe(images, labels, pre_func=None):
    if pre_func is not None:
        images = pre_func(images)
        
    '''
    OHE 적용
    참고로 dataframe에서 OHE을 적용하고 싶으면 pandas의 get_dummies를 쓰고
    혹은 그냥 numpy array에서 바로 OHE을 적용하고 싶으면 keras의 to_categorical을 쓴다.
    '''
    
    oh_labels = to_categorical(labels)
    
    return images, oh_labels

# 학습/검증/테스트 데이터 세트에 전처리 및 OHE 적용한 뒤에 반환
def get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.15, random_state=42):
    # 학습 및 텟그트 데이터 세트를 0 ~ 1 사잇값 float32로 변환하고 OHE 적용
    train_images, train_oh_labels = get_preprocessed_ohe(train_images, train_labels)
    test_images, test_oh_labels = get_preprocessed_ohe(test_images, test_labels)
    
    # 학습 데이터를 학습과 검증용 데이터 세트로 분리
    tr_images, val_images, tr_oh_labels, val_oh_labels = train_test_split(train_images, train_oh_labels, test_size=valid_size, random_state=random_state)
    
    return (tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels)

from tensorflow.keras.utils import Sequence
import cv2
import sklearn

class CIFAR10_Dataset(Sequence):
    def __init__(self, images_array, labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=None):
        '''
        파라미터 설명
        images_array: 원본 32x32 만큼의 image 배열값. 
        labels: 해당 image의 label들
        batch_size: __getitem__(self, index) 호출 시 마다 가져올 데이터 batch 건수
        augmentor: albumentations 객체
        shuffle: 학습 데이터의 경우 epoch 종료시마다 데이터를 섞을지 여부
        '''
        
        # 객체 생성 인자로 들어온 값을 객체 내부 변수로 할당. 
        # 인자로 입력되는 images_array는 전체 32x32 image array임.
        # 여기서 32x32로 들어온 이미지를 128x128로 키워야 한다.
        self.images_array = images_array
        self.labels = labels
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.pre_func = pre_func
        # train data의 경우
        self.shuffle = shuffle
        if self.shuffle:
            pass
        
    # Sequence를 상속받은 Dataset은 batch_size 단위로 입력된 데이터를 처리한다.
    # __len__()은 전체 데이터 건수가 주어졌을 때 batch_size 단위로 몇 번 데이터를 변환하는지 나타낸다.
    def __len__(self):
        # batch_size 단위로 데이터를 몇 번 갖고와여 하는지 계산하기 위해서 전체 데이터 건수를 batch_size로 나눈다.
        # 하지만, 정수로 나누어 떨어지지 않을 경우 +1을 한다.
        return int(np.ceil(len(self.labels)/self.batch_size))
    
    # batch_size 단위로 image_array, label_array 데이터를 가져와서 변환한 뒤에 다시 반환
    # 인자로 몇 번째 batch인지를 나타내는 index를 입력하면 해당 순서에 해당하는 batch_size 만큼의 데이터를 가공하여 반환
    # batch_size 개수만큼 변환된 image_array의 label_array 변환
    def __getitem__(self, index):
        # index는 몇 번째 batch인지를 나타낸다.
        # 32x32 image array를 self.batch_size만큼 갖고 옴
        image_fetch = self.images_array[index*self.batch_size:(index+1)*self.batch_size]
        label_batch = self.labels[index*self.batch_size:(index+1)*self.batch_size]
        
        image_batch = np.zeros((image_fetch.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype='float32')
        
        # batch_size에 담긴 건수만큼 iteration하면서 opencv image load -> image augmentation 변환 -> image batch에 담음.
        for image_index in range(image_fetch.shape[0]):
            # 원본 이미지 크기를 IMAGE_SIZE 크기로 resize
            image = cv2.resize(image_fetch[image_index], (IMAGE_SIZE, IMAGE_SIZE))
            # 먼역 augmentor가 주어진다면 활용
            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']
                
            if self.pre_func is not None:
                image = self.pre_func(image)
                
            # image batch에 순차적으로 변환된 image를 담음.
            image_batch[image_index] = image
        
        return image_batch, label_batch
    
    # epoch가 한 번 수행이 끝날 때마다 모델의 fit에서 호출된다
    def on_epoch_end(self):
        if self.shuffle:
            self.images_array, self.labels = sklearn.utils.shuffle(self.images_array, self.labels)
        else:
            pass
    

### 원 핫 인코딩 및 스케일링, 학습/검증/테스트 데이터 분할

In [13]:
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

(tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels) = \
    get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.2, random_state=42)
print(tr_images.shape, tr_oh_labels.shape, val_images.shape, val_oh_labels.shape, test_images.shape, test_oh_labels.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 0us/step
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
(40000, 32, 32, 3) (40000, 10) (10000, 32, 32, 3) (10000, 10) (10000, 32, 32, 3) (10000, 10)


### 학습, 검증용 CIFAR10_dataset 생성
* 32x32 image array를 배치 개수만큼만 128x128로 변경
* scaling은 VGG 원래 구현 시 사용한 채널별 값에 [103.939, 116.779, 123.68]을 뺀다

In [14]:
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess

tr_ds = CIFAR10_Dataset(tr_images, tr_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=True, pre_func=vgg_preprocess)
val_ds = CIFAR10_Dataset(val_images, val_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=vgg_preprocess)

print(next(iter(tr_ds))[0].shape, next(iter(val_ds))[0].shape)
print(next(iter(tr_ds))[1].shape, next(iter(val_ds))[1].shape)
# 채널별 값 - mean = [103.939, 116.779, 123.68]
print(next(iter(tr_ds))[0][0])

(64, 128, 128, 3) (64, 128, 128, 3)
(64, 10) (64, 10)
[[[-60.939003 -80.779    -89.68    ]
  [-60.939003 -80.779    -89.68    ]
  [-59.939003 -80.779    -89.68    ]
  ...
  [-25.939003 -58.779    -77.68    ]
  [-26.939003 -59.779    -78.68    ]
  [-26.939003 -59.779    -78.68    ]]

 [[-60.939003 -80.779    -89.68    ]
  [-60.939003 -80.779    -89.68    ]
  [-59.939003 -80.779    -89.68    ]
  ...
  [-25.939003 -58.779    -77.68    ]
  [-26.939003 -59.779    -78.68    ]
  [-26.939003 -59.779    -78.68    ]]

 [[-61.939003 -80.779    -89.68    ]
  [-61.939003 -80.779    -89.68    ]
  [-60.939003 -80.779    -89.68    ]
  ...
  [-25.939003 -58.779    -77.68    ]
  [-26.939003 -59.779    -78.68    ]
  [-26.939003 -59.779    -78.68    ]]

 ...

 [[-68.939    -84.779    -87.68    ]
  [-68.939    -84.779    -87.68    ]
  [-68.939    -84.779    -87.68    ]
  ...
  [-49.939003 -65.779    -70.68    ]
  [-48.939003 -63.779    -68.68    ]
  [-48.939003 -63.779    -68.68    ]]

 [[-68.939    -84.77

In [15]:
tf.keras.backend.clear_session()

vgg_model = create_vggnet_by_block(in_shape=(128,128,3), n_classes=10)

vgg_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

rlr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, mode='min', verbose=1)
ely_cb = EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1)

history = vgg_model.fit(tr_ds, epochs=20, validation_data=val_ds, callbacks=[rlr_cb, ely_cb])

Epoch 1/20


  self._warn_if_super_not_called()
2024-03-05 14:00:44.818412: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 9: 3.5242, expected 3.06308
2024-03-05 14:00:44.818473: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 80: 3.59666, expected 3.13554
2024-03-05 14:00:44.818543: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 12288: 3.0365, expected 2.57538
2024-03-05 14:00:44.818553: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 12416: 3.21471, expected 2.75359
2024-03-05 14:00:44.818605: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16324: 3.45034, expected 2.98922
2024-03-05 14:00:44.818622: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16384: 4.79805, expected 4.13711
2024-03-05 14:00:44.818632: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16388: 5.03736, expected 4.37643
2

[1m  1/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:42:29[0m 33s/step - accuracy: 0.1094 - loss: 2.3224

I0000 00:00:1709647271.632189      73 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1709647271.659463      73 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - accuracy: 0.1009 - loss: 5.4872

W0000 00:00:1709647354.148857      72 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
2024-03-05 14:02:40.852579: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 12288: 3.16341, expected 2.72723
2024-03-05 14:02:40.852630: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 12416: 3.00427, expected 2.56808
2024-03-05 14:02:40.852655: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16384: 4.67983, expected 3.93377
2024-03-05 14:02:40.852663: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16385: 6.45374, expected 5.70769
2024-03-05 14:02:40.852671: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16386: 6.1411, expected 5.39504
2024-03-05 14:02:40.852679: E external/local_xla/xla/service/gpu/buffer_comparator.cc:1137] Difference at 16387: 5.93718, expected 5.19113
2024-03-05 14:02:40.852686: E external/loc

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 149ms/step - accuracy: 0.1009 - loss: 5.4830 - val_accuracy: 0.0996 - val_loss: 2.3033 - learning_rate: 0.0010
Epoch 2/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 141ms/step - accuracy: 0.1110 - loss: 2.3800 - val_accuracy: 0.2583 - val_loss: 1.9956 - learning_rate: 0.0010
Epoch 3/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 141ms/step - accuracy: 0.2824 - loss: 1.9494 - val_accuracy: 0.3495 - val_loss: 1.7903 - learning_rate: 0.0010
Epoch 4/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 141ms/step - accuracy: 0.3644 - loss: 1.7465 - val_accuracy: 0.4057 - val_loss: 1.6178 - learning_rate: 0.0010
Epoch 5/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 141ms/step - accuracy: 0.4046 - loss: 1.6331 - val_accuracy: 0.4272 - val_loss: 1.5625 - learning_rate: 0.0010
Epoch 6/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

### VGG16 모델 생성 후 학습 및 성능 검증

In [16]:
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess

test_ds = CIFAR10_Dataset(test_images, test_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=vgg_preprocess)
vgg_model.evaluate(test_ds)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 43ms/step - accuracy: 0.0989 - loss: 2.3027


[2.3026247024536133, 0.10000000149011612]