In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt

In [2]:
gen = ImageDataGenerator(rotation_range = 20,
                        shear_range = 0.1,
                        width_shift_range = 0.2,
                        height_shift_range = 0.2,
                        horizontal_flip = True)

In [3]:
# CIFAR-10 데이터셋을 읽고 신경망에 입력할 형태로 변환
(x_train, y_train),(x_test, y_test) = cifar10.load_data()

# 정규화
x_train = x_train.astype(np.float32)/255.0
x_test = x_test.astype(np.float32)/255.0

In [7]:
# 보강할 학습데이터 이미지 생성
augment_ratio = 1.5     # 전체 데이터의 150%
augment_size = int(augment_ratio * x_train.shape[0])

print(augment_size)

print('-'*50)

# 전체 x_train 개수의 150% 비율만큼
randidx = np.random.randint(x_train.shape[0], size=augment_size)

# 임의로 선택된 데이터는 원본데이터를 참조하기 때무에
# 원본데이터에 영향을 줄 수 있음. 그래서 copy() 함수를 통해 안전하게 복사본 만든
x_augmented = x_train[randidx].copy()
y_augmented = y_train[randidx].copy()
# print(x_augmented, y_augmented)
# print('-'*50)


# 이미지 보강 실행
x_augmented, y_augmented = gen.flow(x_augmented, y_augmented,
                                    batch_size = augment_size,
                                    shuffle=False).next()

print(x_augmented.shape, y_augmented.shape)

187500
--------------------------------------------------
--------------------------------------------------
(187500, 32, 32, 3) (187500, 1)


In [5]:
# x_train, y_train에 보강된 데이터 추가
x_train = np.concatenate( (x_train, x_augmented) )
y_train = np.concatenate( (y_train, y_augmented) )

print(x_train.shape, y_train.shape)

(125000, 32, 32, 3) (125000, 1)


In [8]:
cnn = Sequential()

# cnn.add(Conv2D(input_shape=(32,32,3) , kernel_size=(3,3), padding='same' ,filters=32 ,activation='relu' ))
cnn.add(Conv2D(32, (3,3), padding='same' ,activation='relu', input_shape=(32,32,3) ))
cnn.add(Conv2D(32, (3,3),padding='same', activation='relu'))
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Dropout(0.25))

cnn.add(Conv2D(64, (3,3),padding='same', activation='relu'))
cnn.add(Conv2D(64, (3,3),padding='same', activation='relu'))
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Dropout(0.25))

# cnn.add(Conv2D(128, (3,3),padding='same', activation='relu'))
# cnn.add(MaxPool2D(pool_size=(2,2)))
# cnn.add(Dropout(0.25))

# cnn.add(Conv2D(256, (3,3),padding='same', activation='relu'))
# cnn.add(MaxPool2D(pool_size=(2,2)))
# cnn.add(Dropout(0.25))


cnn.add(Flatten())

cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))
cnn.add(Dense(10, activation='softmax'))


cnn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 32, 32, 32)        896       
                                                                 
 conv2d_5 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 16, 16, 32)        0         
                                                                 
 conv2d_6 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 conv2d_7 (Conv2D)           (None, 16, 16, 64)        36928     
                                                      

In [9]:
from datetime import datetime

start_time = datetime.now() # 현재 시간

cnn.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# 손실함수가 5 epochs을 진행을 검사하여 더 이상 줄어들지 않으면 종료
# EarlyStopping(모니터링 값, 대기 epochs)

early_stoping = EarlyStopping(monitor='val_loss', patience=5)

# 학습 중인 모델을 자동 저장
model_checkpoint= ModelCheckpoint(filepath="/data/bast.h5",
                                  monitor='val_loss',
                                  save_base_only = True,
                                  verbose = 1)

hist = cnn.fit(x_train, y_train, batch_size = 256, epochs = 50, validation_data =(x_test, y_test),
               callbacks = [early_stoping, model_checkpoint])

# hist = cnn.fit(x_train, t_train, batch_size = 128, epochs = 30, validation_data =(x_test, t_test))

end_time = datetime.now()
print('Elapsed Time => ', end_time - start_time)

Epoch 1/50
Epoch 1: saving model to /data\bast.h5
Epoch 2/50
Epoch 2: saving model to /data\bast.h5
Epoch 3/50
Epoch 3: saving model to /data\bast.h5
Epoch 4/50
Epoch 4: saving model to /data\bast.h5
Epoch 5/50
Epoch 5: saving model to /data\bast.h5
Epoch 6/50
Epoch 6: saving model to /data\bast.h5
Epoch 7/50
 13/489 [..............................] - ETA: 3:35 - loss: 0.9372 - accuracy: 0.6782

KeyboardInterrupt: 

In [None]:
plt.title('loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.plot(hist.history['loss'],label='train loss')
plt.plot(hist.history['val_loss'], label='validation loss')
plt.legend(loc='best') # 범례를 최적의 위치에 알아서 위치시켜줌
plt.show()

In [None]:
plt.title('Accuracy')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.plot(hist.history['accuracy'],label='train accuracy')
plt.plot(hist.history['val_accuracy'], label='validation accuracy')
plt.legend(loc='best') # 범례를 최적의 위치에 알아서 위치시켜줌
plt.show()