In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import numpy as np
import matplotlib.pyplot as plt

1. Data 다운받기
2. Data Pipeline 구축(Generator 구축 - Augmentation!)
3. Data 가져오기 (flow_from_directory)
4. Model 제작 (Flatten 이전에 Dropout)
5. Model 컴파일 - loss, optimizer, metrics
6. Model 훈련 - model.fit / fit_generator(훈련 데이터, epoch수, validation data)
7. Model 성능 분석 - train accuracy, validation accuracy w.r.t epochs
8. Model 사용하기 - 훈련할 때 사용한 객체.predict -> 확률값 가지는 배열 반환

In [None]:
# Download the dataset from its URL.

_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'

# get_file arguments: cache file name, source URL, whether to extract the archive.
zip_dir = tf.keras.utils.get_file(
    fname='cats_and_dogs_filtered.zip',
    origin=_URL,
    extract=True,
)

In [None]:
# Derive the dataset directory layout.
# The 'cats_and_dogs_filtered' folder is looked up next to the path returned by get_file.
base_dir = os.path.join(os.path.dirname(zip_dir), 'cats_and_dogs_filtered')
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation')
)

# Per-class sub-directories of each split.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)

In [None]:
# Count the number of files in each class directory.
num_cats_tr, num_dogs_tr = (
    len(os.listdir(path)) for path in (train_cats_dir, train_dogs_dir)
)
num_cats_val, num_dogs_val = (
    len(os.listdir(path)) for path in (validation_cats_dir, validation_dogs_dir)
)

# Totals per split; reused later to compute the number of steps per epoch.
total_train = sum((num_cats_tr, num_dogs_tr))
total_val = sum((num_cats_val, num_dogs_val))

# Report the per-class counts first, then the per-split totals.
print('total training cat images:', num_cats_tr)
print('total training dog images:', num_dogs_tr)

print('total validation cat images:', num_cats_val)
print('total validation dog images:', num_dogs_val)
print("--")
print("Total training images:", total_train)
print("Total validation images:", total_val)

In [None]:
# Configuration shared by the data generators and the model.
BATCH_SIZE = 100  # number of images per batch requested from the generators
IMAGE_SIZE = 150  # images are resized to IMAGE_SIZE x IMAGE_SIZE pixels

# The generator cells refer to the image side length as IMG_SHAPE; without
# this alias they fail with NameError on a fresh kernel.
IMG_SHAPE = IMAGE_SIZE

In [None]:
# Helper that shows images in a single-row, five-column grid.
def plotImages(images_arr):
    """Display images side by side in a 1 x 5 grid of subplots.

    Args:
        images_arr: iterable of images accepted by ``Axes.imshow``. Only five
            axes are created, so at most five images are drawn.
    """
    # One row, five columns; the figure handle itself is not needed.
    _, grid = plt.subplots(nrows=1, ncols=5, figsize=(20, 20))
    for image, axis in zip(images_arr, grid.flatten()):
        axis.imshow(image)
    plt.tight_layout()
    plt.show()

In [None]:
# Image augmentation
# Augmentations are enabled by passing extra arguments to ImageDataGenerator.

# Generator configuration: rescale pixel values by 1/255 and enable
# horizontal flipping.
image_gen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)

# flow_from_directory reads the images from the local train_dir and serves
# them in batches of BATCH_SIZE, resized to IMAGE_SIZE x IMAGE_SIZE.
# IMAGE_SIZE is the side-length constant defined in the configuration cell.
train_data_gen = image_gen.flow_from_directory(batch_size=BATCH_SIZE,
                                               directory=train_dir,
                                               shuffle=True,
                                               target_size=(IMAGE_SIZE, IMAGE_SIZE))

In [None]:
# Take the first image of batch 0 from train_data_gen, five times over.
# Presumably each access re-applies the random augmentation, so the five
# samples differ -- confirm against the rendered plot.
augmented_images = [train_data_gen[0][0][0] for _ in range(5)]

plotImages(augmented_images)

In [None]:
# Rotation augmentation: rescale pixel values by 1/255 and allow rotations
# of up to 45 degrees.
image_gen = ImageDataGenerator(rescale=1./255, rotation_range=45)

# IMAGE_SIZE is the side-length constant defined in the configuration cell.
train_data_gen = image_gen.flow_from_directory(batch_size=BATCH_SIZE,
                                               directory=train_dir,
                                               shuffle=True,
                                               target_size=(IMAGE_SIZE, IMAGE_SIZE))

In [None]:
# Rotation augmentation
# NOTE(review): this cell repeats the configuration of the preceding cell.

image_gen = ImageDataGenerator(rescale=1./255, rotation_range=45)

# IMAGE_SIZE is the side-length constant defined in the configuration cell.
train_data_gen = image_gen.flow_from_directory(batch_size=BATCH_SIZE,
                                               directory=train_dir,
                                               shuffle=True,
                                               target_size=(IMAGE_SIZE, IMAGE_SIZE))

In [None]:
# Take the first image of batch 0 five times and show the samples in a row.
augmented_images = [train_data_gen[0][0][0] for _ in range(5)]
plotImages(augmented_images)

In [None]:
# Zoom augmentation: rescale pixel values by 1/255 and use a zoom range of 0.5.
image_gen = ImageDataGenerator(rescale=1./255, zoom_range=0.5)

# IMAGE_SIZE is the side-length constant defined in the configuration cell.
train_data_gen = image_gen.flow_from_directory(batch_size=BATCH_SIZE,
                                               directory=train_dir,
                                               shuffle=True,
                                               target_size=(IMAGE_SIZE, IMAGE_SIZE))

In [None]:
# Apply several augmentations at once.

image_gen_train = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # Must be exactly 'nearest'; a value padded with spaces is not a valid mode.
    fill_mode='nearest')

# class_mode='binary' makes the generator yield one scalar label per image.
# IMAGE_SIZE is the side-length constant defined in the configuration cell.
train_data_gen = image_gen_train.flow_from_directory(batch_size=BATCH_SIZE,
                                                     directory=train_dir,
                                                     shuffle=True,
                                                     target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                                     class_mode='binary')

In [None]:
# No augmentation is applied to the validation data.
# (Resizing is the exception: every model input must have the same size.)

image_gen_val = ImageDataGenerator(rescale=1./255)

# IMAGE_SIZE is the side-length constant defined in the configuration cell.
val_data_gen = image_gen_val.flow_from_directory(batch_size=BATCH_SIZE,
                                                 directory=validation_dir,
                                                 target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                                 class_mode='binary')

In [None]:
# Build the model: four Conv2D + MaxPooling2D stages, then Dropout, Flatten
# and two Dense layers.

model = tf.keras.Sequential([

    # Input images are IMAGE_SIZE x IMAGE_SIZE with 3 channels, matching the
    # target_size used by the data generators.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                           input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    tf.keras.layers.MaxPooling2D((2, 2), strides=2),

    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    # Dropout sets each incoming value to 0 with probability 0.5, so those
    # values are effectively not used for training.
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    # Two-unit softmax output: one probability per class.
    tf.keras.layers.Dense(2, activation='softmax')

])

In [None]:
# Compile the model: optimizer, loss and metrics.

# The model's last layer applies softmax, so it outputs probabilities rather
# than logits; the loss must therefore be built with from_logits=False.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [None]:
# Train the model.

EPOCHS = 100

# Model.fit accepts data generators directly; Model.fit_generator is
# deprecated. Steps are the number of batches needed to cover each split
# once (rounded up so the last partial batch is included).
history = model.fit(
    train_data_gen,
    epochs=EPOCHS,
    steps_per_epoch=int(np.ceil(total_train / float(BATCH_SIZE))),
    validation_data=val_data_gen,
    validation_steps=int(np.ceil(total_val / float(BATCH_SIZE)))
)

In [None]:
# Plot the training results:
# training and validation accuracy / loss with respect to the epoch.

# The History object returned by training keeps the per-epoch metrics in
# its .history dictionary.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of recorded epochs so the x-axis always matches the metric
# lists (the previous `range(epochs)` referenced an undefined name).
epochs_range = range(len(acc))

# Left panel: accuracy. Right panel: loss.
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

그래프를 그려보면, 이전과 다르게 training accuracy와 validation accuracy가
epoch 60에 도달할 때까지 비슷하게 증가하는 것을 볼 수 있음!
또한, epoch 60 이전까지는 training loss와 validation loss도 마찬가지로 비슷하게 감소함.
하지만 epoch 60 이후로는 training accuracy는 빠르게 증가하지만 validation accuracy는 감소하므로, 현 상태의 overfitting 방지 기법(augmentation, dropout)으로는 epoch 60에서 훈련을 멈추는 것이 좋음.
>> 이전 모델보다는 훨씬 더 정확한 모델을 만들 수 있음! (epoch 5 vs 60)