# 이미지를 훈련, 검증, 테스트 디렉터리로 복사하기

In [4]:
import os, shutil, pathlib

# Source directory holding the original images, and the root directory under
# which the smaller subsets are created. Both paths are relative, so they are
# resolved against the current working directory when used.
original_dir = pathlib.Path('dogs-vs-cats/train')
new_base_dir = pathlib.Path('dogs-vs-cats/cats_vs_dogs_small')


def make_subset(subset_name, start_index, end_index):
    """Copy a range of cat and dog images into a subset directory.

    For each category in ('cat', 'dog'), the files named
    ``{category}.{i}.jpg`` with ``i`` in ``range(start_index, end_index)``
    are copied from ``original_dir`` into
    ``new_base_dir / subset_name / category``.

    Args:
        subset_name: Name of the subset directory to fill (e.g. 'train').
        start_index: First image index to copy (inclusive).
        end_index: Image index at which copying stops (exclusive).
    """
    for category in ('cat', 'dog'):
        # Named target_dir (not dir) so the builtin dir() is not shadowed.
        target_dir = new_base_dir / subset_name / category
        # exist_ok=True makes the function safe to re-run: an already
        # existing subset directory no longer raises FileExistsError.
        os.makedirs(target_dir, exist_ok=True)
        for i in range(start_index, end_index):
            fname = f'{category}.{i}.jpg'
            shutil.copyfile(src=original_dir / fname, dst=target_dir / fname)
            
# Build the three subsets from consecutive index ranges, given as
# (subset name, start index, end index); the end index is passed as
# end_index to make_subset.
for subset_name, first, last in (
    ('train', 0, 1000),
    ('validation', 1000, 1500),
    ('test', 1500, 2500),
):
    make_subset(subset_name, start_index=first, end_index=last)

# 모델 만들기

## 강아지 vs 고양이 분류를 위한 소규모 컨브넷 만들기

In [6]:
from tensorflow import keras
from tensorflow.keras import layers

# Small convnet taking inputs of shape (180, 180, 3).
inputs = keras.Input(shape=(180, 180, 3))
# Multiply the input values by 1/255.
x = layers.Rescaling(1./255)(inputs)
# Four Conv2D + MaxPooling2D stages with a growing number of filters.
for n_filters in (32, 64, 128, 256):
    x = layers.Conv2D(filters=n_filters, kernel_size=3, activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
# Last convolution is not followed by pooling; its output is flattened
# and fed to a single sigmoid unit.
x = layers.Conv2D(filters=256, kernel_size=3, activation='relu')(x)
x = layers.Flatten()(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [7]:
# Print the model summary (layer types, output shapes, parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 rescaling_1 (Rescaling)     (None, 180, 180, 3)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 178, 178, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 89, 89, 32)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 87, 87, 64)        18496     
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 43, 43, 64)       0         
 2D)                                                         

## 모델 훈련 설정하기

In [8]:
# Configure training: 'rmsprop' optimizer, binary cross-entropy loss,
# and accuracy reported as a metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# 데이터 전처리

## image_dataset_from_directory를 사용하여 이미지 읽기

In [9]:
from tensorflow.keras.utils import image_dataset_from_directory

# Create one dataset per subset directory, in the order train, validation,
# test; each uses image_size=(180, 180) and batch_size=32.
train_dataset, validation_dataset, test_dataset = [
    image_dataset_from_directory(
        new_base_dir / subset,
        image_size=(180, 180),
        batch_size=32,
    )
    for subset in ('train', 'validation', 'test')
]

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


## Dataset이 반환하는 데이터와 레이블 크기 확인하기

In [11]:
# Fetch only the first (data, labels) batch and print the shape of each part.
# Nothing is printed when the dataset yields no batch.
first_batch = next(iter(train_dataset), None)
if first_batch is not None:
    data_batch, labels_batch = first_batch
    print('데이터 배치 크기 :', data_batch.shape)
    print('레이블 배치 크기 :', labels_batch.shape)

데이터 배치 크기 : (32, 180, 180, 3)
레이블 배치 크기 : (32,)


## Dataset을 사용하여 모델 훈련하기

In [None]:
# Checkpoint callback: with save_best_only=True and monitor='val_loss',
# the model file is written only when the validation loss improves.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='dogs-vs-cats/convert_from_scratch.keras',
    monitor='val_loss',
    save_best_only=True,
)
callbacks = [checkpoint]

# Train for 30 epochs on the training dataset, evaluating on the validation
# dataset; the return value of fit() is kept in `history`.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=30,
    callbacks=callbacks,
)

Epoch 1/30
 9/63 [===>..........................] - ETA: 38s - loss: 0.8249 - accuracy: 0.5104