파일 구조 설정 및 Data 준비

In [3]:
import numpy as np 
import pandas as pd
import os

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

Dataset 압축 해제

In [5]:
!unzip -qq /kaggle/input/dogs-vs-cats/train.zip

'unzip' is not recognized as an internal or external command,
operable program or batch file.


데이터 분할 및 디렉토리 구조 설정

In [6]:
import os, shutil, pathlib

original_dir = pathlib.Path("/kaggle/working/train")
new_base_dir = pathlib.Path("/kaggle/working/cats_vs_dogs_small")

def make_subset(subset_name, start_index, end_index):
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        print(dir)
        if not os.path.exists(dir):
            os.makedirs(dir)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=dir / fname)

make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)

\kaggle\working\cats_vs_dogs_small\train\cat
\kaggle\working\cats_vs_dogs_small\train\dog
\kaggle\working\cats_vs_dogs_small\validation\cat
\kaggle\working\cats_vs_dogs_small\validation\dog
\kaggle\working\cats_vs_dogs_small\test\cat
\kaggle\working\cats_vs_dogs_small\test\dog


Model 만들기

In [7]:
from tensorflow import keras
from tensorflow.keras import layers

# 180x180 크기의 RGB 이미지를 입력으로 받음
inputs = keras.Input(shape=(180, 180, 3))

# 입력 이미지를 정규화하여 0~1 범위의 값으로 변환
x = layers.Rescaling(1./255)(inputs)

# convolution 및 pooling layer 설정
# Conv2D Layer, MaxPooling2D Layer
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)

# Flatten Layer
x = layers.Flatten()(x)

# 이진 분류를 위해 sigmoid activation function 사용
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

Model Compile

In [8]:
# loss function: binary_crossentropy를 사용
# optimizer: rmsprop을 사용하여 모델을 최적화
# Performance Metrics : 정확도(accuracy)

model.compile(loss="binary_crossentropy",
              optimizer="rmsprop",
              metrics=["accuracy"])

데이터셋 로드

In [9]:
# 디렉토리에서 이미지 데이터 로드
# train, val, test dataset으로 만들기
from tensorflow.keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory(
    new_base_dir / "train",
    image_size=(180, 180),
    batch_size=32)
validation_dataset = image_dataset_from_directory(
    new_base_dir / "validation",
    image_size=(180, 180),
    batch_size=32)
test_dataset = image_dataset_from_directory(
    new_base_dir / "test",
    image_size=(180, 180),
    batch_size=32)

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


모델 훈련

In [11]:
# callback : 가장 성능이 좋은 모델(가장 낮은 검증 손실을 기록한 모델)을 저장
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="convnet_from_scratch.keras",
        save_best_only=True,
        monitor="val_loss")
]
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=validation_dataset,
    callbacks=callbacks)

Epoch 1/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 369ms/step - accuracy: 0.6012 - loss: 0.6728 - val_accuracy: 0.6630 - val_loss: 0.6201
Epoch 2/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 421ms/step - accuracy: 0.6333 - loss: 0.6348 - val_accuracy: 0.6660 - val_loss: 0.6108
Epoch 3/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 481ms/step - accuracy: 0.6697 - loss: 0.6107 - val_accuracy: 0.6810 - val_loss: 0.5997
Epoch 4/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 672ms/step - accuracy: 0.6984 - loss: 0.5766 - val_accuracy: 0.6910 - val_loss: 0.5585
Epoch 5/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 830ms/step - accuracy: 0.7073 - loss: 0.5672 - val_accuracy: 0.7110 - val_loss: 0.5789
Epoch 6/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 786ms/step - accuracy: 0.7141 - loss: 0.5546 - val_accuracy: 0.6820 - val_loss: 0.6308
Epoch 7/10
[1m63/63[