In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

In [None]:
# train data loading
# Convert every training image to a flattened grayscale vector and write the
# result to CSV, one file per chunk. Each CSV row is: label (1 = dog, 0 = cat)
# followed by the 200*200 gray levels.
N_CHUNKS = 5                 # number of CSV files produced
IMAGES_PER_CLASS = 1250      # dog.{i}/cat.{i} indices covered by each chunk
IMAGE_SIZE = (200, 200)      # size passed to PIL's resize
N_PIXELS = IMAGE_SIZE[0] * IMAGE_SIZE[1]


def _load_gray_pixels(path):
    """Return the image at `path` as a flat float32 vector of gray levels.

    The image is forced to 3-channel RGB before averaging so that a JPEG
    decoded in another mode (e.g. single-channel 'L' or 4-channel 'CMYK')
    does not break or skew the channel mean. For images that are already
    RGB the conversion is just a copy, so the values are unchanged.
    """
    # The context manager releases the file handle even if decoding fails.
    with Image.open(path) as img:
        rgb = np.asarray(img.convert('RGB').resize(IMAGE_SIZE))
    # Channel mean is accumulated and stored as int32 (fraction discarded),
    # then widened to float32, exactly as the values were produced before.
    return np.mean(rgb, axis=2, dtype=np.int32).astype(np.float32).ravel()


for n in range(N_CHUNKS):
    print(n)

    # Preallocate compact arrays instead of growing a list of Python floats:
    # a chunk is 2,500 x 40,000 values, which is far cheaper as float32.
    labels = np.empty(2 * IMAGES_PER_CLASS, dtype=np.int64)
    pixels = np.empty((2 * IMAGES_PER_CLASS, N_PIXELS), dtype=np.float32)

    first_index = n * IMAGES_PER_CLASS
    for offset in range(IMAGES_PER_CLASS):
        i = first_index + offset
        row = 2 * offset  # rows alternate dog, cat, dog, cat, ...

        labels[row] = 1
        pixels[row] = _load_gray_pixels(f'../data/dogs-vs-cats/train/dog.{i}.jpg')

        labels[row + 1] = 0
        pixels[row + 1] = _load_gray_pixels(f'../data/dogs-vs-cats/train/cat.{i}.jpg')

    # Keep the CSV layout unchanged: integer label in column 0, pixels in
    # columns 1..N_PIXELS.
    df = pd.DataFrame(pixels, columns=range(1, N_PIXELS + 1))
    df.insert(0, 0, labels)
    df.to_csv(f'../data/dogs-vs-cats/batch{n}.csv', index=False)
    del df, labels, pixels

In [None]:
# CNN for 200x200 single-channel (grayscale) images: four Conv2D + MaxPooling2D
# stages, one more Conv2D, then a dense head with dropout. The final layer has
# two softmax units, one per class.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3,3), activation='relu',
           input_shape=(200,200,1)),
    MaxPooling2D(pool_size=(2,2)),

    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),

    Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),

    Conv2D(filters=256, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),

    Conv2D(filters=256, kernel_size=(3,3), activation='relu'),

    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.5),
    Dense(units=2, activation='softmax'),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [None]:
# learning
# NOTE(review): 1e-2 is ten times the Keras default for Adam (1e-3); left
# unchanged here, but worth confirming that training is stable at this rate.
model.compile(optimizer=Adam(learning_rate=1e-2),
              loss='sparse_categorical_crossentropy',
              metrics=['sparse_categorical_accuracy'])

# One Keras History object per CSV chunk, in chunk order.
# NOTE(review): the model sees each chunk for 20 epochs before moving on to
# the next one; confirm this chunk-by-chunk schedule is intended.
history_list = list()
for n in range(5):
    # data load
    batch = pd.read_csv(f'../data/dogs-vs-cats/batch{n}.csv').values

    # x, t data split: column 0 is the label, the rest are pixel values
    t_data = batch[:, 0].reshape(-1, 1).copy()

    # normalization
    # The CSVs hold gray levels averaged from 8-bit channels, so a fixed /255
    # scale maps them into [0, 1]. Fitting a new MinMaxScaler on every chunk
    # scaled each pixel column with chunk-specific statistics (including the
    # rows later held out for validation), so chunks were not normalized
    # consistently and no single transform existed to reuse at inference time.
    x_data_norm = batch[:, 1:].astype(np.float32) / np.float32(255.0)
    del batch

    history_list.append(model.fit(x_data_norm.reshape(-1, 200, 200, 1), t_data,
                        epochs=20, verbose=1, validation_split=0.3))
    del x_data_norm
    del t_data

In [2]:
%reset
# 일부 이미지 분리(총 4000개)

import os, shutil

original_dataset_dir = '../data/dogs-vs-cats/train'

## directory 생성 ##

base_dir = '../data/cat_dog_small'
os.mkdir(base_dir)

train_dir = os.path.join(base_dir,'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir,'validation')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir,'test')
os.mkdir(test_dir)

train_cats_dir = os.path.join(train_dir,'cats')
os.mkdir(train_cats_dir)
train_dogs_dir = os.path.join(train_dir,'dogs')
os.mkdir(train_dogs_dir)

validation_cats_dir = os.path.join(validation_dir,'cats')
os.mkdir(validation_cats_dir)
validation_dogs_dir = os.path.join(validation_dir,'dogs')
os.mkdir(validation_dogs_dir)

test_cats_dir = os.path.join(test_dir,'cats')
os.mkdir(test_cats_dir)
test_dogs_dir = os.path.join(test_dir,'dogs')
os.mkdir(test_dogs_dir)

## file 복사 ##

fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copyfile(src,dst)

fnames = ['cat.{}.jpg'.format(i) for i in range(1000,1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copyfile(src,dst)
    
fnames = ['cat.{}.jpg'.format(i) for i in range(1500,2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copyfile(src,dst)
    

fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copyfile(src,dst)

fnames = ['dog.{}.jpg'.format(i) for i in range(1000,1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copyfile(src,dst)
    
fnames = ['dog.{}.jpg'.format(i) for i in range(1500,2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir,fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copyfile(src,dst)

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
