In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

paths = []
dataset_gubuns = []
label_gubuns = []

for dirname, _, filenames in os.walk('/kaggle/input/cat-and-dog'):
    for filename in filenames:
        if '.jpg' in filename:
            file_path = dirname+'/'+ filename
            paths.append(file_path)

            if '/training_set/' in file_path:
                dataset_gubuns.append('train')  
            elif '/test_set/' in file_path:
                dataset_gubuns.append('test')
            else: dataset_gubuns.append('N/A')

            if 'dogs' in file_path:
                label_gubuns.append('DOG')
            elif 'cats' in file_path:
                label_gubuns.append('CAT')
            else: label_gubuns.append('N/A')

In [2]:
paths[:10] , dataset_gubuns[:10], label_gubuns[:10]

(['/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4329.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4223.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4253.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4190.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4354.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4136.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4226.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4214.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4888.jpg',
  '/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4170.jpg'],
 ['test',
  'test',
  'test',
  'test',
  'test',
  'test',
  'test',
  'test',
  'test',
  'test'],
 ['DOG', 'DOG', 'DOG', 'DOG', 'DOG', 'DOG', 'DOG', 'DOG', 'DOG', 'DOG'])

In [3]:
pd.set_option('display.max_colwidth', 200)

data_df = pd.DataFrame({'path':paths, 'dataset':dataset_gubuns, 'label':label_gubuns})
print('data_df shape:', data_df.shape)
data_df.head(10)

data_df shape: (10028, 3)


Unnamed: 0,path,dataset,label
0,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4329.jpg,test,DOG
1,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4223.jpg,test,DOG
2,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4253.jpg,test,DOG
3,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4190.jpg,test,DOG
4,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4354.jpg,test,DOG
5,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4136.jpg,test,DOG
6,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4226.jpg,test,DOG
7,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4214.jpg,test,DOG
8,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4888.jpg,test,DOG
9,/kaggle/input/cat-and-dog/test_set/test_set/dogs/dog.4170.jpg,test,DOG


In [4]:
print(data_df['dataset'].value_counts())
print(data_df['label'].value_counts())

train    8005
test     2023
Name: dataset, dtype: int64
DOG    5017
CAT    5011
Name: label, dtype: int64


In [5]:
train_df = data_df[data_df['dataset']=='train']
test_df = data_df[data_df['dataset']=='test']
print('train_df shape:', train_df.shape, 'test_df shape:', test_df.shape)

train_df shape: (8005, 3) test_df shape: (2023, 3)


In [6]:
from sklearn.model_selection import train_test_split

tr_df, val_df = train_test_split(train_df, test_size=0.15, stratify=train_df['label'], random_state=2021)
print('tr_df shape:', tr_df.shape, 'val_df shape:', val_df.shape)
print('tr_df label distribution:\n', tr_df['label'].value_counts())
print('val_df label distribution:\n', val_df['label'].value_counts())

tr_df shape: (6804, 3) val_df shape: (1201, 3)
tr_df label distribution:
 DOG    3404
CAT    3400
Name: label, dtype: int64
val_df label distribution:
 DOG    601
CAT    600
Name: label, dtype: int64


In [7]:
IMAGE_SIZE = 224
BATCH_SIZE = 64

In [8]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam , RMSprop 
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception

In [12]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import preprocess_input

tr_generator = ImageDataGenerator(horizontal_flip=True, preprocessing_function = preprocess_input)

tr_flow_gen = tr_generator.flow_from_dataframe(dataframe=tr_df,
                                              x_col='path',
                                              y_col='label'
                                              ,target_size=(IMAGE_SIZE, IMAGE_SIZE)
                                              ,class_mode = 'binary'
                                              ,batch_size=BATCH_SIZE
                                              ,shuffle=True)

Found 6804 validated image filenames belonging to 2 classes.


In [13]:
images_array = next(tr_flow_gen)[0]
labels_array = next(tr_flow_gen)[1]
print(images_array.shape)
print(labels_array.shape)

(64, 224, 224, 3)
(64,)


In [14]:
val_generator = ImageDataGenerator(preprocessing_function = preprocess_input)

val_flow_gen = val_generator.flow_from_dataframe(dataframe=val_df
                                                ,x_col='path'
                                                ,y_col='label'
                                                ,target_size=(IMAGE_SIZE, IMAGE_SIZE)
                                                ,class_mode='binary'
                                                ,batch_size=BATCH_SIZE
                                                ,shuffle=False)

Found 1201 validated image filenames belonging to 2 classes.


In [15]:
input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
base_model = Xception(input_tensor=input_tensor, include_top=False, weights='imagenet')

bm_output = base_model.output

x = GlobalAveragePooling2D()(bm_output)
x = Dropout(rate=0.5)(x)
x = Dense(50, activation='relu', name='fc1')(x)
output = Dense(1, activation='sigmoid', name='output')(x)

model = Model(inputs=input_tensor, outputs=output)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 111, 111, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 111, 111, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 111, 111,

In [16]:
model.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])

rlr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, mode='min', verbose=1)
ely_cb = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)

In [17]:
model.fit(tr_flow_gen, epochs=20, validation_data=val_flow_gen,callbacks=[rlr_cb, ely_cb])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20

Epoch 00015: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 16/20
Epoch 17/20
Epoch 00017: early stopping


<tensorflow.python.keras.callbacks.History at 0x7f1ba750f050>

In [18]:
test_generator = ImageDataGenerator(preprocessing_function = preprocess_input)
test_flow_gen = test_generator.flow_from_dataframe(dataframe=test_df # image file이 있는 디렉토리
                                      ,x_col='path'
                                      ,y_col='label'
                                      ,target_size=(IMAGE_SIZE, IMAGE_SIZE) # 원본 이미지를 최종 resize할 image size
                                      ,class_mode='binary' # 문자열 label을 자동으로 encoding. 
                                      ,batch_size=BATCH_SIZE
                                      ,shuffle=False
                                      )
# evaluation으로 성능 검증
model.evaluate(test_flow_gen)

Found 2023 validated image filenames belonging to 2 classes.


[0.0815253034234047, 0.9797330498695374]