* Source : https://taeguu.tistory.com/27

In [None]:
import gc
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

# Cross-validation libraries
from sklearn.model_selection import StratifiedKFold,train_test_split
from tqdm import tqdm_notebook
from sklearn.metrics import accuracy_score, roc_auc_score
# Model-building libraries
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, AveragePooling2D,BatchNormalization, MaxPooling2D
from keras import layers
from keras.optimizers import Adam,RMSprop, SGD

# Model architectures from keras.applications
from keras.applications import VGG16, VGG19, resnet50

# Ignore warning messages
import warnings
warnings.filterwarnings(action='ignore')

import os
import gc

# Data

In [None]:
# https://www.kaggle.com/bulentsiyah/dogs-vs-cats-classification-vgg16-fine-tuning
# List the training images from the same directory that the image-loading and
# generator cells read from, so every filename resolves to a file they can open.
# NOTE(review): this cell originally listed "/content/dogs-vs-cats/train/train";
# adjust all paths together if the data lives somewhere else.
filenames = os.listdir("../input/dogs-vs-cats/train/train")

# The label is the filename prefix before the first dot:
# 'dog' -> 1, anything else -> 0.
categories = [
    1 if filename.split('.')[0] == 'dog' else 0
    for filename in filenames
]

# One row per image: its filename and its binary label.
train = pd.DataFrame({
    'filename': filenames,
    'category': categories
})
train.head()


In [None]:
!pwd


# Visualizing the data


In [None]:
# Display one raw training image as a quick sanity check.
sample = filenames[2]
image = load_img("../input/dogs-vs-cats/train/train/"+sample)
plt.imshow(image)
plt.show()

# Store the labels as strings.
# NOTE(review): presumably required by flow_from_dataframe with
# class_mode='binary' -- confirm against the Keras version in use.
train["category"] = train["category"].astype(str)

# First split: 80% of all rows for development, 20% held out as the test set.
its = np.arange(len(train))
train_idx, test_idx = train_test_split(its, train_size=0.8, random_state=42)
df_train = train.iloc[train_idx]
X_test = train.iloc[test_idx]

# Second split: 80% of the development rows for training, 20% for validation.
its = np.arange(len(df_train))
train_idx, val_idx = train_test_split(its, train_size=0.8, random_state=42)
X_train = df_train.iloc[train_idx]
X_val = df_train.iloc[val_idx]

# Report the size of each split, then the class balance of the training split.
for split_frame in (X_train, X_val, X_test):
    print(split_frame.shape)
X_train['category'].value_counts()


In [None]:
# Parameters


In [None]:
# Square input resolution; matches the 227x227 input_shape of the model cell.
image_size = 227
img_size = (image_size,) * 2

# Training schedule: number of epochs and images per batch (batch size 128).
epochs = 20
batch_size = 128

# Row counts of each split, used to work out how many batches make one pass.
nb_train_samples = X_train.shape[0]
nb_validation_samples = X_val.shape[0]
nb_test_samples = X_test.shape[0]


In [None]:

# Define Generator config


In [None]:
# Random augmentation applied to training images only.
augmentation_options = dict(
    rotation_range=10,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Training pipeline: multiply pixel values by 1/255, then augment.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation and test pipelines: rescaling only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)


In [None]:
#generator


In [None]:
# Arguments shared by all three generators: where the images live, which
# column holds the filename, and how each image is resized and batched.
shared_flow_options = dict(
    directory="../input/dogs-vs-cats/train/train",
    x_col='filename',
    target_size=img_size,
    color_mode='rgb',
    batch_size=batch_size,
)

# Training batches: augmented images with binary labels, seeded with 42.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=X_train,
    y_col='category',
    class_mode='binary',
    seed=42,
    **shared_flow_options
)

# Validation batches: rescaled images with binary labels.
validation_generator = val_datagen.flow_from_dataframe(
    dataframe=X_val,
    y_col='category',
    class_mode='binary',
    **shared_flow_options
)

# Test batches: images only (no labels), with shuffling disabled so the
# prediction order follows the generator's file order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=X_test,
    y_col=None,
    class_mode=None,
    shuffle=False,
    **shared_flow_options
)


In [None]:
# Model - AlexNet


In [None]:
# AlexNet-style binary classifier: five conv layers, three max-pools, three FC layers.
from keras.regularizers import l2

#INPUT
input_shape = (227, 227, 3)
# L2 weight decay coefficient, applied to the kernel of every Conv2D/Dense
# layer. The SGD `decay` argument is NOT weight decay (it decays the learning
# rate per update), so the penalty is attached to the layers instead.
weight_decay = 5e-4

model = Sequential()
#CONV1 (ReLU added: every other conv layer already had it, this one was linear)
model.add(Conv2D(96, (11, 11), strides=4, padding='valid', activation='relu',
                 kernel_regularizer=l2(weight_decay), input_shape=input_shape))
#MAX POOL1
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
#NORM1: the original design used Local Response Normalization, which is no
# longer common; Batch Normalization is used here instead.
model.add(BatchNormalization())
#CONV2
model.add(Conv2D(256, (3, 3), activation='relu', padding='same',
                 kernel_regularizer=l2(weight_decay)))
#MAX POOL2
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
#NORM2
model.add(BatchNormalization())
#CONV3
model.add(Conv2D(384, (3, 3), strides=1, activation='relu', padding='same',
                 kernel_regularizer=l2(weight_decay)))
#CONV4
model.add(Conv2D(384, (3, 3), strides=1, activation='relu', padding='same',
                 kernel_regularizer=l2(weight_decay)))
#CONV5
model.add(Conv2D(256, (3, 3), strides=1, activation='relu', padding='same',
                 kernel_regularizer=l2(weight_decay)))
#MAX POOL3
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
model.add(Flatten())
#FC6: the FC layers are narrower than usual because there are few classes.
model.add(Dense(1024, activation='relu', kernel_regularizer=l2(weight_decay)))
model.add(Dropout(0.5))
#FC7
model.add(Dense(512, activation='relu', kernel_regularizer=l2(weight_decay)))
model.add(Dropout(0.5))
#FC8: a single sigmoid unit because this is binary classification.
model.add(Dense(1, activation='sigmoid', kernel_regularizer=l2(weight_decay)))
# SGD with momentum 0.9 and initial learning rate 1e-2; the L2 weight decay of
# 5e-4 is provided by the kernel regularizers above.
optimizer = SGD(lr=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy',
              optimizer=optimizer, metrics=['accuracy'])
model.summary()



# Train

In [None]:

def get_steps(num_samples, batch_size):
    """Return how many batches are needed to cover ``num_samples`` items.

    Args:
        num_samples: Total number of items to iterate over.
        batch_size: Number of items per batch.

    Returns:
        ``num_samples // batch_size``, plus one extra step when a partial
        batch is left over (i.e. the remainder is positive).

    Raises:
        ZeroDivisionError: If ``batch_size`` is 0.
    """
    full_batches, remainder = divmod(num_samples, batch_size)
    return full_batches + 1 if remainder > 0 else full_batches
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


In [None]:

#model path
MODEL_SAVE_FOLDER_PATH = './model/'
# Create the checkpoint folder if needed; no error when it already exists.
os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

model_path = MODEL_SAVE_FOLDER_PATH + 'AlexNet.hdf5'

# Number of epochs without improvement before the learning rate is reduced.
patient = 5
callbacks_list = [
    # Learning rate starts at 1e-2 and is divided by 10 when validation
    # accuracy plateaus.
    ReduceLROnPlateau(
        monitor='val_accuracy',
        # Multiply the learning rate by 0.1 (divide by 10) each time it fires.
        factor=0.1,
        # Fire when val_accuracy has not improved for `patient` epochs.
        patience=patient,
        # Lower bound on the learning rate.
        min_lr=0.00001,
        verbose=1,
        # Accuracy is a higher-is-better metric, so "improvement" means an
        # increase; 'min' would treat a drop in accuracy as progress.
        mode='max'
    ),
    ModelCheckpoint(
        filepath=model_path,
        monitor='val_accuracy',
        # Only overwrite the saved model when val_accuracy improves.
        save_best_only=True,
        verbose=1,
        # 'max' keeps the checkpoint with the highest validation accuracy.
        mode='max'
    ),
]

# Train on the augmented generator and validate once per epoch on the
# validation generator; steps are sized so each epoch covers every sample.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=get_steps(nb_validation_samples, batch_size),
    callbacks=callbacks_list
)
gc.collect()


# Predict

In [None]:
%%time
# Rewind the test generator so predictions start from its first batch.
test_generator.reset()
prediction = model.predict_generator(
    generator=test_generator,
    steps=get_steps(nb_test_samples, batch_size),
    verbose=1
)

# Ground-truth labels (stored as strings in the DataFrame) and the model's
# sigmoid outputs flattened to one score per image.
y_true = X_test['category'].astype('int')
y_score = prediction.ravel()

# roc_auc_score is a ranking metric, not accuracy, so it is labelled as such;
# accuracy is reported separately by thresholding the scores at 0.5.
print('Test ROC AUC : ', roc_auc_score(y_true, y_score, average='macro'))
print('Test accuracy : ', accuracy_score(y_true, (y_score > 0.5).astype('int')))

# acc / loss plot

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
epochs = range(len(acc))

plt.plot(epochs, acc, label='Training acc')
plt.plot(epochs, val_acc, label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.ylim(0.5,1)
plt.show()

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.plot(epochs, loss, label='Training loss')
plt.plot(epochs, val_loss, label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.ylim(0,0.5)
plt.show()


# Result

In [None]:
# Widen the notebook container to 90% of the browser window.
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))
