In [None]:
import os 
import shutil
from os.path import join as path_join
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib
import datetime
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, Reshape
from keras import backend as K
from keras.optimizers import RMSprop
import seaborn as sns
matplotlib.use('Agg')


import matplotlib.pyplot as plt
%matplotlib inline

# Model


In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense, Reshape
from keras import backend as K

def naive_model():
    """Build a small three-stage CNN for binary classification.

    Each stage is Conv2D(3x3) -> ReLU -> 2x2 max-pooling with 32, 32 and 64
    filters; the head is Flatten -> Dense(64) -> ReLU -> Dropout(0.5) ->
    Dense(1) -> sigmoid.

    Reads the notebook-level ``input_shape``.

    Returns:
        An *uncompiled* ``Sequential`` model; the caller must call
        ``compile`` before training.
    """
    net = Sequential()

    # (number of filters, extra keyword arguments for the Conv2D layer);
    # only the very first layer declares the input shape.
    conv_stages = (
        (32, {'input_shape': input_shape}),
        (32, {}),
        (64, {}),
    )
    for n_filters, extra_kwargs in conv_stages:
        net.add(Conv2D(n_filters, (3, 3), **extra_kwargs))
        net.add(Activation('relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    net.add(Flatten())
    net.add(Dense(64))
    net.add(Activation('relu'))
    net.add(Dropout(0.5))
    net.add(Dense(1))
    net.add(Activation('sigmoid'))

    return net

def kaggle_model():
    """Build and compile a VGG-like CNN for binary classification.

    Four stages of two 3x3 ReLU convolutions (32, 64, 128, 256 filters), each
    followed by 2x2 max-pooling, then two Dense(256)+Dropout(0.5) layers and a
    single sigmoid output unit.

    Reads the notebook-level ``input_shape``, ``objective`` and ``optimizer``.

    Returns:
        The ``Sequential`` model, already compiled with accuracy as metric.
    """
    net = Sequential()

    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': input_shape}
    for n_filters in (32, 64, 128, 256):
        for _ in range(2):
            net.add(Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs))
            first_layer_kwargs = {}
        net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Flatten())
    for _ in range(2):
        net.add(Dense(256, activation='relu'))
        net.add(Dropout(0.5))

    net.add(Dense(1))
    net.add(Activation('sigmoid'))

    net.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])
    return net

def VGG_19(weights_path=None):
    """Build and compile a VGG-19 style network with a single sigmoid output.

    The convolutional part follows the VGG-19 layout: five stages of
    zero-padded 3x3 ReLU convolutions (2x64, 2x128, 4x256, 4x512, 4x512), each
    stage closed by 2x2 max-pooling with stride 2.  The head is two
    Dense(4096)+Dropout(0.5) layers and one output unit.

    Reads the notebook-level ``input_shape``, ``objective`` and ``optimizer``.

    Args:
        weights_path: Optional path to a weights file saved from a model with
            this exact architecture.  When given, the weights are loaded
            before the model is returned.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # (filters, number of convolutions) per stage.
    stages = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    # Only the very first layer declares the input shape.
    padding_kwargs = {'input_shape': input_shape}
    for n_filters, n_convs in stages:
        for _ in range(n_convs):
            model.add(ZeroPadding2D((1, 1), **padding_kwargs))
            padding_kwargs = {}
            model.add(Conv2D(n_filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    # The output unit must be linear before the sigmoid: a softmax over a
    # single unit is always exactly 1.0, which would make the prediction a
    # constant and give zero gradient to the whole network.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    if weights_path:
        model.load_weights(weights_path)

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])
    return model

# Parameters

In [None]:
# Size every image is resized to before it is fed to the network.
img_width, img_height = 150, 150
# Put the 3 colour channels first or last depending on the Keras backend setting.
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)


# Training settings
epochs = 50
batch_size = 16
# Number of images taken from the full data set for this run.
train_size = 1000
# Three quarters of the subset are counted as training samples, one quarter as
# validation samples; these counts only drive the steps-per-epoch arithmetic.
nb_train_samples = train_size // 4 * 3
nb_validation_samples = train_size // 4

# Model settings
# Today's date as a string; used as part of the saved weights / output file names.
suf = str(datetime.date.today())
# Factory function that builds the model to train.
model_func = kaggle_model
objective = 'binary_crossentropy'
optimizer = RMSprop(lr=1e-4)

In [None]:
import datetime
def save_model(model):
    """Save the model's weights under ``./models/``.

    The file name encodes the notebook-level run settings: date suffix
    (``suf``), ``train_size``, ``epochs``, ``batch_size`` and the name of the
    model factory (``model_func``).

    Args:
        model: Object exposing ``save_weights(path)`` (a Keras model).
    """
    weights_file = './models/t{}-s{}-e{}-b{}-{}image.h5'.format(
        suf, train_size, epochs, batch_size, model_func.__name__)

    # save_weights does not create missing directories, so make sure the
    # target folder exists instead of failing after a full training run.
    target_dir = os.path.dirname(weights_file)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    model.save_weights(weights_file)

## Load Data

In [None]:
# File names look like "<label>.<rest>", e.g. "cat.0.jpg" / "dog.1.jpg".
def get_train_data(base_path='./data/image/'):
    """List the files in ``base_path`` together with their labels.

    The label of a file is the part of its name before the first ``.``.

    Args:
        base_path: Directory whose entries are listed.

    Returns:
        A ``pandas.DataFrame`` with one row per directory entry and the
        columns ``filename`` and ``label``, in ``os.listdir`` order.
    """
    names = list(os.listdir(base_path))
    labels = [name.split('.')[0] for name in names]
    return pd.DataFrame({'filename': names, 'label': labels})

In [None]:
# Build the filename/label table from the default image directory.
data = get_train_data()

In [None]:
# Split the table into its filename and label columns.
# NOTE: `data` is rebound from the DataFrame to its 'filename' column, so this
# cell cannot be run a second time without re-running the cell that loads `data`.
data, target = data['filename'], data['label']

In [None]:
# Bar chart of how many files carry each label.
# `x=` is passed explicitly because the meaning of the first positional
# argument of countplot differs between seaborn versions.
sns.countplot(x=target)
# `sns.plt` is not available in current seaborn releases; use pyplot directly.
plt.title("dog and cat")

In [None]:
# Keep only the first `train_size` filenames and labels.
# NOTE(review): the rows are in os.listdir order, so which files (and which
# class balance) end up in the subset depends on the file system — confirm
# this is acceptable.
data, target = data.iloc[:train_size], target.iloc[:train_size]

In [None]:
# Split filenames/labels into train and test parts. The fixed random_state
# makes the split repeatable; no test_size is given, so scikit-learn's default
# proportion applies.
train_x, test_x , train_y, test_y = train_test_split(data, target, random_state=2333)

In [None]:
# Directory layout used by the rest of the notebook:
#   ./data/image          all source images
#   ./data/train/{dogs,cats}   training images, one sub-folder per class
#   ./data/valid/{dogs,cats}   validation images, one sub-folder per class
base_path='./data/'
# NOTE: `image_paht` is a typo for "image_path"; the name is kept because
# split_train_test_to_folder reads it under this spelling.
image_paht = path_join(base_path, 'image')
train_path = path_join(base_path, 'train')
valid_path = path_join(base_path, 'valid')
train_dog_path = path_join(train_path, 'dogs')
train_cat_path = path_join(train_path, 'cats')
# The "test" split produced by train_test_split is stored under the valid folder.
test_dog_path = path_join(valid_path, 'dogs')
test_cat_path = path_join(valid_path, 'cats')

In [None]:
def split_train_test_to_folder(train_x, train_y, test_x, test_y):
    """Copy the source images into per-class train / validation folders.

    Files labelled ``'dog'`` go to the dog folder of their split; every other
    label goes to the cat folder.  Missing target folders are created first.
    Source and target locations come from the notebook-level path variables.

    Args:
        train_x: Iterable of training file names.
        train_y: Iterable of labels, aligned with ``train_x``.
        test_x: Iterable of validation file names.
        test_y: Iterable of labels, aligned with ``test_x``.
    """
    for folder in (train_dog_path, train_cat_path, test_dog_path, test_cat_path):
        if not os.path.exists(folder):
            os.makedirs(folder)

    # (file names, labels, dog folder, cat folder) for each split.
    splits = (
        (train_x, train_y, train_dog_path, train_cat_path),
        (test_x, test_y, test_dog_path, test_cat_path),
    )
    for names, labels, dog_dir, cat_dir in splits:
        for name, tag in zip(names, labels):
            target_dir = dog_dir if tag == 'dog' else cat_dir
            shutil.copy(path_join(image_paht, name), path_join(target_dir, name))

In [None]:
# Copy the split images into the train/valid class folders on disk.
split_train_test_to_folder(train_x, train_y, test_x, test_y)

### Keras Image Generator

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
# Generator for training images: applies random rotation, shifts, shear, zoom
# and horizontal flips, and multiplies pixel values by 1/255.
# See the Keras ImageDataGenerator documentation for the exact meaning and
# units of each argument.
train_data_generator = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
)

In [None]:
import random

In [None]:
# Show one randomly chosen training image from the cat folder.
cat_name = random.choice(os.listdir('data/train/cats/'))
cat = load_img('data/train/cats/{}'.format(cat_name))
print(cat.size)
print("Cat Sample")
plt.imshow(cat)

In [None]:
# Show one randomly chosen training image from the dog folder.
dog_name = random.choice(os.listdir('data/train/dogs/'))
dog = load_img('data/train/dogs/{}'.format(dog_name))
print(dog.size)
print("Dog Sample")
plt.imshow(dog)

In [None]:
# Convert the sample image to a float array of pixel values.
x = img_to_array(cat)
print(x.shape)
# imshow expects float RGB data in [0, 1] and clips anything above, so the
# 0-255 array is scaled for display only; `x` itself stays unscaled because
# the data generator applies its own 1/255 rescale later.
plt.imshow(x / 255.)

In [None]:
# Add a leading batch axis of size 1 so the array can be passed to the generator.
# NOTE: `x` is rebound, so every re-run of this cell adds another axis.
x = x.reshape((1,) + x.shape)
print(x.shape)

In [None]:
# Draw six augmented variants of the sample image, one figure each.
# The generator never ends on its own, so the loop stops itself after the sixth.
for i, batch in enumerate(train_data_generator.flow(x, batch_size=1), 1):
    fig = plt.figure()
    plt.imshow(batch[0])
    if i > 5:
        break

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, Reshape
from keras import backend as K

In [None]:
# Build the configured model and compile it with the loss and optimizer chosen
# in the parameters cell. Compiling with the string 'rmsprop' here would
# replace the configured optimizer (and its learning rate) with library defaults.
model = model_func()
model.compile(loss=objective,
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Generator for validation images: only the 1/255 rescale, no augmentation arguments.
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [None]:
# Stream augmented training batches from the train folder, resized to the
# configured image size, with binary labels.
train_generator = train_data_generator.flow_from_directory(
    train_path,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

In [None]:
# Stream validation batches from the valid folder with the same size and label
# settings as the training generator, but without augmentation.
validation_generator = test_datagen.flow_from_directory(
    valid_path,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

In [None]:
from keras.callbacks import Callback

In [None]:
class LossHistory(Callback):
    """Callback that records the training and validation loss of every epoch.

    Attributes are (re)created when training starts:
        losses: list of the ``'loss'`` value reported at the end of each epoch.
        val_losses: list of the ``'val_loss'`` value reported at the end of
            each epoch (``None`` when the key is missing from ``logs``).
    """

    def on_train_begin(self, logs=None):
        """Reset both histories at the start of a training run."""
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's losses.

        Args:
            batch: Index passed by the framework for this hook; unused.
                (The parameter name is kept for compatibility.)
            logs: Mapping of metric names to values; may be ``None``.
        """
        # Avoid a shared mutable default and tolerate a missing logs dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

In [None]:
# Train the model from the directory generators, recording per-epoch losses
# in `history`. The step counts are the configured sample counts divided by
# the batch size (integer division, so a final partial batch is not counted).
history = LossHistory()
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size, callbacks=[history])

In [None]:
# Plot training and validation loss per epoch.
loss = history.losses
val_loss = history.val_losses

plt.xlabel('Epochs')
plt.ylabel('Loss')
# Name the model that was actually trained instead of a hard-coded label.
plt.title('{} Loss Trend'.format(model_func.__name__))
plt.plot(loss, 'blue', label='Training Loss')
plt.plot(val_loss, 'green', label='Validation Loss')
# One tick for every second epoch.
plt.xticks(range(0, epochs, 2))
plt.legend()
plt.show()

In [None]:
# Write the trained weights to disk; the file name encodes the run settings.
save_model(model)

## Predict

In [None]:
# Location of the unlabeled test images and a rescale-only generator for them.
test_path = './data/test/'
test_data_gen = ImageDataGenerator(rescale=1. / 255)
# NOTE(review): `test_generator` is not referenced by any later cell in this
# notebook; predictions are made by loading files one by one instead.
# flow_from_directory presumably expects one sub-folder per class, which the
# flat test folder may not have — confirm before relying on it.
test_generator = test_data_gen.flow_from_directory(
    test_path,
    target_size=(img_width, img_height),
    batch_size=batch_size)

In [None]:
# List every entry in the test directory (same location as `test_path`).
tests = os.listdir('./data/test/')

In [None]:
# Load the first listed test file as a quick preview.
image = load_img(path_join(test_path, tests[0]))

In [None]:
# Resize to the configured network input size rather than a hard-coded 150x150,
# so changing img_width / img_height in the parameters cell is enough.
image = image.resize((img_width, img_height))

In [None]:
# Turn the preview image into a batch of one and display it as the
# rescale-only test generator would deliver it.
image = img_to_array(image)
image = image.reshape((1,) + image.shape)
x = next(test_data_gen.flow(image))
plt.imshow(x[0])

In [None]:
# Keep only the .jpg files and order them by their numeric id ("<id>.jpg").
# Filtering first prevents int() from failing on stray entries such as hidden
# files, which could never be predicted on anyway.
tests = sorted(
    (fn for fn in tests if fn.endswith('.jpg')),
    key=lambda fn: int(fn.split('.')[0]),
)

In [None]:
# Maps test file name -> model prediction; filled in by the prediction loop.
res = {}

In [None]:
# Predict every test image, one at a time.
for fn in tests:
    image = load_img(path_join(test_path, fn))
    # Use the configured input size so this stays in sync with the model.
    image = image.resize((img_width, img_height))
    image = img_to_array(image)
    image = image.reshape((1,) + image.shape)
    # Pass the image through the test generator so it gets the same 1/255
    # rescale the model saw during training; only the first batch is needed.
    x = next(test_data_gen.flow(image))
    res[fn] = model.predict(x)

In [None]:
# Display the raw predictions (this shows the whole dict).
res

In [None]:
# Drop results for entries that are not jpg files.
# Iterate over a snapshot of the keys: deleting from a dict while iterating
# over its live key view raises RuntimeError on Python 3.
for key in list(res.keys()):
    if 'jpg' not in key:
        del res[key]

In [None]:
# Replace each stored prediction by its single value at position [0][0].
for key, prediction in list(res.items()):
    res[key] = prediction[0][0]

In [None]:
# Re-key the results by the part of the file name before the first '.'.
res = {key.split('.')[0]:value for key, value in res.items()}

In [None]:
# Show ten randomly drawn test images (with replacement) next to their prediction.
# random.choice needs a sequence; a dict key view is not one on Python 3.
result_ids = list(res.keys())
for fn in [random.choice(result_ids) for _ in range(10)]:
    print(res[fn])
    image = load_img(path_join(test_path, '{}.jpg'.format(fn)))
    plt.imshow(image)
    plt.show()

In [None]:
# Collect the results as two aligned columns: the ids and the predictions
# rounded to two decimals.
df = {
    'id': [key for key in res],
    'label': [round(res[key], 2) for key in res],
}

In [None]:
# Convert the column dict into a DataFrame (`df` is rebound, so this cell
# depends on the previous one having just been run).
df = pd.DataFrame(df)

In [None]:
# Write the predictions to ./output/, with the run settings in the file name.
output_file = './output/{}-{}-{}-{}-{}output.csv'.format(suf, train_size, epochs, batch_size, model_func.__name__)
# to_csv does not create missing directories; make sure the folder exists so
# the results of a full run are not lost at the very last step.
output_dir = os.path.dirname(output_file)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
df.to_csv(output_file, index=False)