In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


In [2]:
%%capture cell_output

# unzipping train.zip 
!unzip "../input/dogs-vs-cats/train.zip"

#rename train folder
import os

src_train = os.path.join(os.getcwd(), 'src_train')

os.rename(os.path.join(os.getcwd(), 'train'), src_train)

In [3]:
%%capture cell_output

# unzipping test1.zip 
!unzip "../input/dogs-vs-cats/test1.zip"

test_dir = os.path.join(os.getcwd(), 'test1')

In [4]:
# Folder layout for the data split, all created under the working directory:
#   train/cats, train/dogs            - images used for fitting
#   validation/cats, validation/dogs  - images used for validation
#   hold_out                          - extra images kept aside (not split by class)
base_dir = os.getcwd()

train_dir = os.path.join(base_dir, "train")
train_cats = os.path.join(train_dir, "cats")
train_dogs = os.path.join(train_dir, "dogs")

# validation folders
validation_dir = os.path.join(base_dir, "validation")
val_cats = os.path.join(validation_dir, "cats")
val_dogs = os.path.join(validation_dir, "dogs")

# hold_out folder
hold_out = os.path.join(base_dir, "hold_out")

# makedirs also creates the parent folders (train/, validation/) and
# exist_ok=True makes the cell safe to re-run.
for folder in (train_cats, train_dogs, val_cats, val_dogs, hold_out):
    os.makedirs(folder, exist_ok=True)

In [5]:
import re
import random
import shutil

# Fixed seed so the same images land in the same split on every run. Without
# it, re-running the cell copies a different random sample on top of the
# previous one, so the folders grow and train/validation images can overlap.
SPLIT_SEED = 42

# Images per class in each split.
N_TRAIN = 1500
N_VAL = 500
N_HOLD_OUT = 100


def _copy_images(file_names, dst_dir):
    """Copy each named file from `src_train` into `dst_dir`, keeping its name."""
    for fname in file_names:
        shutil.copyfile(os.path.join(src_train, fname),
                        os.path.join(dst_dir, fname))


# os.listdir gives no ordering guarantee; sort so the seeded sample is stable.
src_trn_files = sorted(os.listdir(src_train))

#list of file names with cat images from train dir
cat_files = [name for name in src_trn_files if re.match(r'^cat', name)]

#list of file names with dog images from train dir
dog_files = [name for name in src_trn_files if re.match(r'^dog', name)]

# Dedicated generator: reproducible without touching the global `random` state.
rng = random.Random(SPLIT_SEED)
n_per_class = N_TRAIN + N_VAL + N_HOLD_OUT

#random sample of cat and dog image file names (2100 each)
cat_files = rng.sample(cat_files, n_per_class)
dog_files = rng.sample(dog_files, n_per_class)

#copy cats images to train_cats / val_cats folders
_copy_images(cat_files[:N_TRAIN], train_cats)
_copy_images(cat_files[N_TRAIN:N_TRAIN + N_VAL], val_cats)

#copy dogs images to train_dogs / val_dogs folders
_copy_images(dog_files[:N_TRAIN], train_dogs)
_copy_images(dog_files[N_TRAIN:N_TRAIN + N_VAL], val_dogs)

#copy the remaining dogs & cats images to hold_out folder
_copy_images(cat_files[N_TRAIN + N_VAL:] + dog_files[N_TRAIN + N_VAL:], hold_out)

In [6]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# Augmentation settings applied to training images only; validation images
# are just rescaled so they are evaluated unaltered.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators scale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
val_datagen = ImageDataGenerator(rescale=1./255)

In [7]:
# Both iterators share the same settings: 150x150 images, batches of 20,
# and a single binary label per image taken from the class sub-folder.
flow_options = dict(
    target_size=(150,150),
    batch_size=20,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

validation_generator = val_datagen.flow_from_directory(validation_dir, **flow_options)

Found 3000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [8]:
from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

import warnings
# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)

In [9]:
# VGG16 convolutional base with ImageNet weights and no classifier on top
# (include_top=False); inputs are 150x150 images with 3 channels.
IMAGE_SIZE = [150, 150]
vgg = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=[*IMAGE_SIZE, 3],
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
# Mark every layer of the pretrained base as non-trainable so that fitting
# only updates layers added on top of it.
for layer in vgg.layers:
  layer.trainable = False

In [11]:
# Classification head on top of the VGG16 base: flatten the feature maps and
# feed them to a single sigmoid unit (one output value per image).
flatten_layer = Flatten()
output_layer = Dense(1, activation='sigmoid')

prediction = output_layer(flatten_layer(vgg.output))
model = Model(inputs=vgg.input, outputs=prediction)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 150, 150, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0     

In [12]:
from keras import optimizers


#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9)
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked alongside the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from datetime import datetime
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau

# Save the model to disk only when the validation loss improves.
checkpoint = ModelCheckpoint(filepath='mymodel_adam.h5',
                             monitor='val_loss',
                             verbose=1, save_best_only=True)

callbacks = [checkpoint]

start = datetime.now()

# `fit` takes the generators directly (the notebook already passes one to
# `model.predict`); `fit_generator` is the deprecated spelling.
# validation_steps covers the whole validation generator: the checkpoint
# compares val_loss between epochs, so every epoch must be scored on the same
# images rather than on a different shuffled subset.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=15,
    steps_per_epoch=50,
    validation_steps=len(validation_generator),
    callbacks=callbacks,
    verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)




Epoch 1/15

Epoch 00001: val_loss improved from inf to 0.40494, saving model to mymodel_adam.h5
Epoch 2/15

Epoch 00002: val_loss improved from 0.40494 to 0.38217, saving model to mymodel_adam.h5
Epoch 3/15

Epoch 00003: val_loss improved from 0.38217 to 0.33127, saving model to mymodel_adam.h5
Epoch 4/15

Epoch 00004: val_loss improved from 0.33127 to 0.30733, saving model to mymodel_adam.h5
Epoch 5/15

Epoch 00005: val_loss improved from 0.30733 to 0.28451, saving model to mymodel_adam.h5
Epoch 6/15

Epoch 00006: val_loss did not improve from 0.28451
Epoch 7/15

Epoch 00007: val_loss improved from 0.28451 to 0.28162, saving model to mymodel_adam.h5
Epoch 8/15

In [None]:
# One row per entry found in the extracted test folder.
test_data = os.listdir(test_dir)
test_df = pd.DataFrame(test_data, columns=["filename"])

In [None]:
# Test images get the same 1/255 rescaling as the training/validation images.
test_datagen = ImageDataGenerator(rescale = 1./255)

# shuffle must stay False: the predictions are written back into `test_df`
# row by row, so the generator has to yield the images in dataframe order.
# With shuffling on, each prediction would be attached to an arbitrary file.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    test_dir,
    target_size = (150, 150),
    x_col = "filename",
    y_col = None,
    batch_size = 10,
    class_mode = None,
    shuffle = False,
    color_mode="rgb",
)

In [None]:
predicted = model.predict(test_generator)

# The model ends in a single sigmoid unit, so `predicted` holds one value per
# image. argmax over that single column is always 0; threshold at 0.5 instead.
# NOTE(review): 1 should correspond to "dogs" (class folders are indexed
# alphabetically) - confirm with train_generator.class_indices.
test_df['category'] = (predicted.ravel() > 0.5).astype(int)

In [None]:
# Build the submission as a separate frame instead of dropping columns from
# `test_df` in place, so this cell can be re-run without failing on the
# already removed 'filename' / 'category' columns.
submission_df = pd.DataFrame({
    # id is the file name up to its first dot, e.g. "123.jpg" -> "123"
    'id': test_df['filename'].str.split('.').str[0],
    'label': test_df['category'],
})
submission_df.to_csv('submission.csv', index = False)