## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D  
from keras.layers import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Activation
from keras.layers import GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import VGG16
from keras.models import Model
import matplotlib.pyplot as plt

### Download the dataset from Kaggle

In [2]:
# Kaggle credentials are deliberately NOT stored in this notebook: a literal key
# here ends up in version control and in the rendered cell output.
# Supply them through the KAGGLE_USERNAME / KAGGLE_KEY environment variables or an
# existing ~/.kaggle/kaggle.json file instead.
# NOTE(review): the API key that was previously hard-coded in this cell has been
# exposed and should be revoked and regenerated in the Kaggle account settings.
kaggle_env_ready = all(os.environ.get(name) for name in ("KAGGLE_USERNAME", "KAGGLE_KEY"))
kaggle_file_ready = os.path.isfile(os.path.expanduser("~/.kaggle/kaggle.json"))
if not (kaggle_env_ready or kaggle_file_ready):
    # Warn instead of raising so the rest of the notebook can still be inspected.
    print("WARNING: no Kaggle credentials found; set KAGGLE_USERNAME and KAGGLE_KEY "
          "or create ~/.kaggle/kaggle.json before downloading the dataset.")

In [3]:
# to download the dataset from kaggle
!pip install -U -q kaggle
!mkdir -p ~/.kaggle
!echo '{"username":"nesreenahmed","key":"c53f3958a0f70dff37202dc3f20b134a"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c dogs-vs-cats

dogs-vs-cats.zip: Skipping, found more recently modified local copy (use --force to force download)


### Extract the data from zipped files

In [4]:
import zipfile

In [5]:
# Unpack the Kaggle download once, then unpack each nested archive it is expected
# to contain (test1.zip and train.zip).
# Assumes the working directory is /content, so that extracting to '.' places the
# nested archives where the absolute paths below look for them.
with zipfile.ZipFile('/content/dogs-vs-cats.zip', 'r') as outer:
    outer.extractall('.')

for zip_file in ('test1', 'train'):
    # The archive name is now actually substituted into the path; previously the
    # format() call had no placeholder and the outer archive was re-extracted on
    # every iteration while the message claimed otherwise.
    with zipfile.ZipFile('/content/{}.zip'.format(zip_file), 'r') as z:
        z.extractall('.')
        print('{} unzipped'.format(zip_file))

test1 unzipped
train unzipped


In [6]:
# Unpack the training images into the working directory.
# The message names the archive explicitly instead of reading a variable left
# over from an earlier cell, so this cell is correct when run on its own.
with zipfile.ZipFile('/content/train.zip', 'r') as z:
  z.extractall('.')
  print('{} unzipped'.format('train'))

train unzipped


In [7]:
# Unpack the test images into the working directory.
# The message names the archive explicitly; the leftover loop variable used
# before made this cell report "train unzipped" for the test1 archive.
with zipfile.ZipFile('/content/test1.zip', 'r') as z:
  z.extractall('.')
  print('{} unzipped'.format('test1'))

train unzipped


### Prepare Training Data

In [8]:
# Index every file in the training folder: one row per image holding its full
# path and the label taken from the part of the file name before the first dot
# (e.g. "cat.4399.jpg" -> "cat").
img_file = '/content/train'
file_names = os.listdir(img_file)

path = [os.path.join(img_file, name) for name in file_names]
categories = [name.split('.')[0] for name in file_names]

df = pd.DataFrame().assign(img_path=path, category=categories)
df.head(10)

Unnamed: 0,img_path,category
0,/content/train/cat.4399.jpg,cat
1,/content/train/cat.9150.jpg,cat
2,/content/train/dog.8418.jpg,dog
3,/content/train/dog.5294.jpg,dog
4,/content/train/dog.8787.jpg,dog
5,/content/train/cat.8944.jpg,cat
6,/content/train/cat.11812.jpg,cat
7,/content/train/dog.3093.jpg,dog
8,/content/train/dog.9486.jpg,dog
9,/content/train/dog.3853.jpg,dog


## Build Model

In [9]:
input_shape = (250, 250, 3)
batch_size = 32

# VGG16 convolutional base with ImageNet weights and without its classifier head.
pre_trained_model = VGG16(input_shape=input_shape, include_top=False, weights="imagenet")

# Freeze the first 15 layers and leave the remaining ones trainable for fine-tuning.
for index, layer in enumerate(pre_trained_model.layers):
    layer.trainable = index >= 15

last_layer = pre_trained_model.get_layer('block5_pool')
last_output = last_layer.output

# Reduce each feature map to its single maximum value (global max pooling),
# giving a 1-D feature vector per image.
x = GlobalMaxPooling2D()(last_output)
# Fully connected layer with 512 hidden units and ReLU activation
x = Dense(512, activation='relu')(x)
# Dropout with rate 0.5
x = Dropout(0.5)(x)
# Single sigmoid unit for binary classification
x = Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)
# `learning_rate` replaces the deprecated `lr` argument of the optimizer.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.9),
    metrics=['accuracy'],
)

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 250, 250, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 62, 62, 128)       0     

  super(SGD, self).__init__(name, **kwargs)


### Split train and validation data

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 10% of the labelled images for validation.
# random_state makes the split repeatable across kernel restarts, and stratify
# keeps the cat/dog ratio the same in both subsets.
train_df, validate_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['category'],
)
# drop=True discards the old row labels instead of adding them as an 'index' column.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

# For a quick smoke test of the pipeline, subsample instead:
# validate_df = validate_df.sample(n=100).reset_index(drop=True)
# train_df = train_df.sample(n=1800).reset_index(drop=True)

total_train = train_df.shape[0]
total_validate = validate_df.shape[0]

### Training Generator

In [12]:
# Training pipeline: pixel values are scaled by 1/255 and each image is randomly
# rotated, shifted and horizontally flipped as configured below.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Batches of (image, binary label) read from the paths listed in train_df.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/content/train",
    x_col='img_path',
    y_col='category',
    target_size=(250, 250),
    class_mode='binary',
    batch_size=batch_size,
)

Found 22500 validated image filenames belonging to 2 classes.


### Validation Generator

In [13]:
# Validation pipeline: only the 1/255 rescaling is applied, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Batches of (image, binary label) read from the paths listed in validate_df.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="/content/train",
    x_col='img_path',
    y_col='category',
    target_size=(250, 250),
    class_mode='binary',
    batch_size=batch_size,
)

Found 2500 validated image filenames belonging to 2 classes.


### Training the model

In [14]:
# Model.fit accepts generators directly; the deprecated fit_generator wrapper is
# no longer needed. Step counts use whole batches only.
history = model.fit(
    train_generator,
    epochs=5,
    steps_per_epoch=total_train // batch_size,
    validation_data=validation_generator,
    validation_steps=total_validate // batch_size,
)

  """Entry point for launching an IPython kernel.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
# Model.evaluate accepts the generator directly; evaluate_generator is deprecated.
# No step count is passed, so every batch of the generator is scored, including
# the final partial one that a floor-divided step count would skip.
# Note: the figures reported here are measured on the validation split.
loss, accuracy = model.evaluate(validation_generator, workers=12)
print("Test: accuracy = %f  ;  loss = %f " % (accuracy, loss))

  """Entry point for launching an IPython kernel.


Test: accuracy = 0.961939  ;  loss = 0.092905 
