In [12]:
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator,load_img
from sklearn.model_selection import train_test_split
import os



In [2]:
# Reading the images and creating it to data frame

images=os.listdir("C:/Users/USER/Downloads/cat vs dog/cat vs dog")
categories=[]
for i in images:
    category=i.split('.')[0]
    if category=='cat':
        categories.append(0)
    else:
        categories.append(1)
        
df=pd.DataFrame({'File':images, 'category':categories})

In [3]:
df.head()

Unnamed: 0,File,category
0,cat.1.jpg,0
1,cat.10.jpg,0
2,cat.100.jpg,0
3,cat.1000.jpg,0
4,cat.1001.jpg,0


In [4]:
# Checking non-image file

a=[i for i in df['File'] if not i.endswith('.jpg')]
a

['_DS_Store']

In [5]:
# Finding the index of non-image file 

np.where(df['File']=='_DS_Store')

(array([8005], dtype=int64),)

In [6]:
# Droping the non-image file

df.drop(df.index[8005],inplace=True)

In [7]:
df.shape

(8005, 2)

In [8]:
# Data frames are suffled by sample function(frac=1--->100% suffle)

df=df.sample(frac=1,random_state=42)
df.head()

Unnamed: 0,File,category
7587,dog.625.jpg,1
6296,dog.3063.jpg,1
7050,dog.3742.jpg,1
2107,cat.2896.jpg,0
5577,dog.2417.jpg,1


In [67]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dropout,Activation,BatchNormalization,Flatten,Dense

In [68]:
model=Sequential()
model.add(Conv2D(30,(3,3),activation='relu',input_shape=(120,120,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(75,(3,3),activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(120,(3,3),activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(220,activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(2,activation='softmax'))
model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['accuracy'])

In [69]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 118, 118, 30)      840       
_________________________________________________________________
batch_normalization (BatchNo (None, 118, 118, 30)      120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 59, 59, 30)        0         
_________________________________________________________________
dropout (Dropout)            (None, 59, 59, 30)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 57, 57, 75)        20325     
_________________________________________________________________
batch_normalization_1 (Batch (None, 57, 57, 75)        300       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 28, 28, 75)        0

In [70]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [96]:
early_stopping=EarlyStopping(monitor='val_accuracy',
                            patience=10)
reduce_lr=ReduceLROnPlateau(monitor='val_accuracy',
                           patience=5,
                           factor=0.5,
                           verbose=1,
                           min_lr=0.00001)
callback=[reduce_lr,early_stopping]

In [97]:
df.head()

Unnamed: 0,File,category
7587,dog.625.jpg,Dog
6296,dog.3063.jpg,Dog
7050,dog.3742.jpg,Dog
2107,cat.2896.jpg,Cat
5577,dog.2417.jpg,Dog


In [98]:
# output should be in categories. If int o/p is passed to generator error occurs on class_mode.

df['category']=df['category'].replace({0:'Cat',1:'Dog'})

In [99]:
# data frame is seperated to train and validation

train_df,validation_df=train_test_split(df,test_size=0.2,random_state=42)

In [100]:
train_df.shape, validation_df.shape

((6404, 2), (1601, 2))

In [101]:
# Due to shuffle index won't be in order, so reseting it

train_df=train_df.reset_index(drop=True)
train_df.head()

Unnamed: 0,File,category
0,cat.596.jpg,Cat
1,cat.3011.jpg,Cat
2,cat.2206.jpg,Cat
3,dog.635.jpg,Dog
4,cat.2202.jpg,Cat


In [102]:
validation_df=validation_df.reset_index(drop=True)
validation_df.head()

Unnamed: 0,File,category
0,dog.73.jpg,Dog
1,dog.3279.jpg,Dog
2,dog.242.jpg,Dog
3,cat.1051.jpg,Cat
4,dog.289.jpg,Dog


In [103]:
# Creating batch size, image size

batch_size=15
image_size=(120,120)
# Creating image data generator

train_gen=ImageDataGenerator(rotation_range=15,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             horizontal_flip=True,
                             rescale=1./255)
                             

train_generator=train_gen.flow_from_dataframe(train_df,
                                             directory='C:/Users/USER/Downloads/cat vs dog/cat vs dog',
                                             x_col='File',
                                             y_col='category',
                                             class_mode='categorical',
                                             target_size=image_size,
                                             batch_size=batch_size)



Found 6404 validated image filenames belonging to 2 classes.


In [104]:
validation_gen=ImageDataGenerator(rescale=1./255)

validation_generator=validation_gen.flow_from_dataframe(validation_df,
                                             directory='C:/Users/USER/Downloads/cat vs dog/cat vs dog',
                                             x_col='File',
                                             y_col='category',
                                             class_mode='categorical',
                                             target_size=image_size,           
                                             batch_size=batch_size)


Found 1601 validated image filenames belonging to 2 classes.


In [105]:
history = model.fit_generator(train_generator,
                             steps_per_epoch=6404//15,
                             epochs=50,
                             verbose=1,
                             callbacks=callback,
                             validation_data=validation_generator,
                             validation_steps=1601//15)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50

Epoch 00034: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50

Epoch 00042: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [106]:
model.save('catvsdog_50epoch.h5')

In [2]:
from keras.models import load_model

In [3]:
model=load_model('catvsdog_50epoch.h5')


In [4]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 118, 118, 30)      840       
_________________________________________________________________
batch_normalization (BatchNo (None, 118, 118, 30)      120       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 59, 59, 30)        0         
_________________________________________________________________
dropout (Dropout)            (None, 59, 59, 30)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 57, 57, 75)        20325     
_________________________________________________________________
batch_normalization_1 (Batch (None, 57, 57, 75)        300       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 28, 28, 75)        0

In [86]:
im=os.listdir("C:/Users/USER/Pictures/cat vs dog test")
im

['cat_1.jpg', 'cat_2.jpg', 'cat_3.jpg', 'dog_1.jpg', 'dog_2.jpg', 'dog_3.jpg']

In [87]:
image_size=(120,120)

In [88]:
test_df=pd.DataFrame({'file':im})
test_df

Unnamed: 0,file
0,cat_1.jpg
1,cat_2.jpg
2,cat_3.jpg
3,dog_1.jpg
4,dog_2.jpg
5,dog_3.jpg


In [89]:
test_gen=ImageDataGenerator(rescale=1./255)

test_generator=test_gen.flow_from_dataframe(test_df,
                                             directory="C:/Users/USER/Pictures/cat vs dog test",
                                             x_col='file',
                                             y_col=None,
                                             class_mode=None,
                                             target_size=image_size)

Found 6 validated image filenames.


In [90]:
predict=model.predict_classes(test_generator)



In [91]:
test_df['predict']=predict

In [92]:
test_df['predict']=test_df['predict'].replace({0:'cat',1:'dog'})
test_df

Unnamed: 0,file,predict
0,cat_1.jpg,cat
1,cat_2.jpg,cat
2,cat_3.jpg,dog
3,dog_1.jpg,dog
4,dog_2.jpg,cat
5,dog_3.jpg,dog
