In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
# Download the salader/dogs-vs-cats dataset archive with the Kaggle CLI.
# If a more recently modified local copy exists the CLI skips the download;
# add --force to download it again.
!kaggle datasets download -d salader/dogs-vs-cats

dogs-vs-cats.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
import zipfile

# Unpack the downloaded archive into /content. The context manager closes the
# archive even if extraction fails part-way through.
with zipfile.ZipFile('/content/dogs-vs-cats.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense,Conv2D, MaxPooling2D,Flatten

In [None]:
import os
# One option is to write custom code with the os module: pick up all the image files
# and their labels, and then feed everything to our CNN model.
# The issue is that the data is very large (more than 1 GB), so there might not be enough RAM available.
# Hence loading and processing the whole dataset all at once might be a problem.
# Instead we use generators (provided by Keras) that divide the data into batches and load these batches into RAM one at a time.

In [None]:
# Generator for the training set: streams batches of 32 images, each resized
# to 256x256, with integer labels inferred from the sub-folder names.
train_ds = keras.utils.image_dataset_from_directory(
    '/content/train',
    labels='inferred',
    label_mode='int',  # cats = 0 and dogs = 1
    batch_size=32,
    image_size=(256, 256),
)

Found 20000 files belonging to 2 classes.


In [None]:
# Generator for the validation set, read from the 'test' folder with the same
# batch size, image size and label encoding as the training generator.
validation_ds = keras.utils.image_dataset_from_directory(
    '/content/test',
    labels='inferred',
    label_mode='int',  # cats = 0 and dogs = 1
    batch_size=32,
    image_size=(256, 256),
)

Found 5000 files belonging to 2 classes.


In [None]:
# The loaded images hold pixel values in the range 0 to 255; the model should
# see values in the range 0 to 1, so we normalize them.
def process(image, label):
  """Scale `image` by 1/255 and cast it to float32; `label` is passed through unchanged."""
  scaled = image / 255
  normalized = tf.cast(scaled, tf.float32)
  return normalized, label

# Apply the normalization to every batch of both datasets.
# NOTE: this rebinds train_ds / validation_ds, so running this cell a second
# time divides the pixel values by 255 again.
train_ds, validation_ds = train_ds.map(process), validation_ds.map(process)

In [None]:
# Creating a CNN model.
# Three convolution + max-pooling stages (32, 64 and 128 filters), followed by
# a flatten step and a dense classifier ending in a single sigmoid unit.
model = Sequential([
    # Stage 1: 32 filters on the 256x256 RGB input.
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),

    # Stage 2: 64 filters.
    Conv2D(64, kernel_size=(3, 3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),

    # Stage 3: 128 filters.
    Conv2D(128, kernel_size=(3, 3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),

    # Classifier head.
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Print each layer's output shape and parameter count to sanity-check the architecture.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 127, 127, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 30, 30, 128)      

In [None]:
# At the end of the convolutional stack we have a (30, 30, 128) tensor, which
# is flattened before it reaches the dense layers.

# This is a binary classification problem, hence binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, evaluating on the validation set after each epoch.
# NOTE: `history` is only assigned once fit() returns; if the run is
# interrupted, the plotting cells below see no (or a stale) `history`.
history = model.fit(
    train_ds,
    epochs=10,
    validation_data=validation_ds,
)

Epoch 1/10
Epoch 2/10
 20/625 [..............................] - ETA: 42s - loss: 0.5263 - accuracy: 0.7312

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch, on a labelled figure so the plot
# can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], color='red', label='train')
ax.plot(history.history['val_accuracy'], color='blue', label='validation')
ax.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')
ax.legend()
plt.show()

# Observation: as the epochs increase, the training accuracy keeps increasing,
# while the validation accuracy stays at roughly 75-80%.

In [None]:
# Training vs. validation loss per epoch.
# The gap between the two curves shows that the model is overfitting.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], color='red', label='train')
ax.plot(history.history['val_loss'], color='blue', label='validation')
ax.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
ax.legend()
plt.show()

In [None]:
import pickle

# Serialize the `history` object returned by model.fit() into a byte stream
# and store that stream in a separate file.
# NOTE(review): the file is named 'model.pkl' but the object written is
# `history`, not `model` — confirm this is what is intended.
with open('model.pkl', 'wb') as pkl_out:
    pickle.dump(history, pkl_out)
