In [5]:
!mkdir -p ~/.kaggle
# Create this kaggle.json from kaggle profile->Account->Create new API token
!cp kaggle.json ~/.kaggle/

In [6]:
# Download the dogs-vs-cats dataset archive with the Kaggle CLI
# (requires the API token installed in ~/.kaggle by the previous cell).
!kaggle datasets download -d salader/dogs-vs-cats

Downloading dogs-vs-cats.zip to /content
100% 1.06G/1.06G [00:50<00:00, 23.5MB/s]
100% 1.06G/1.06G [00:50<00:00, 22.5MB/s]


In [7]:
import zipfile

# Unpack the downloaded archive into /content. The context manager closes the
# archive handle even if extraction raises part-way through.
with zipfile.ZipFile("/content/dogs-vs-cats.zip", 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout

In [9]:
# Dataset generators: useful for large datasets because images are streamed
# from disk batch by batch instead of being loaded into RAM all at once.

# Settings shared by the training and test datasets.
common_ds_args = dict(
    labels='inferred',      # class labels come from the sub-directory names
    label_mode="int",       # integer labels (expected: cat -> 0, dog -> 1)
    batch_size=32,
    image_size=(256, 256),  # source images differ in size; resize all to 256x256
)

train_ds = keras.utils.image_dataset_from_directory(
    directory='/content/train', **common_ds_args
)

test_ds = keras.utils.image_dataset_from_directory(
    directory='/content/test', **common_ds_args
)

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.


In [10]:
# Pixel values arrive in the 0-255 range; rescale them to 0-1 so the
# network trains on normalized inputs.

def process(image, label):
  """Scale an image batch by 1/255 and cast it to float32; the label passes through unchanged."""
  scaled = image / 255.
  return tf.cast(scaled, tf.float32), label

# Apply the normalization lazily to every batch of both datasets.
train_ds = train_ds.map(process)
validation_ds = test_ds.map(process)

In [11]:
# Baseline CNN: three Conv -> MaxPool stages followed by a dense classifier head.

model = Sequential([
    # Feature extractor: filter count doubles at each stage (32 -> 64 -> 128).
    Conv2D(32, kernel_size=(3,3), padding='valid', activation='relu', input_shape=(256,256,3)),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Conv2D(64, kernel_size=(3,3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Conv2D(128, kernel_size=(3,3), padding='valid', activation='relu'),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Flatten(),

    # Classifier head: a single sigmoid unit for the binary cat/dog decision.
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])


In [12]:
# Print each layer's output shape and parameter count to sanity-check the architecture.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 127, 127, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 30, 30, 128)       0

In [13]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output unit of the model), and accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 10 epochs and keep the History object for the plots below.
# Validation uses `validation_ds` (the test images after the same 0-1 scaling
# as the training data); the raw `test_ds` still holds 0-255 pixel values.
history = model.fit(train_ds, epochs=10, validation_data=validation_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy of the baseline CNN, one point per epoch.
fig, ax = plt.subplots()
ax.plot(history.history["accuracy"], color='red', label='train')
ax.plot(history.history["val_accuracy"], color='blue', label='Validation')
ax.set_title("Baseline CNN: accuracy per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss of the baseline CNN, one point per epoch.
# A widening gap between the two curves indicates overfitting.
fig, ax = plt.subplots()
ax.plot(history.history["loss"], color='red', label='train')
ax.plot(history.history["val_loss"], color='blue', label='Validation')
ax.set_title("Baseline CNN: loss per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Binary cross-entropy loss")
ax.legend()
plt.show()

### From the above graphs, we can say that the model is overfitting

#### Different ways to reduce overfitting

- Add more data
- Data Augmentation
- L1/L2 regularizer
- Dropout
- Batch Norm
- Reduce Complexity

In [None]:
# Same architecture as the baseline, regularized with BatchNormalization after
# every convolution and Dropout between the dense layers.

model = Sequential([
    Conv2D(32, kernel_size=(3,3), padding='valid', activation='relu', input_shape=(256,256,3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Conv2D(64, kernel_size=(3,3), padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Conv2D(128, kernel_size=(3,3), padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2), strides=2, padding='valid'),

    Flatten(),

    # Dense head; each hidden layer is followed by 10% dropout.
    Dense(128, activation='relu'),
    Dropout(0.1),
    Dense(64, activation='relu'),
    Dropout(0.1),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Compile the regularized model with the same settings as the baseline:
# Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the regularized model for 10 epochs and keep the History object for the
# plots below. Validation uses `validation_ds` (test images scaled to 0-1 like
# the training data); the raw `test_ds` still holds 0-255 pixel values.
history = model.fit(train_ds, epochs=10, validation_data=validation_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy of the BatchNorm + Dropout model, one point per epoch.
fig, ax = plt.subplots()
ax.plot(history.history["accuracy"], color='red', label='train')
ax.plot(history.history["val_accuracy"], color='blue', label='Validation')
ax.set_title("CNN with BatchNorm + Dropout: accuracy per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss of the BatchNorm + Dropout model, one point per epoch.
fig, ax = plt.subplots()
ax.plot(history.history["loss"], color='red', label='train')
ax.plot(history.history["val_loss"], color='blue', label='Validation')
ax.set_title("CNN with BatchNorm + Dropout: loss per epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Binary cross-entropy loss")
ax.legend()
plt.show()

- Performance improves a bit, but it can still be improved further using the other methods listed above

## Predictions

In [16]:
import cv2

In [None]:
# cv2.imread signals an unreadable/missing file by returning None rather than
# raising, so fail loudly here instead of with an obscure error further down.
test_img = cv2.imread("/content/dog.jpg")
if test_img is None:
    raise FileNotFoundError("Could not read image: /content/dog.jpg")

# OpenCV loads channels in BGR order, while the Keras training pipeline and
# matplotlib both work with RGB; convert once so display and prediction agree.
test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)

In [None]:
# Display the loaded image; imshow interprets a 3-channel array as RGB.
plt.imshow(test_img)

In [None]:
# Inspect the array dimensions of the loaded image before it is resized for the model.
test_img.shape

In [None]:
# Resize the test image to the 256x256 spatial size the network was built for.
test_img = cv2.resize(test_img, (256,256))

# The model was trained on pixels scaled to 0-1 (see `process`), so apply the
# same scaling here; feeding raw 0-255 values would give meaningless predictions.
# Then add a leading batch axis: (1, 256, 256, 3) = (batch, height, width, channels).
test_input = (test_img.astype("float32") / 255.0).reshape((1, 256, 256, 3))

In [None]:
# Run inference on the single-image batch. The final layer is one sigmoid unit,
# so the result is one value between 0 and 1 per image.
# NOTE(review): per the label comments in the dataset cell, values near 1 should
# mean "dog" and near 0 "cat" -- confirm against the dataset's class_names.
model.predict(test_input)