In [1]:
import tensorflow as tf
import keras

In [2]:
# from keras.preprocessing.image import ImageDataGenerator

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
# Record the library versions this notebook was run with (TensorFlow first,
# then Keras), one per line.
print(tf.__version__, keras.__version__, sep="\n")

2.15.0
3.0.2


In [5]:
# Generator configuration for the training images: pixel rescaling plus random
# shear and zoom augmentation.
train_data = ImageDataGenerator(
    rescale=1 / 255,
    shear_range=0.2,
    zoom_range=0.2,
)

- **rescale = 1/255**: This argument scales the pixel values of the images by a factor of 1/255, so that they are in the range of [0, 1] instead of [0, 255]. This is a common preprocessing step for neural networks that work with images.
- **shear_range = 0.2**: This argument applies a random shear transformation to the images, which means that the images are skewed along an axis by a certain angle. **The angle is chosen randomly from the range of [-0.2, 0.2] degrees** (Keras interprets `shear_range` as a shear angle in degrees, so 0.2 is a very mild shear). Shear transformation can help the model learn to recognize objects that are not perfectly aligned or oriented.
- **zoom_range = 0.2**: This argument applies a random zoom transformation to the images, which means that the images are scaled up or down by a certain factor. **The factor is chosen randomly from the range of [1 - 0.2, 1 + 0.2]**. Zoom transformation can help the model learn to recognize objects that are at different distances or sizes.

In [6]:
# Stream the training images from disk, resized to 64x64, with one binary
# label per image.
training_set = train_data.flow_from_directory(
    directory="dataset/training_set",
    target_size=(64, 64),
    class_mode="binary",
)

Found 8048 images belonging to 2 classes.


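- **target_size = (64,64)** # every image is resized to 64x64 pixels when it is loaded; pick a size that suits your data (for example, no larger than your smallest images) and keep it identical to the model's input shape.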
- **class_mode = "binary"** # the labels are returned as a 1D array of binary values, one per image: here 0 for cat and 1 for dog (see the `class_indices` output below).
- **class_mode = "categorical" or "sparse"** # If there are more than two classes

In [7]:
# Display the mapping from class sub-directory name to the integer label the
# generator assigns to it.
training_set.class_indices

{'cats': 0, 'dogs': 1}

In [9]:
# The test images are only rescaled (no augmentation) so evaluation sees the
# pictures as they are.
test_data = ImageDataGenerator(rescale=1 / 255)

# Same target size and label mode as the training generator.
test_set = test_data.flow_from_directory(
    directory="dataset/test_set",
    target_size=(64, 64),
    class_mode="binary",
)

Found 2000 images belonging to 2 classes.


## Modelling - Convolutional Neural Network
#### Initialising the CNN

In [10]:
from keras.models import Sequential
# Start from an empty Sequential model; the cells below append layers to it
# one at a time with classifier.add(...).
classifier = Sequential()

### Step 1 - Convolution

In [11]:
from keras.layers import Conv2D, Input

# Declare the input shape with an explicit Input object instead of passing
# `input_shape` to the first layer: Keras 3 emits a UserWarning for the latter
# when the layer is used in a Sequential model.
# Shape is (height, width, channels) = 64x64 RGB images.
classifier.add(Input(shape=(64, 64, 3)))

# First convolution: 32 filters of size 3x3 with ReLU activation.
classifier.add(Conv2D(filters=32, kernel_size=3, activation="relu"))

  super().__init__(


- **input_shape=[64,64,3]** # specifies the shape of the input images. In this case, the images have a height of 64 pixels, a width of 64 pixels, and 3 color channels (RGB).
- **filters=32** --> **Max** # specifies the number of filters that the convolution layer will learn. Each filter is a small matrix that slides over the input image and produces a scalar value for each position. The output of applying all the filters to the input image is a feature map whose depth equals the number of filters. Because the layer uses the default `padding="valid"`, the 3x3 kernel trims one pixel from every border, so the 64x64 input produces a 62x62x32 output (use `padding="same"` to keep the height and width unchanged).
- **kernel_size=3** # specifies the size of the filters. In this case, the filters are 3x3 matrices. The kernel size determines the receptive field of the filters, which is the area of the input image that influences the output of each filter.

### Step 2 - Max Pooling

In [12]:
from keras.layers import MaxPooling2D

# Downsample the feature maps with non-overlapping 2x2 max-pooling windows.
classifier.add(
    MaxPooling2D(
        pool_size=(2, 2),
        strides=(2, 2),
    )
)

- **pool_size = 2**: This argument specifies the size of the pooling regions, which are rectangular windows that slide over the input tensor. The size can be either an integer or a tuple of 2 integers, representing the height and width of the pooling regions. Here, the pooling regions have a size of 2x2 pixels.
- **strides = 2**: This argument specifies the strides of the pooling regions, which are the distances between two consecutive pooling regions along each dimension. The strides can be either None, an integer, or a tuple of 2 integers. If None, the strides will default to the pool size. Here, the strides are 2x2 pixels, meaning that the pooling regions are non-overlapping.

When choosing `pool_size` and `strides`, the following rules of thumb are worth weighing:

- The size and resolution of the input images: Larger images may require larger pooling regions and strides to reduce the dimensionality and avoid overfitting. Smaller images may require smaller pooling regions and strides to preserve the information and avoid underfitting.
- The complexity and diversity of the features: More complex and diverse features may require smaller pooling regions and strides to capture the details and variations. Less complex and diverse features may require larger pooling regions and strides to reduce the noise and redundancy.
- The architecture and depth of the model: Deeper models may require smaller pooling regions and strides to avoid losing too much information and resolution in the lower layers. Shallower models may require larger pooling regions and strides to achieve sufficient downsampling and abstraction in the higher layers.

### Step 3 - Flattening

In [14]:
from keras.layers import Flatten
# Append a Flatten layer so the pooled feature maps can be fed to the Dense
# layers added in the next step.
classifier.add(Flatten())

### Step 4 - Full Connection 

In [15]:
from keras.layers import Dense

# Fully connected hidden layer: 128 units, ReLU activation.
classifier.add(
    Dense(
        units=128,
        activation="relu",
    )
)

# Output layer: a single sigmoid unit for the binary cat/dog decision.
classifier.add(
    Dense(
        units=1,
        activation="sigmoid",
    )
)

#### Training the CNN Model with Train data & Testing the Model with test data

In [16]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy as the reported metric.
classifier.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [17]:
# Train for 25 epochs, evaluating on the test generator after every epoch.
# The History object returned by fit() is kept in `history` so the per-epoch
# metrics (history.history["loss"], ["val_accuracy"], ...) stay available for
# inspection or plotting, instead of being discarded and shown only as an
# opaque object repr in the cell output.
history = classifier.fit(
    x=training_set,
    validation_data=test_set,
    epochs=25,
)

Epoch 1/25


  self._warn_if_super_not_called()


[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 152ms/step - accuracy: 0.5107 - loss: 0.8281 - val_accuracy: 0.5526 - val_loss: 0.6868
Epoch 2/25
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 125ms/step - accuracy: 0.6493 - loss: 0.6287 - val_accuracy: 0.6969 - val_loss: 0.5899
Epoch 3/25
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 138ms/step - accuracy: 0.6952 - loss: 0.5749 - val_accuracy: 0.7262 - val_loss: 0.5491
Epoch 4/25
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 148ms/step - accuracy: 0.7164 - loss: 0.5533 - val_accuracy: 0.7341 - val_loss: 0.5352
Epoch 5/25
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 134ms/step - accuracy: 0.7449 - loss: 0.5182 - val_accuracy: 0.7475 - val_loss: 0.5269
Epoch 6/25
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 139ms/step - accuracy: 0.7535 - loss: 0.5025 - val_accuracy: 0.7465 - val_loss: 0.5239
Epoch 7/25
[1m252/25

<keras.src.callbacks.history.History at 0x1b6ad37c290>

## Evaluation 
#### Making a Single Prediction

In [18]:
import numpy as np
from PIL import Image

In [20]:
# Load the data.
# convert("RGB") guarantees three colour channels, so grayscale, palette or
# RGBA files still produce the (64, 64, 3) array the network expects.
test_image = Image.open("dataset/single_prediction/cat_or_dog_1.jpg").convert("RGB")

# Data Preprocessing: must mirror what the generators did during training,
# i.e. resize to 64x64 and rescale pixel values by 1/255. Feeding raw 0-255
# values to a network trained on [0, 1] inputs gives meaningless predictions.
test_image = test_image.resize((64, 64))
test_image = np.asarray(test_image, dtype="float32") / 255
# Add the leading batch dimension: (64, 64, 3) -> (1, 64, 64, 3).
test_image = np.expand_dims(test_image, axis=0)

# Prediction: `result` has shape (1, 1) and holds the sigmoid output.
result = classifier.predict(test_image)

# Evaluation: the sigmoid output is a probability for label 1 ('dogs' in
# training_set.class_indices), so threshold it at 0.5. Comparing with `== 1`
# only matches when the float saturates to exactly 1.0.
if result[0][0] > 0.5:
    print("Dog")
else:
    print("Cat")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Dog
