In [None]:
# import zipfile
# import os

# # Define the path to the uploaded zip file and the extraction directory
# zip_file_path = '/content/drive/MyDrive/Datasets/Copy of CatsVsDogs.zip'
# extraction_dir = '/content/drive/MyDrive/Datasets/DogsVSCats'

# # Unzip the file
# with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
#     zip_ref.extractall(extraction_dir)

# # Verify the extraction
# print("Extracted files:")
# print(os.listdir(extraction_dir))

# Practice Exercise on Convolutional Neural Networks (CNN)

Welcome to the Practice Exercise on Convolutional Neural Networks (CNN). In this exercise, we will focus on an image classification task where the goal is to predict whether an image contains a cat or a dog. We will work with a dataset of labeled images and build, train, and evaluate a CNN model. This practice will allow you to apply your understanding of CNNs to achieve high accuracy in image classification.

---

## Dataset Overview

### **Dataset Name:** Cats and Dogs Image Dataset

### **Description:**  
The dataset contains images of cats and dogs labeled for classification purposes. Each image belongs to one of the two classes: 'Cat' or 'Dog'. The goal is to classify the images correctly based on the content (i.e., whether the image is of a cat or a dog). The dataset is often used to test image classification models.

### **Features:**
There are two main folders which are:
- `Cat`: Images labeled as containing a cat.
- `Dog`: Images labeled as containing a dog.

### **Target Variable:**
- The goal is to predict whether an image contains a cat or a dog.


## Data Loading and Preprocessing


We will start by loading the dataset and preprocessing the images. This includes:
- Resizing images .
- Normalizing pixel values.

Add more if needed!


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from PIL import Image
import os
from glob import glob

In [3]:
dir = '/content/drive/MyDrive/Datasets/DogsVSCats/content/PetImages/'
dog_dir = os.path.join(dir, 'Dog')
cat_dir = os.path.join(dir, 'Cat')


Found 24999 files belonging to 2 classes.
Using 17500 files for training.


In [None]:
# cat_files = os.listdir(os.path.join(dir, 'Cat'))
# dog_files = os.listdir(os.path.join(dir, 'Dog'))

# # Split cat and dog files into train and temp (combined validation + test)
# cat_train, cat_temp = train_test_split(cat_files, test_size=0.3, random_state=42)
# dog_train, dog_temp = train_test_split(dog_files, test_size=0.3, random_state=42)


In [None]:
# # Further split temp into validation and test
# cat_val, cat_test = train_test_split(cat_temp, test_size=0.5, random_state=42)
# dog_val, dog_test = train_test_split(dog_temp, test_size=0.5, random_state=42)


In [None]:
# from PIL import Image
# import numpy as np
# import os

# def load_images(file_list, base_dir, target_size=(128, 128)):
#     images = []
#     for file in file_list:
#         img_path = os.path.join(base_dir, file)
#         img = Image.open(img_path).resize(target_size)
#         img = np.array(img) / 255.0  # Normalize pixel values to [0, 1]
#         images.append(img)
#     return np.array(images)

# # Base directories
# cat_dir = os.path.join(dir, 'Cat')
# dog_dir = os.path.join(dir, 'Dog')

# # Load training images
# train_images = load_images(cat_train, cat_dir) + load_images(dog_train, dog_dir)
# train_labels = np.array([0] * len(cat_train) + [1] * len(dog_train))
# print('finish 1')
# # Load validation images
# val_images = load_images(cat_val, cat_dir) + load_images(dog_val, dog_dir)
# val_labels = np.array([0] * len(cat_val) + [1] * len(dog_val))
# print('finish 2')
# # Load test images
# test_images = load_images(cat_test, cat_dir) + load_images(dog_test, dog_dir)
# test_labels = np.array([0] * len(cat_test) + [1] * len(dog_test))
# print('finish 3')

## Data Splitting
In this section, we will split our dataset into three parts:

* Training set (70%): This portion of the dataset is used to train the CNN model.
* Validation set (15%): This portion is used to validate the model during training, helping us tune hyperparameters and avoid overfitting.
* Test set (15%): This portion is used to evaluate the model after training, to check its generalization to unseen data.

In [4]:


# Load the dataset and split into training and a combined validation+test set
train_dataset = image_dataset_from_directory(
    dir,
    validation_split=0.3,  # Reserve 30% for validation + test (70% for training)
    subset="training",
    seed=42,
    image_size=(64, 64),
    batch_size=32  # Optional: Set batch size here

)

validation_test_dataset = image_dataset_from_directory(
    dir,
    validation_split=0.3,  # Reserve 30% for validation + test
    subset="validation",   # This subset will be further split into validation and test
    seed=42,
    image_size=(64, 64),
    batch_size=32
)

# Calculate the number of batches in the combined validation+test dataset
val_test_batches = tf.data.experimental.cardinality(validation_test_dataset).numpy()

# Further split the combined validation+test dataset into validation and test datasets
val_dataset = validation_test_dataset.take(val_test_batches // 2)  # 15% for validation
test_dataset = validation_test_dataset.skip(val_test_batches // 2) # 15% for testing


Found 24999 files belonging to 2 classes.
Using 17500 files for training.
Found 24999 files belonging to 2 classes.
Using 7499 files for validation.


In [None]:
# def normalize(image, label):
#     image = tf.cast(image, tf.float32) / 255.0  # Scale pixel values to [0, 1]
#     return image, label

# train_dataset = train_dataset.map(normalize)
# val_dataset = val_dataset.map(normalize)
# test_dataset = test_dataset.map(normalize)


In [None]:
train_dataset

<_MapDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

## Building the CNN Model


Now, we will define our CNN architecture using `tensorflow.keras`. The architecture will consist of:
- Convolutional layers followed by max-pooling layers
- Flatten layer
- Dense layers
- Output layer


In [None]:
model = Sequential([
    Conv2D(4, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D(2, 2),
    Conv2D(8, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    # Conv2D(64, (3, 3), activation='relu'),
    # MaxPooling2D(2, 2),
    Flatten(),
    Dense(2, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Training the Model


Train the CNN model using the `fit` function. We will use the training and validation we created earlier.

Fill in the code to train the model for a specified number of epochs.


In [None]:
history = model.fit(train_dataset, epochs=10, validation_data=val_dataset)

Epoch 1/10
[1m298/547[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m15:52[0m 4s/step - accuracy: 0.5175 - loss: 0.6928

InvalidArgumentError: Graph execution error:

Detected at node decode_image/DecodeImage defined at (most recent call last):
<stack traces unavailable>
Number of channels inherent in the image must be 1, 3 or 4, was 2
	 [[{{node decode_image/DecodeImage}}]]
	 [[IteratorGetNext]] [Op:__inference_one_step_on_iterator_2429]

## Evaluating the Model


After training, evaluate the model on the validation data to check its performance.


## Testing with New Images

Finally, let's test the model with some new images. Preprocess the images and use the trained model to predict whether the image is of a cat or a dog.
