<a href="https://colab.research.google.com/github/Hamza-Ali0237/PyTorch-RCNN-Cats-And-Dogs/blob/main/RCNN-PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Dataset: [https://www.kaggle.com/datasets/tongpython/cat-and-dog](https://www.kaggle.com/datasets/tongpython/cat-and-dog)

In [6]:
# Mount Google Drive so the dataset folders under /content/drive can be read
# by the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [7]:
import os
import torch as T
import torch.nn as nn
import torchvision as tv
from torchvision.models import vgg16, VGG16_Weights

In [30]:
class ObjectDetectorCNN(nn.Module):
  """Two-headed detector built on a pretrained VGG16 feature extractor.

  The VGG16 convolutional layers are followed by a 7x7 adaptive average pool
  and a flatten. The flattened feature vector feeds two independent heads:
  a classifier that emits one raw score per class and a regressor that emits
  four box values per image.

  Submodule names (`backbone`, `classifier`, `box_regressor`) and the layer
  order inside each head are part of the state-dict layout and must not be
  changed if existing checkpoints should keep loading.

  Args:
    num_classes: Number of outputs of the classification head. Defaults to 2.
    hidden_features: Width of the hidden layer used in both heads.
      Defaults to 512.

  Raises:
    ValueError: If `num_classes` or `hidden_features` is not positive.
  """

  def __init__(self, num_classes: int = 2, hidden_features: int = 512):
    if num_classes < 1 or hidden_features < 1:
      raise ValueError("num_classes and hidden_features must be positive integers")
    super().__init__()

    # Define VGG16 model as backbone: keep only the convolutional feature
    # layers, then pool to a fixed 7x7 map and flatten it into a vector.
    vgg = vgg16(weights=VGG16_Weights.DEFAULT)
    self.backbone = nn.Sequential(
        *vgg.features.children(),
        nn.AdaptiveAvgPool2d((7, 7)),
        nn.Flatten()
    )
    # 512 channels out of the VGG16 feature stack times the 7x7 pooled map.
    input_features = 7 * 7 * 512

    # The classifier is built before the regressor so that parameter
    # initialisation consumes the random generator in the same order as before.
    self.classifier = self._make_head(input_features, hidden_features, num_classes)
    self.box_regressor = self._make_head(input_features, hidden_features, 4)

  @staticmethod
  def _make_head(in_features: int, hidden_features: int, out_features: int) -> nn.Sequential:
    """Build a Linear -> ReLU -> Linear head mapping features to `out_features`."""
    return nn.Sequential(
        nn.Linear(in_features, hidden_features),
        nn.ReLU(),
        nn.Linear(hidden_features, out_features)
    )

  def forward(self, x: T.Tensor):
    """Run the backbone once and evaluate both heads on the shared features.

    Args:
      x: Batch of images. The first backbone layer is a 3-channel
        convolution, so inputs are expected to have 3 channels.

    Returns:
      A tuple `(bboxes, classes)`: the box regressor output with 4 values per
      image and the classifier output with `num_classes` raw scores per image.
    """
    features = self.backbone(x)
    bboxes = self.box_regressor(features)
    classes = self.classifier(features)
    return bboxes, classes

In [31]:
# Instantiate the detector with its default arguments; the constructor builds
# the VGG16 backbone using VGG16_Weights.DEFAULT.
model = ObjectDetectorCNN()

In [32]:
# Print the module tree to check how the backbone and both heads were assembled.
print(model)

ObjectDetectorCNN(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding

In [23]:
# Directories on the mounted Drive holding the training and test images.
train_set, test_set = (
    "/content/drive/MyDrive/cats-dogs/train_set/",
    "/content/drive/MyDrive/cats-dogs/test_set/",
)

In [24]:
# Image preprocessing: resize to a fixed 224x224, convert to a tensor, then
# normalize each channel with the mean/std values below.
# NOTE(review): presumably the ImageNet statistics expected by the pretrained
# VGG16 weights — confirm against the weights' documented transforms.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocessing_steps = [
    tv.transforms.Resize((224, 224)),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(channel_mean, channel_std),
]
transform = tv.transforms.Compose(preprocessing_steps)

In [25]:
# Load dataset: both splits are read from their folders with the same
# preprocessing pipeline.
train_dataset = tv.datasets.ImageFolder(train_set, transform=transform)
test_dataset = tv.datasets.ImageFolder(test_set, transform=transform)

batch_size = 32
# Never request more loader workers than the machine has CPUs: DataLoader
# warns (and may slow down or stall) when num_workers exceeds the available
# cores, which happens with a fixed value of 4 on small Colab VMs.
num_workers = min(4, os.cpu_count() or 1)

dataloader_train = T.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# NOTE(review): the test loader is shuffled as well; evaluation metrics do not
# need it — confirm whether a fixed order would be preferable here.
dataloader_test = T.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)



In [26]:
# Class names exposed by the training ImageFolder dataset.
classes = train_dataset.classes
print(f"Classes: {classes}")

Classes: ['cats', 'dogs']


In [None]:
# Training the model
# NOTE(review): the classifier head ends in a plain Linear layer with no
# sigmoid, while BCELoss expects probabilities in [0, 1] and targets shaped
# like its input — confirm this criterion matches how it is applied, or
# consider nn.CrossEntropyLoss for integer class labels.
class_criterion = nn.BCELoss()

# Switch the module to training mode. `training` is a boolean attribute of
# nn.Module, so calling it raises TypeError; `train()` is the method.
model.train()