# `Module 7`: Learning from Pre-Built Models

### Problem Statement:

* The categorization of images into distinct classes is a pervasive task in computer vision, and it has a wide array of applications, including in pet identification and animal monitoring systems. 
* we aim to develop a model capable of accurately distinguishing between cat and dog images. 
* Instead of building a convolutional neural network (CNN) from scratch, we will leverage transfer learning using the VGG16 model, a pre-trained model on the ImageNet dataset. 
* VGG16 is renowned for its effectiveness in image recognition tasks, but it does not have broad categories for cats and dogs. 
* Therefore, we will utilize the convolutional (Conv) layers of VGG16 for feature extraction and add custom fully connected (Cat) layers for the classification task.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision

from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn

from PIL import Image

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
# Load pre-trained VGG16 model
model = models.vgg16(pretrained=True)
print(model.classifier)



Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)


In [4]:
# Freeze convolutional base
for param in model.features.parameters():
    param.requires_grad = False

In [5]:
# Modify classifier 
model.classifier = nn.Sequential(
    nn.Linear(25088, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 1),
    nn.Sigmoid()
)

In [6]:
custom_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
dataset = datasets.ImageFolder(root="dataset/cat_dog_dataset", transform=custom_transform)

In [8]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_data, test_data = random_split(dataset, [train_size, test_size])

In [9]:
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=True)

In [10]:
model = model.to(device)

In [11]:
print(model.classifier)

Sequential(
  (0): Linear(in_features=25088, out_features=256, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=256, out_features=1, bias=True)
  (4): Sigmoid()
)


In [12]:
epoch_size = 10
lr = 0.01

In [13]:
criterion = nn.BCELoss() # Binary Cross Entropy Loss
optimizer = torch.optim.SGD(model.classifier.parameters(), lr=lr, momentum=0.9)

In [14]:
model.train()
for epoch in range(epoch_size):
    
    total_loss = 0
    total = 0
    for train_img, train_lab in train_loader:
        train_img, train_lab = train_img.to(device), train_lab.to(device).float().unsqueeze(1)
        pred_train_lab = model(train_img)
        
        optimizer.zero_grad()
        loss = criterion(pred_train_lab, train_lab)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{epoch_size}, Loss: {total_loss:0.4f}")

Epoch 1, Loss: 674.2784
Epoch 2, Loss: 1751.8920
Epoch 3, Loss: 1254.7496
Epoch 4, Loss: 1719.2337
Epoch 5, Loss: 1833.5722
Epoch 6, Loss: 1651.1848
Epoch 7, Loss: 1901.5745
Epoch 8, Loss: 1276.4171
Epoch 9, Loss: 2451.0856
Epoch 10, Loss: 3775.5666


In [15]:
torch.save(model.state_dict(), "model/cats_dogs_vgg16.pth")

In [35]:
model.eval()
with torch.no_grad():
    
    total_correct = 0
    total = 0
    for test_img, test_lab in test_loader:
        test_img, test_lab = test_img.to(device), test_lab.to(device).float().unsqueeze(1)

        output = model(test_img)
        pred_test_lab = (output > 0.5).float()
        total_correct += (pred_test_lab == test_lab).sum().item()
        total += test_img.size(0)
        
    print(f"Model Accuracy is {total_correct/total*100:0.2f}%")

Model Accuracy is 59.42%


In [45]:
def predict_image(image_path):
    model.eval()
    img = Image.open(image_path).convert("RGB")
    img = custom_transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img)
    confidence = output.item()
    return ('Dog', confidence) if confidence > 0.5 else ('Cat', 1 - confidence)

print(predict_image('test_img.png'))

('Dog', 1.0)
