### Load training data

Data: [The Oxford-IIIT Pet Dataset](https://www.robots.ox.ac.uk/~vgg/data/pets/)

In [1]:
import os
import shutil


source_dir = './dogvscat/images'
binary_dir = './dogvscat/binary_classification'
multiclass_dir = './dogvscat/multiclass_classification'

os.makedirs(binary_dir, exist_ok=True)
os.makedirs(multiclass_dir, exist_ok=True)

os.makedirs(os.path.join(binary_dir, 'Cat'), exist_ok=True)
os.makedirs(os.path.join(binary_dir, 'Dog'), exist_ok=True)



#classify types
for filename in os.listdir(source_dir):
    if filename.endswith('.jpg'):
        # Getting names of the breeds(without ".jpg"
        breed = '_'.join(filename.split('_')[:-1])  # keep"_"
        source_file = os.path.join(source_dir, filename)

        # Divide into Cat and Dog
        if breed[0].isupper():  # uppercase = Cat
            shutil.copy(source_file, os.path.join(binary_dir, 'Cat', filename))
            # Copy to breed folder
            breed_dir = os.path.join(multiclass_dir, breed)
            os.makedirs(breed_dir, exist_ok=True)
            shutil.copy(source_file, os.path.join(breed_dir, filename))
        elif breed[0].islower():  # lowercase = Dog
            shutil.copy(source_file, os.path.join(binary_dir, 'Dog', filename))
            # Copy to breed folder
            breed_dir = os.path.join(multiclass_dir, breed)
            os.makedirs(breed_dir, exist_ok=True)
            shutil.copy(source_file, os.path.join(breed_dir, filename))

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import torchvision.models as models
import torchvision
from torchvision.datasets import ImageFolder
from torch.autograd.variable import Variable
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import os
from torch.optim.lr_scheduler import StepLR

### First trial with ResNet18

In [5]:
def train_model(model, criterion, optimizer, num_epochs=10):
    model.train()  # model mode: training
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            images = images.to(device)  # send images to GPU
            labels = labels.to(device)  # send labels to GPU

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

def test_model():
    model.eval()  # model mode: evaluating
    total = correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)  # send images to GPU
            labels = labels.to(device)  # send labels to GPU
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

In [11]:
#1.binary classification problem

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
## If MacOS, use this 👇 device selector instead
# device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
weights = models.ResNet18_Weights.DEFAULT
model = models.resnet18(weights=weights).to(device)


# data transform
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# dataset path
dataset_path = './oxford-iiit-pet/images/images//binary_classification'
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

total_size = len(full_dataset)
train_size = int(0.8 * total_size)
test_size = int(0.2 * total_size)


# Split the dataset
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])


# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)



num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2).to(device)  # Adjusting the final layer for binary classification


# Loss and Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr=0.001)

train_model(model, criterion, optimizer)
test_model()

Epoch [1/10], Loss: 0.0945
Epoch [2/10], Loss: 0.0292
Epoch [3/10], Loss: 0.0169
Epoch [4/10], Loss: 0.0016
Epoch [5/10], Loss: 0.0375
Epoch [6/10], Loss: 0.0211
Epoch [7/10], Loss: 0.1927
Epoch [8/10], Loss: 0.0171
Epoch [9/10], Loss: 0.0010
Epoch [10/10], Loss: 0.1240
Test Accuracy: 90.26%


### Adjustment for binary task:  
Epochs - 20  
Learning rate - 0.00005  
Added settings to image transformer - RandomCrop (instead of CenterCrop), RandomHorizontalFlip(), RandomRotation(15)

In [18]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
weights = models.ResNet18_Weights.DEFAULT
model = models.resnet18(weights=weights).to(device)


transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(), 
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# dataset path
dataset_path = './oxford-iiit-pet/images/images//binary_classification'
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

total_size = len(full_dataset)
train_size = int(0.8 * total_size)
test_size = int(0.2 * total_size)


# Split the dataset
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])


# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load pretrained ResNet
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2).to(device)  # Adjusting the final layer for binary classification


# Loss and Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr=0.00005)#enhanced a lot
num_epoch = 20

# Training and Evaluation
train_model(model, criterion, optimizer, num_epoch)
test_model()

Epoch [1/20], Loss: 0.1099
Epoch [2/20], Loss: 0.0080
Epoch [3/20], Loss: 0.0009
Epoch [4/20], Loss: 0.0319
Epoch [5/20], Loss: 0.0004
Epoch [6/20], Loss: 0.1547
Epoch [7/20], Loss: 0.0020
Epoch [8/20], Loss: 0.0005
Epoch [9/20], Loss: 0.0005
Epoch [10/20], Loss: 0.0006
Epoch [11/20], Loss: 0.0171
Epoch [12/20], Loss: 0.0296
Epoch [13/20], Loss: 0.0061
Epoch [14/20], Loss: 0.0030
Epoch [15/20], Loss: 0.0005
Epoch [16/20], Loss: 0.0006
Epoch [17/20], Loss: 0.0004
Epoch [18/20], Loss: 0.0003
Epoch [19/20], Loss: 0.0001
Epoch [20/20], Loss: 0.0000
Test Accuracy: 99.32%


### First Trial for multi-class problem

In [6]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
weights = models.ResNet18_Weights.DEFAULT
model = models.resnet18(weights=weights).to(device)


for param in model.parameters():
    param.requires_grad = False  # freeze all layers first

# finetune: unfreeze layer4
for param in model.layer4.parameters():
    param.requires_grad = True

# replace the fullly-connected layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 37).to(device)

# data transform and data augmentation
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# dataset path
dataset_path = './dogvscat/multiclass_classification'
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

total_size = len(full_dataset)
train_size = int(0.8 * total_size)
test_size = int(0.2 * total_size)
# Split the dataset
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])
# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# setting learning rate with Adam optimizer
optimizer = Adam([
    {'params': model.layer4.parameters(), 'lr': 1e-4},  
    {'params': model.fc.parameters()}  
], lr=1e-3)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss().to(device)



# Training and Evaluation
train_model(model, criterion, optimizer)
test_model()

Epoch [1/10], Loss: 1.0054
Epoch [2/10], Loss: 0.4915
Epoch [3/10], Loss: 0.9676
Epoch [4/10], Loss: 0.8297
Epoch [5/10], Loss: 0.5319
Epoch [6/10], Loss: 0.3422
Epoch [7/10], Loss: 0.6671
Epoch [8/10], Loss: 0.3968
Epoch [9/10], Loss: 0.4686
Epoch [10/10], Loss: 0.4815
Test Accuracy: 81.33%


### Adjustment for multi-class task

In [7]:
def train_model_earlystop(model, criterion, optimizer, scheduler, num_epochs=25):
    best_accuracy = 0
    patience = 10
    no_improve_epochs = 0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        # Validation phase
        val_accuracy = validate_model(model, val_loader)
        
        scheduler.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, '
              f'LR: {optimizer.param_groups[0]["lr"]:.6f}, Val Accuracy: {val_accuracy:.2f}%')
        
        # Check for improvement
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            no_improve_epochs = 0
            torch.save(model.state_dict(), 'best_model.pth')
            print("Saved new best model")
        else:
            no_improve_epochs += 1
        
        if no_improve_epochs >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

def validate_model(model, data_loader):
    model.eval()
    total = correct = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

def retrain_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        for images, labels in full_train_loader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

In [9]:
device = torch.device("cuda:0") # if torch.backends.cuda.is_available() else torch.device("cpu")
weights = models.ResNet50_Weights.DEFAULT
model = models.resnet50(weights=weights).to(device)


for param in model.parameters():
    param.requires_grad = False  # unfrozed all layers

# adjustment
for param in model.layer3.parameters():
    param.requires_grad = True
for param in model.layer4.parameters():
    param.requires_grad = True

# replace the last layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 37).to(device)


transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# Dataset and DataLoader
dataset_path = './dogvscat/multiclass_classification'
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

total_size = len(full_dataset)
train_size = int(0.70 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

# Split the dataset
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


optimizer = Adam([
    {'params': model.layer3.parameters(), 'lr': 0.0001},  
    {'params': model.layer4.parameters(), 'lr': 0.00005},  
    {'params': model.fc.parameters()}  # 
], lr=0.001)

scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss().to(device)

train_model_earlystop(model, criterion, optimizer, scheduler, num_epochs=50)
# Load the best model and test it
model.load_state_dict(torch.load('best_model.pth', weights_only=True))

#train the full training set for best model
#full_train_dataset = torch.utils.data.ConcatDataset([train_dataset, val_dataset])
#full_train_loader = DataLoader(full_train_dataset, batch_size=32, shuffle=True)
#retrain_model(model, criterion, optimizer, num_epochs=10)
#test_model()

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /home/akhmed/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100.0%
  return F.conv2d(input, weight, bias, self.stride,


Epoch [1/50], Loss: 0.1817, LR: 0.000100, Val Accuracy: 91.88%
Saved new best model
Epoch [2/50], Loss: 0.3701, LR: 0.000100, Val Accuracy: 93.50%
Saved new best model
Epoch [3/50], Loss: 0.1319, LR: 0.000100, Val Accuracy: 94.68%
Saved new best model
Epoch [4/50], Loss: 0.3188, LR: 0.000100, Val Accuracy: 93.59%
Epoch [5/50], Loss: 0.3302, LR: 0.000010, Val Accuracy: 93.86%
Epoch [6/50], Loss: 0.0299, LR: 0.000010, Val Accuracy: 95.22%
Saved new best model
Epoch [7/50], Loss: 0.0150, LR: 0.000010, Val Accuracy: 94.77%
Epoch [8/50], Loss: 0.0759, LR: 0.000010, Val Accuracy: 94.68%
Epoch [9/50], Loss: 0.0447, LR: 0.000010, Val Accuracy: 95.31%
Saved new best model
Epoch [10/50], Loss: 0.0149, LR: 0.000001, Val Accuracy: 95.22%
Epoch [11/50], Loss: 0.0081, LR: 0.000001, Val Accuracy: 95.13%
Epoch [12/50], Loss: 0.0052, LR: 0.000001, Val Accuracy: 95.67%
Saved new best model
Epoch [13/50], Loss: 0.0149, LR: 0.000001, Val Accuracy: 95.76%
Saved new best model
Epoch [14/50], Loss: 0.0967, L

<All keys matched successfully>

In [62]:
def retrain_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        for images, labels in full_train_loader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

full_train_dataset = torch.utils.data.ConcatDataset([train_dataset, val_dataset])
full_train_loader = DataLoader(full_train_dataset, batch_size=32, shuffle=True)
retrain_model(model, criterion, optimizer, num_epochs=10)
test_model()

Epoch [1/10], Loss: 0.1009
Epoch [2/10], Loss: 0.0880
Epoch [3/10], Loss: 0.0857
Epoch [4/10], Loss: 0.1197
Epoch [5/10], Loss: 0.2051
Epoch [6/10], Loss: 0.0053
Epoch [7/10], Loss: 0.0056
Epoch [8/10], Loss: 0.2822
Epoch [9/10], Loss: 0.0064
Epoch [10/10], Loss: 0.1795
Test Accuracy: 95.04%
