In [5]:
import torch
import torchvision.models as models
from torch.utils.data import Dataset
import os
import pandas as pd
from torchvision.io import read_image
from torchvision import transforms
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
from PIL import Image
from torchvision.transforms.functional import InterpolationMode
import numpy as np

In [6]:
class OxfordIIITPetDataset(Dataset):
    @staticmethod
    def extract_breed(name):
        return ' '.join(name.split("_")[:-1]).lower().title()

    def __init__(self, img_dir, transform=None, transform_label=None):
        self.img_dir = img_dir
        self.image_names = [img_name for img_name in os.listdir(img_dir) if img_name.endswith('.jpg')]
        self.transform = transform
        self.transform_label = transform_label
        self.labels = list(set([self.extract_breed(name) for name in self.image_names]))
        self.labels_to_idx = {item: idx for idx, item in enumerate(self.labels)}

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        image_name = self.image_names[idx]
        img_path = os.path.join(self.img_dir, image_name)
        image = Image.open(img_path).convert('RGB')
        label = self.extract_breed(image_name)
        label = self.labels_to_idx[label]
        
        if self.transform:
            image = self.transform(image)
        if self.transform_label:
            label = self.transform_label(label)
        return image, label

In [7]:
transform = transforms.Compose([
    transforms.Resize((256, 256), interpolation=InterpolationMode.BILINEAR),
    transforms.CenterCrop((224, 224)), 
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.RandomRotation(degrees=20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [8]:
dataset = OxfordIIITPetDataset(img_dir='./Dataset/images', transform=transform)

train_dataset, test_dataset = random_split(dataset, [0.8, 0.2]) 
train_dl = DataLoader(train_dataset, batch_size=100, shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=10, shuffle=False)

In [9]:
pretrained_model = models.resnet18(weights="IMAGENET1K_V1")

for parameter in pretrained_model.parameters():
    parameter.requires_grad = False

pretrained_model.fc = torch.nn.Linear(pretrained_model.fc.in_features, len(dataset.labels))

optimizer = torch.optim.Adam(pretrained_model.parameters())

In [10]:
def train_model(model: OxfordIIITPetDataset, train_loader: DataLoader, optimizer, epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    loss_func = nn.CrossEntropyLoss()
    for epoch in range(epochs):  # loop over the dataset multiple times
        model.train()

        i = 0

        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            i += train_loader.batch_size
            if i % 2000 == 0:    # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0

    print('Finished Training')

In [11]:
train_model(pretrained_model, train_dl, optimizer, epochs=3)

[1,  2001] loss: 0.031
[1,  4001] loss: 0.021
[1,  6001] loss: 0.015
[2,  2001] loss: 0.011
[2,  4001] loss: 0.009
[2,  6001] loss: 0.009
[3,  2001] loss: 0.008
[3,  4001] loss: 0.007
[3,  6001] loss: 0.007
Finished Training


In [12]:
def test_accuracy(model: OxfordIIITPetDataset, test_loader: DataLoader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    correct = 0
    total = 0
    model.eval()

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)  # Move inputs to device (CPU or GPU)
            labels = labels.to(device)  # Move labels to device (CPU or GPU)
            
            # Forward pass
            outputs = model(inputs)
            
            # Get predictions
            _, predicted = torch.max(outputs, 1)
            
            # Update counts
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    # Calculate accuracy
    accuracy = correct / total
    
    return accuracy

In [13]:
test_accuracy(pretrained_model, test_dl)

0.8328822733423545