In [1]:
!conda install pytorch torchvision torchaudio cpuonly -c pytorch

Channels:
 - pytorch
 - defaults
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt

In [3]:
import os
from torch.utils.data import Dataset
from PIL import Image

class ImageDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``data_dir`` per class.

    Every regular file found directly inside a class directory is treated as
    an image of that class. Labels are integer positions in ``self.classes``.

    Args:
        data_dir: Root directory whose immediate sub-directories are classes.
        transform: Optional callable applied to the loaded RGB PIL image.

    Attributes:
        classes: Class directory names, sorted alphabetically.
        image_paths: Full path of every image file, grouped by class.
        labels: Integer label for each entry of ``image_paths``.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order. Sorting makes the
        # class -> index mapping deterministic, so two datasets built from
        # directories with the same class folders (e.g. train and val) assign
        # the same label to the same class.
        self.classes = sorted(
            entry
            for entry in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, entry))
        )
        self.image_paths = []
        self.labels = []

        for idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(data_dir, class_name)
            # Sorted as well, so sample order is reproducible across runs.
            for img_name in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, img_name)
                # Skip nested directories; only regular files are samples.
                if os.path.isfile(img_path):
                    self.image_paths.append(img_path)
                    self.labels.append(idx)

    def __len__(self):
        """Return the number of image files discovered under ``data_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and passed through ``self.transform``
        when one was supplied.
        """
        img_path = self.image_paths[idx]
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the with-block exits instead of lingering
        # until garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [4]:
# Preprocessing shared by the training and validation pipelines. Keeping the
# values in one place guarantees both pipelines resize and normalise the same way.
IMAGE_SIZE = (128, 128)
# Per-channel RGB statistics that torchvision documents for its
# ImageNet-pretrained models.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training: resize, then random flip/rotation for augmentation, then
# tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

# Validation: same resize and normalisation, but no random augmentation so
# that evaluation is deterministic.
val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

In [5]:
# Define paths
train_dir = "D:\\BODY\\Academia_Lboro\\AI & ML\\coursework_task01_imageset\\imageset\\train"
val_dir = "D:\\BODY\\Academia_Lboro\\AI & ML\\coursework_task01_imageset\\imageset\\val"

# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 32

# Create datasets
train_dataset = ImageDataset(train_dir, transform=train_transform)
val_dataset = ImageDataset(val_dir, transform=val_transform)

# Labels are positional indices into each dataset's own `classes` list, so the
# two lists must match exactly. Otherwise validation metrics would silently
# compare predictions against the wrong classes.
if train_dataset.classes != val_dataset.classes:
    raise ValueError(
        "Train and validation class lists differ: "
        f"{train_dataset.classes} vs {val_dataset.classes}"
    )

# Create data loaders. Only the training data is shuffled; validation order
# does not affect the aggregated metrics.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [6]:
# Select the compute device (GPU when available, otherwise CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load a pre-trained ResNet18 model. The `pretrained=True` flag is deprecated
# in torchvision >= 0.13 in favour of the explicit `weights` enum;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to load.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer so the output size matches the
# number of classes found in the training directory.
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move the model (including the new head) to the selected device.
model = model.to(device)



In [7]:
# Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy loss for multi-class classification on the model's outputs.
criterion = nn.CrossEntropyLoss()

In [8]:
num_epochs = 10


def _accumulate_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given, each batch also performs a backward pass and
    an optimizer step; otherwise the pass is forward-only. The caller is
    responsible for setting train/eval mode and any no-grad context.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        if optimizer is not None:
            # Zero the parameter gradients
            optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        if optimizer is not None:
            # Backward pass and optimize
            loss.backward()
            optimizer.step()

        # Track statistics. The loss is weighted by batch size so that a
        # smaller final batch does not skew the epoch mean (assumes the
        # criterion returns the batch mean, the nn.CrossEntropyLoss default).
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("data loader yielded no samples")
    return loss_sum / total, 100.0 * correct / total


def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one epoch; return ``(mean_loss, accuracy_percent)``."""
    model.train()
    return _accumulate_epoch(model, loader, criterion, device, optimizer)


def _evaluate(model, loader, criterion, device):
    """Evaluate ``model`` without gradients; return ``(mean_loss, accuracy_percent)``."""
    model.eval()
    with torch.no_grad():
        return _accumulate_epoch(model, loader, criterion, device)


for epoch in range(num_epochs):
    # Print training statistics
    train_loss, train_acc = _train_one_epoch(model, train_loader, criterion, optimizer, device)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%")

    # Print validation statistics
    val_loss, val_acc = _evaluate(model, val_loader, criterion, device)
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")

Epoch [1/10], Loss: 0.8353, Accuracy: 74.21%
Validation Loss: 0.6346, Validation Accuracy: 79.85%
Epoch [2/10], Loss: 0.6008, Accuracy: 81.19%
Validation Loss: 0.7755, Validation Accuracy: 77.68%
Epoch [3/10], Loss: 0.4960, Accuracy: 84.48%
Validation Loss: 0.5437, Validation Accuracy: 83.54%
Epoch [4/10], Loss: 0.4133, Accuracy: 86.58%
Validation Loss: 0.6269, Validation Accuracy: 81.94%
Epoch [5/10], Loss: 0.3904, Accuracy: 87.68%
Validation Loss: 0.4779, Validation Accuracy: 86.04%
Epoch [6/10], Loss: 0.3380, Accuracy: 89.10%
Validation Loss: 0.6290, Validation Accuracy: 83.18%
Epoch [7/10], Loss: 0.3346, Accuracy: 89.11%
Validation Loss: 0.5324, Validation Accuracy: 84.56%
Epoch [8/10], Loss: 0.2875, Accuracy: 90.69%
Validation Loss: 0.4688, Validation Accuracy: 86.01%
Epoch [9/10], Loss: 0.2735, Accuracy: 90.96%
Validation Loss: 0.4730, Validation Accuracy: 86.68%
Epoch [10/10], Loss: 0.2410, Accuracy: 92.22%
Validation Loss: 0.4774, Validation Accuracy: 86.19%


In [9]:
# Persist only the learned parameters (the state dict), not the module object.
torch.save(
    obj=model.state_dict(),
    f="D:\\BODY\\Academia_Lboro\\AI & ML\\coursework_task01_imageset\\imageset\\image_classification_resnet18.pth",
)

In [10]:
def classify_test_image(model, image_path, transform, class_names, device):
    """Predict the class name of a single image file.

    Args:
        model: Classifier called on a batch of one transformed image; it is
            switched to evaluation mode and left in that mode on return.
        image_path: Path of the image file to classify.
        transform: Callable turning the RGB PIL image into a tensor.
        class_names: Sequence mapping a predicted index to its class name.
        device: Device the input tensor is moved to before inference.

    Returns:
        The entry of ``class_names`` at the index of the highest model output.
    """
    # Load the image. convert() returns a new, fully loaded image, so the file
    # can be closed deterministically when the with-block exits.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Preprocess and add a leading batch dimension of size 1.
    image = transform(image).unsqueeze(0)
    image = image.to(device)

    # Set the model to evaluation mode
    model.eval()

    # Perform inference without tracking gradients
    with torch.no_grad():
        output = model(image)
        _, predicted = output.max(1)
        predicted_class = class_names[predicted.item()]

    return predicted_class

# Example usage: classify one validation image with the trained model, using
# the deterministic validation preprocessing and the training class names.
test_image_path = "D:\\BODY\\Academia_Lboro\\AI & ML\\coursework_task01_imageset\\imageset\\val\\n03000684\\ILSVRC2012_val_00045501.JPEG"  #test image
predicted_class = classify_test_image(
    model=model,
    image_path=test_image_path,
    transform=val_transform,
    class_names=train_dataset.classes,
    device=device,
)
print(f"Predicted Class: {predicted_class}")

Predicted Class: n03000684
