In [1]:
import xml.etree.ElementTree as ET
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
def parse_annotation(xml_file):
    """Extract folder, filename, label and bounding box from an annotation XML.

    The file is expected to contain ``folder`` and ``filename`` elements under
    the root plus at least one ``object`` element holding a ``name`` and a
    ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``). Only the first ``object``
    is read.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        A tuple ``(folder, filename, label, (xmin, ymin, xmax, ymax))``; the
        first three entries are element texts, the box coordinates are ints.
    """
    annotation = ET.parse(xml_file).getroot()

    folder = annotation.find('folder').text
    filename = annotation.find('filename').text

    # Look the first <object> up once and reuse it for both label and box
    first_object = annotation.find('object')
    label = first_object.find('name').text

    box = first_object.find('bndbox')
    corners = tuple(
        int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )

    return folder, filename, label, corners

class DogBreedDataset(Dataset):
    """Dataset of images cropped to their annotated bounding box.

    The annotation directory is expected to contain one subfolder per class,
    each holding one annotation XML per image. The matching image is looked up
    at ``images_dir/<subfolder>/<annotation name>.jpg``.

    Args:
        annotations_dir: Directory containing the per-class annotation subfolders.
        images_dir: Directory containing the per-class image subfolders.
        label_map: Mapping from the label text found in the annotation to a
            class index.
        transform: Optional callable applied to the cropped PIL image.

    Attributes:
        data: List of ``(img_path_without_extension, label_idx, bbox)`` tuples,
            built once at construction time.
    """

    def __init__(self, annotations_dir, images_dir, label_map, transform=None):
        self.annotations_dir = annotations_dir
        self.images_dir = images_dir
        self.transform = transform
        self.label_map = label_map
        self.data = self._load_data()

    def _load_data(self):
        """Walk the annotation tree and collect one record per annotation file.

        Entries are visited in sorted order so that sample indices do not
        depend on the filesystem's listing order. Hidden entries (names
        starting with ".") and anything that is not a directory / regular file
        at the expected level are skipped instead of crashing the scan.

        Returns:
            List of ``(img_path_without_extension, label_idx, bbox)`` tuples.

        Raises:
            KeyError: If an annotation's label is missing from ``label_map``.
        """
        data = []
        for subfolder in sorted(os.listdir(self.annotations_dir)):
            subfolder_path = os.path.join(self.annotations_dir, subfolder)
            # Skip stray files and hidden folders such as .ipynb_checkpoints
            if subfolder.startswith('.') or not os.path.isdir(subfolder_path):
                continue

            for xml_file in sorted(os.listdir(subfolder_path)):
                xml_path = os.path.join(subfolder_path, xml_file)
                if xml_file.startswith('.') or not os.path.isfile(xml_path):
                    continue

                _folder, _filename, label, bbox = parse_annotation(xml_path)

                # The image shares the annotation's file name; drop a ".xml"
                # suffix if present so "<name>.jpg" is appended to the bare name.
                stem, ext = os.path.splitext(xml_file)
                image_stem = stem if ext.lower() == '.xml' else xml_file
                img_path = os.path.join(self.images_dir, subfolder, image_stem)

                label_idx = self.label_map[label]
                data.append((img_path, label_idx, bbox))
        return data

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, crop and transform the sample at ``idx``.

        Returns:
            ``(image, label)`` where ``image`` is the bounding-box crop (after
            ``transform`` if one was given) and ``label`` is a one-hot tensor
            of length ``len(label_map)``.
        """
        img_path, label_idx, bbox = self.data[idx]
        img_path = f"{img_path}.jpg"

        # convert() reads the pixel data, so the file can be closed right after
        with open(img_path, 'rb') as f:
            image = Image.open(f).convert("RGB")

        # Crop the image using the bounding box
        xmin, ymin, xmax, ymax = bbox
        image = image.crop((xmin, ymin, xmax, ymax))

        # One-hot encode the label
        label = torch.zeros(len(self.label_map))
        label[label_idx] = 1

        if self.transform:
            image = self.transform(image)
        return image, label

In [3]:
# Dataset location. Written as a raw string so the backslashes of the Windows
# path are taken literally; set the DOG_DATA_DIR environment variable to use
# a different folder without editing the notebook.
data_folder = os.environ.get("DOG_DATA_DIR", r"C:\bccn\programming\data")
images_dir = os.path.join(data_folder, 'images')
annotations_dir = os.path.join(data_folder, 'Annotation')

# The breed name starts after the first 10 characters of each annotation
# subfolder name (presumably an id prefix plus a separator — verify against
# the dataset layout).
BREED_NAME_OFFSET = 10

# Sorted so every run assigns the same index to the same breed; stray files
# and hidden folders are not breeds and are left out.
all_labels = [
    subfolder[BREED_NAME_OFFSET:]
    for subfolder in sorted(os.listdir(annotations_dir))
    if not subfolder.startswith('.')
    and os.path.isdir(os.path.join(annotations_dir, subfolder))
]

label_map = {breed: idx for idx, breed in enumerate(all_labels)}

# Derived from the data so the model head always matches the one-hot label size
num_classes = len(label_map)

In [4]:
# Preprocessing: resize every crop to 224x224, convert to a tensor and
# normalise per channel (the constants look like the usual ImageNet
# statistics for pretrained torchvision models — confirm).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = DogBreedDataset(annotations_dir, images_dir, label_map, transform=transform)

# 80/20 train/test split. The generator is seeded so that, for a given dataset
# ordering, the same samples end up in each subset on every run.
split_seed = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Shuffle only the training data; evaluation order does not affect the loss
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [5]:
# Transfer learning: start from a pretrained ResNet-18.
# Newer torchvision releases deprecate `pretrained=True` in favour of the
# `weights=` argument; fall back to the old flag when the weights enum is absent.
resnet18_weights = getattr(models, "ResNet18_Weights", None)
if resnet18_weights is not None:
    model = models.resnet18(weights=resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Replace the classification head so it emits one logit per class
model.fc = nn.Linear(model.fc.in_features, num_classes)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
model.to(device)

# Fine-tune only the parameters whose name contains one of these markers:
# the first residual block (layer1.0), the last residual block (layer4.1) and
# the new head (fc). Everything else — including the stem conv1/bn1 — is frozen.
trainable_markers = ('layer1.0', 'fc', 'layer4.1')
for name, param in model.named_parameters():
    if not any(marker in name for marker in trainable_markers):
        param.requires_grad = False

# Print, for every parameter, whether it will be updated during training
for name, param in model.named_parameters():
    print(f"{name}: {param.requires_grad}")



Using device: cuda
conv1.weight: False
bn1.weight: False
bn1.bias: False
layer1.0.conv1.weight: True
layer1.0.bn1.weight: True
layer1.0.bn1.bias: True
layer1.0.conv2.weight: True
layer1.0.bn2.weight: True
layer1.0.bn2.bias: True
layer1.1.conv1.weight: False
layer1.1.bn1.weight: False
layer1.1.bn1.bias: False
layer1.1.conv2.weight: False
layer1.1.bn2.weight: False
layer1.1.bn2.bias: False
layer2.0.conv1.weight: False
layer2.0.bn1.weight: False
layer2.0.bn1.bias: False
layer2.0.conv2.weight: False
layer2.0.bn2.weight: False
layer2.0.bn2.bias: False
layer2.0.downsample.0.weight: False
layer2.0.downsample.1.weight: False
layer2.0.downsample.1.bias: False
layer2.1.conv1.weight: False
layer2.1.bn1.weight: False
layer2.1.bn1.bias: False
layer2.1.conv2.weight: False
layer2.1.bn2.weight: False
layer2.1.bn2.bias: False
layer3.0.conv1.weight: False
layer3.0.bn1.weight: False
layer3.0.bn1.bias: False
layer3.0.conv2.weight: False
layer3.0.bn2.weight: False
layer3.0.bn2.bias: False
layer3.0.downsamp

In [6]:
# Cross-entropy loss; L2 regularisation is applied through Adam's weight_decay
l2_lambda = 0.005
criterion = nn.CrossEntropyLoss()

# Only hand the optimizer the parameters that are actually being fine-tuned;
# frozen parameters never receive gradients, so listing them adds nothing.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.01, weight_decay=l2_lambda)

# Training the model
num_epochs = 20  # Adjust the number of epochs as needed

for epoch in range(num_epochs):
    model.train()
    train_loss_sum = 0.0
    train_samples = 0

    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # (usually smaller) final batch does not skew the epoch average
        batch_size = images.size(0)
        train_loss_sum += loss.item() * batch_size
        train_samples += batch_size

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss_sum / max(train_samples, 1):.4f}")

    # Evaluate the model on the held-out split (no gradient tracking)
    model.eval()
    test_loss_sum = 0.0
    test_samples = 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = images.size(0)
            test_loss_sum += loss.item() * batch_size
            test_samples += batch_size

    print(f"Test loss: {test_loss_sum / max(test_samples, 1):.4f}")


print("Training complete.")

Epoch [1/20], Loss: 2.5184
Test loss: 2.6461
Epoch [2/20], Loss: 1.7966
Test loss: 2.3254
Epoch [3/20], Loss: 1.8030
Test loss: 2.3988
Epoch [4/20], Loss: 1.7790
Test loss: 1.9414
Epoch [5/20], Loss: 1.7773
Test loss: 1.9992
Epoch [6/20], Loss: 1.7598
Test loss: 2.4329
Epoch [7/20], Loss: 1.7579
Test loss: 2.3419
Epoch [8/20], Loss: 1.7308
Test loss: 2.1174
Epoch [9/20], Loss: 1.7316
Test loss: 2.1535
Epoch [10/20], Loss: 1.7113
Test loss: 1.9253
Epoch [11/20], Loss: 1.6779
Test loss: 1.9509
Epoch [12/20], Loss: 1.6807
Test loss: 1.8811
Epoch [13/20], Loss: 1.6701
Test loss: 2.2138
Epoch [14/20], Loss: 1.6454
Test loss: 1.9070
Epoch [15/20], Loss: 1.6521
Test loss: 2.1030
Epoch [16/20], Loss: 1.6337
Test loss: 1.8079
Epoch [17/20], Loss: 1.6288
Test loss: 2.0362
Epoch [18/20], Loss: 1.6333
Test loss: 2.8483
Epoch [19/20], Loss: 1.6101
Test loss: 2.4791
Epoch [20/20], Loss: 1.6109
Test loss: 3.3296
Training complete.


In [7]:
# Save the model
# torch.save(model, 'dog_breed_classifier.pth')