In [None]:
import kagglehub

In [None]:
# Download Dataset
# The value returned by kagglehub is kept in `data_dir`; later cells join
# 'annotations' and 'images' onto it to locate the dataset files.
data_dir = kagglehub.dataset_download('andrewmvd/dog-and-cat-detection')
print(f"Path to Dataset: {data_dir}")

# **Libraries**

In [None]:
import os
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from torchvision.models.resnet import ResNet18_Weights

# Data Visualizer
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Data Reader
import xml.etree.ElementTree as ET

import time

------------------------------------------------------------------------------------------

# **Sample Data**

In [None]:
# Parse a single annotation file to explore the XML layout before writing the Dataset.
# The path is built from `data_dir` (the kagglehub download location) instead of a
# hard-coded /kaggle/input path, so the cell also runs outside the Kaggle runtime.
sample_annotation = os.path.join(data_dir, 'annotations', 'Cats_Test0.xml')
sample_tree = ET.parse(sample_annotation)
sample_root = sample_tree.getroot()

# Direct children of the <annotation> root
for element in sample_root:
    print(f"{element} is an element in {sample_annotation}")

# findall() returns a list of matching direct children, even when there is only one match
folder_tag = sample_root.findall('folder')
filename_tag = sample_root.findall('filename')
size_tag = sample_root.findall('size')
segmented_tag = sample_root.findall('segmented')
object_tag = sample_root.findall('object')
print("-"*59)
print(f"Type of folder_tag is: {type(folder_tag)}")
print(f"Type of filename_tag is: {type(filename_tag)}")
print(f"Type of size_tag is: {type(size_tag)}")
print(f"Type of segmented_tag is: {type(segmented_tag)}")
print(f"Type of object_tag is: {type(object_tag)}")

In [None]:
# Print the direct children of the first match of every top-level tag.
# Tags that only hold text have no children, so their loop prints nothing.
for element in folder_tag[0]:
    print(f"{element} is an element in folder_tag")
print("-"*59)

for element in filename_tag[0]:
    print(f"{element} is an element in filename_tag")
print("-"*59)

for element in size_tag[0]:
    print(f"{element} is an element in size_tag")
print("-"*59)

# The loop variable must be `element`: the original bound `i` but printed the
# stale `element` left over from the previous loop.
for element in segmented_tag[0]:
    print(f"{element} is an element in segmented_tag")
print("-"*59)

for element in object_tag[0]:
    print(f"{element} is an element in object_tag")

In [None]:
# Children of the <bndbox> tag of the first object. The tag is looked up by name
# rather than by position (index 5) so the cell does not depend on the order of
# the children of <object>.
for element in object_tag[0].find('bndbox'):
    print(f"{element} is an element in object_tag")

In [None]:
# Text of the first child of the first <object> (the <name> tag in the sample file)
# followed by its Python type.
first_child_text = object_tag[0][0].text
print(first_child_text)
print(type(first_child_text))

# Tree Structure
```
<annotation>
    <folder>images</folder>
    <filename>Cats_Test0.png</filename>
    <size>
        <width>233</width>
        <height>350</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>cat</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <occluded>0</occluded>
        <difficult>0</difficult>
        <bndbox>
            <xmin>83</xmin>
            <ymin>29</ymin>
            <xmax>197</xmax>
            <ymax>142</ymax>
        </bndbox>
    </object>
</annotation>
```

----

# **Dataset**

In [None]:
class ImageDataset(Dataset):
    """Cat/dog classification dataset built from images and per-image XML annotations.

    Every file in ``image_dir`` is paired with ``<basename>.xml`` inside
    ``annotations_dir``. Images whose annotation lists more than one ``<object>``
    are dropped when the dataset is constructed. The label comes from the
    ``<name>`` of the object: ``'cat'`` -> 0, ``'dog'`` -> 1 and anything else
    (unknown name, annotation without objects, missing annotation file) -> -1.

    Args:
        annotations_dir: Directory containing the ``.xml`` annotation files.
        image_dir: Directory containing the image files.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    # Class ids keyed by the text of the <name> tag
    LABELS = {'cat': 0, 'dog': 1}
    # Label used whenever no known class can be read from the annotation
    UNKNOWN_LABEL = -1

    def __init__(self, annotations_dir, image_dir, transform = None):
        self.annotations_dir = annotations_dir
        self.image_dir = image_dir
        self.transform = transform
        self.image_files = self.filter_images_with_multiple_objects()

    def _annotation_path(self, image_name):
        """Return the path of the annotation file that belongs to ``image_name``."""
        annotation_name = os.path.splitext(image_name)[0] + '.xml'
        return os.path.join(self.annotations_dir, annotation_name)

    @staticmethod
    def _read_label(annotation_path):
        """Read the class id of the first ``<object>`` in an annotation file.

        The ``<name>`` tag is looked up by tag name, so the result does not depend
        on the order of the children of ``<object>``.

        Returns:
            0 for 'cat', 1 for 'dog', -1 when there is no object, no name, or an
            unknown name.

        Raises:
            FileNotFoundError: If ``annotation_path`` does not exist.
        """
        root = ET.parse(annotation_path).getroot()
        # We are working with 1 object/image so only the first object matters
        first_object = root.find('object')
        if first_object is None:
            return ImageDataset.UNKNOWN_LABEL
        name = first_object.findtext('name')
        return ImageDataset.LABELS.get(name, ImageDataset.UNKNOWN_LABEL)

    def filter_images_with_multiple_objects(self):
        """Return the sorted image file names that have at most one annotated object.

        Images without an annotation file are kept; they are labelled -1 by
        ``__getitem__``. Images with several objects are reported and skipped.
        """
        valid_image_files = []

        # Loop through all images in the directory
        for image in sorted(os.listdir(self.image_dir)):
            if not os.path.isfile(os.path.join(self.image_dir, image)):
                continue
            try:
                root = ET.parse(self._annotation_path(image)).getroot()
            except FileNotFoundError:
                # No annotation: keep the image, its label is resolved later
                valid_image_files.append(image)
                continue

            if len(root.findall('object')) <= 1:
                valid_image_files.append(image)
            else:
                print(f"Image {image} has multiple objects and will be excluded from the dataset")
                print(" ")
        return valid_image_files

    def __len__(self):
        """Number of images kept by the filter."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the image at position ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one was
        given; the label follows the mapping described on the class.
        """
        # Image Path
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)

        # Load Image
        image = Image.open(img_path).convert("RGB")

        # Annotation Path
        annotation_name = os.path.splitext(img_name)[0] + '.xml'
        annotation_path = os.path.join(self.annotations_dir, annotation_name)
        try:
            label = self._read_label(annotation_path)
        except FileNotFoundError:
            print(f"Annotation file {annotation_name} missing. Assining label as -1")
            label = self.UNKNOWN_LABEL

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Data Directories
annotations_dir = os.path.join(data_dir, 'annotations')
image_dir = os.path.join(data_dir, 'images')

# Collect the regular files of the image directory (sorted by name) into a
# one-column dataframe whose only purpose is to drive the train/validation split
image_files = [
    name
    for name in sorted(os.listdir(image_dir))
    if os.path.isfile(os.path.join(image_dir, name))
]
df = pd.DataFrame({'image_name': image_files})
print("Dummy Dataset:")
print(df.head())
print("-"*59)

# Hold out 20% of the file names for validation; the fixed random_state keeps the split stable
train_df, val_df = train_test_split(df, test_size = .2, random_state = 42)
print("Train Dataset:")
print(train_df.head())
print("-"*59)
print("Validation Dataset:")
print(val_df.head())

In [None]:
# Augmentation/Transforms
# Resize to 224x224, convert to a tensor and normalise per channel
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]
        )
    ]
)

# Datasets
train_set = ImageDataset(annotations_dir, image_dir, transform = transform)
val_set = ImageDataset(annotations_dir, image_dir, transform = transform)

# Filter datasets based on train_df and val_df.
# Sets give constant-time membership checks; testing against the numpy array of
# names rescans the whole column for every file.
train_names = set(train_df['image_name'])
val_names = set(val_df['image_name'])
train_set.image_files = [file for file in train_set.image_files
                         if file in train_names]
val_set.image_files = [file for file in val_set.image_files
                       if file in val_names]

# Dataloaders (only the training data is shuffled)
train_loader = DataLoader(train_set, batch_size = 32, shuffle = True)
val_loader = DataLoader(val_set, batch_size = 32, shuffle = False)

# **Model**

In [None]:
# Model
# Pretrained ResNet18 whose final fully connected layer is replaced by a 2-way classifier
model = models.resnet18(weights = ResNet18_Weights.DEFAULT)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2) # 2 Classes: Cat n' Dog

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Loss n' Optimizer
criterion = nn.CrossEntropyLoss()
# Adam is reached through `torch.optim` on purpose: the name `optim` is rebound to
# the optimizer instance below (later cells pass it to train()), so resolving Adam
# through the `optim` alias would fail the second time this cell is executed.
optim = torch.optim.Adam(model.parameters(), lr = 0.001)

print(model)

# **Training**

In [None]:
def generate_images(model, inputs, labels):
    """Plot the first sample of a batch together with its predicted and real class.

    The model is switched to eval mode and run without gradients on the whole
    batch; only element 0 is drawn. The title is purple when the prediction
    matches the label and red otherwise.

    Args:
        model: Classifier producing one score per class for each input.
        inputs: Batch of normalised image tensors.
        labels: Batch of integer class ids.
    """
    class_names = ['cat', 'dog']

    model.eval()
    with torch.no_grad():
        inputs = inputs.to(device)
        labels = labels.to(device)
        scores = model(inputs)
        preds = torch.max(scores, 1)[1]  # Index of the highest score per sample

    # Bring everything back to the CPU before handing it to numpy/matplotlib
    inputs = inputs.cpu()
    labels = labels.cpu()
    preds = preds.cpu()

    # Only the first sample of the batch is shown
    label = labels[0].item()
    pred = preds[0].item()
    title_color = "purple" if pred == label else "red"

    # CHW -> HWC, then undo the normalisation and clamp to the displayable range
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    img = inputs[0].numpy().transpose((1, 2, 0))
    img = np.clip(channel_std * img + channel_mean, 0, 1)

    # Draw the image with the prediction in the title
    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    plt.axis("off")
    plt.title(f"Pred: {class_names[pred]} | Real: {class_names[label]}", color=title_color, fontsize=12)
    plt.show()

In [None]:
def eval_epoch(model, criterion, val_set, device):
    """Evaluate the model once over ``val_set`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        val_set: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(accuracy, loss)`` as Python floats: the fraction of correctly
        classified samples and the mean of the per-batch losses.

    Raises:
        ZeroDivisionError: If ``val_set`` yields no batches.
    """
    model.eval()
    correct = 0
    total = 0
    losses = []
    with torch.no_grad():
        for inputs, labels in val_set:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            losses.append(loss.item())
            _, preds = outputs.max(1)
            # .item() keeps the counter a plain int, so the returned accuracy is a
            # Python float instead of a (possibly CUDA) tensor that cannot be plotted
            correct += (preds == labels).sum().item()
            total += inputs.size(0)
    epoch_loss = sum(losses)/len(losses)
    epoch_accuracy = correct/total
    return epoch_accuracy, epoch_loss

In [None]:
def train_epoch(model, optim, criterion, train_set, device, epoch = 0, log_interval = 10):
    """Train the model for one pass over ``train_set``.

    Args:
        model: Network to train; it is switched to train mode.
        optim: Optimizer holding the model parameters.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        train_set: Sized iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        epoch: Epoch number, only used in the progress message.
        log_interval: A progress line is printed every ``log_interval`` batches.

    Returns:
        ``(accuracy, loss)`` as Python floats: the fraction of correctly
        classified samples over the whole epoch and the mean per-batch loss.

    Raises:
        ZeroDivisionError: If ``train_set`` yields no batches.
    """
    model.train()
    # Epoch-wide counters feed the return value; the window counters only feed the
    # progress line and are reset after each report. Sharing one pair of counters
    # made the returned accuracy cover just the last few batches and divided by
    # zero whenever the final batch triggered a report.
    epoch_total, epoch_correct = 0, 0
    window_total, window_correct = 0, 0
    losses = []

    for idx, (inputs, labels) in enumerate(train_set):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optim.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)
        losses.append(loss.item())

        loss.backward()
        optim.step()

        _, preds = outputs.max(1)
        batch_size = inputs.size(0)
        batch_correct = (preds == labels).sum().item()
        epoch_total += batch_size
        epoch_correct += batch_correct
        window_total += batch_size
        window_correct += batch_correct

        if idx % log_interval == 0 and idx > 0:
            print(
                f"| Epoch {epoch} | {idx}/{len(train_set)} Batches | {window_correct/window_total} Accuracy"
            )
            window_total, window_correct = 0, 0
    epoch_accuracy = epoch_correct/epoch_total
    epoch_loss = sum(losses)/len(losses)
    return epoch_accuracy, epoch_loss

In [None]:
def train(model, model_name, save_model, optim, criterion,
         train_set, val_set, num_epochs, device):
    """Run the train/validate loop and return the best model with its history.

    After every epoch the model is evaluated on ``val_set``. Whenever the
    validation accuracy improves, the weights are written to
    ``<save_model>/<model_name>.pt`` and one validation sample is plotted. When all
    epochs are done, that checkpoint is loaded back into ``model``.

    Args:
        model: Network to train.
        model_name: File name (without extension) of the checkpoint.
        save_model: Directory the checkpoint is written to.
        optim: Optimizer passed on to ``train_epoch``.
        criterion: Loss function.
        train_set: Iterable of training batches.
        val_set: Iterable of validation batches.
        num_epochs: Number of epochs to run.
        device: Device used for training and evaluation.

    Returns:
        ``(model, metrics)`` where ``metrics`` maps 'Train Accuracy', 'Train Loss',
        'Eval Accuracy', 'Eval Loss' and 'Time' to per-epoch lists.
    """
    checkpoint_path = save_model + f"/{model_name}.pt"
    divider = "-" * 59

    train_accuracy, train_losses, eval_accuracy, eval_losses, times = [], [], [], [], []
    best_accuracy_eval = -1

    for epoch in range(1, num_epochs + 1):
        print(f"Starting Epoch {epoch}....")
        print(" ")
        epoch_start_time = time.time()

        # One optimisation pass over the training batches
        train_acc, train_loss = train_epoch(model, optim, criterion, train_set, device, epoch)
        train_accuracy.append(train_acc)
        train_losses.append(train_loss)

        # Score the updated weights on the validation batches
        eval_acc, eval_loss = eval_epoch(model, criterion, val_set, device)
        eval_accuracy.append(eval_acc)
        eval_losses.append(eval_loss)

        # Checkpoint only when the validation accuracy strictly improves
        if eval_acc > best_accuracy_eval:
            torch.save(model.state_dict(), checkpoint_path)
            inputs_t, targets_t = next(iter(val_set))
            print(" ")
            generate_images(model, inputs_t, targets_t)
            print(" ")
            best_accuracy_eval = eval_acc
        times.append(time.time() - epoch_start_time)

        print(divider)
        print(
            f"| End of Epoch {epoch} | Time Taken: {time.time() - epoch_start_time} | Train Accuracy: {train_acc} | Train loss: {train_loss}, | Val Accuracy: {eval_acc} | Val Loss: {eval_loss}"
        )
        print(" ")
        print(f"Epoch {epoch} Ending...")
        print(divider)

    # Restore the best checkpoint and leave the model in eval mode
    model.load_state_dict(torch.load(checkpoint_path))
    model.eval()

    metrics = {
        'Train Accuracy': train_accuracy,
        'Train Loss': train_losses,
        'Eval Accuracy': eval_accuracy,
        'Eval Loss': eval_losses,
        'Time': times
    }
    return model, metrics

In [None]:
def plot_result(num_epochs, train_accuracy, eval_accuracy, train_losses, eval_losses):
    """Plot training and evaluation accuracy/loss curves side by side.

    Args:
        num_epochs: Number of epochs; the x axis is ``range(num_epochs)``.
        train_accuracy: Per-epoch training accuracy.
        eval_accuracy: Per-epoch evaluation accuracy.
        train_losses: Per-epoch training loss.
        eval_losses: Per-epoch evaluation loss.

    Each series may be a tensor, or a list/tuple whose items are numbers or
    single-element tensors (on any device); tensors are converted to CPU values
    before plotting.
    """
    epochs = list(range(num_epochs))

    # Function to ensure tensors are moved to CPU and converted to plottable values
    def ensure_cpu(values):
        if isinstance(values, torch.Tensor):
            # torch.device must be compared through `.type`, not with a str
            if values.device.type != 'cpu':
                print(f"Tensor is on {values.device}. Moving to CPU.")
            return values.detach().cpu().numpy()
        if isinstance(values, (list, tuple)):
            # The training loop collects one value per epoch in a list; items that
            # are tensors would otherwise reach matplotlib unconverted.
            # assumes tensor items hold exactly one element
            return [
                item.item() if isinstance(item, torch.Tensor) else item
                for item in values
            ]
        return values

    # Move tensors to CPU and convert if necessary
    train_accuracy = ensure_cpu(train_accuracy)
    eval_accuracy = ensure_cpu(eval_accuracy)
    train_losses = ensure_cpu(train_losses)
    eval_losses = ensure_cpu(eval_losses)

    # Debugging: check the types and shapes of the series
    print(f"train_accuracy: {train_accuracy.shape if isinstance(train_accuracy, np.ndarray) else type(train_accuracy)}")
    print(f"eval_accuracy: {eval_accuracy.shape if isinstance(eval_accuracy, np.ndarray) else type(eval_accuracy)}")
    print(f"train_losses: {train_losses.shape if isinstance(train_losses, np.ndarray) else type(train_losses)}")
    print(f"eval_losses: {eval_losses.shape if isinstance(eval_losses, np.ndarray) else type(eval_losses)}")

    # Plotting
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

    axs[0].plot(epochs, train_accuracy, label="Training")
    axs[0].plot(epochs, eval_accuracy, label="Evaluation")
    axs[0].set_title("Accuracy")
    axs[0].set_xlabel("Epoch")
    axs[0].set_ylabel("Accuracy")
    axs[0].legend()

    axs[1].plot(epochs, train_losses, label="Training")
    axs[1].plot(epochs, eval_losses, label="Evaluation")
    axs[1].set_title("Loss")
    axs[1].set_xlabel("Epoch")
    axs[1].set_ylabel("Loss")
    axs[1].legend()

    plt.tight_layout()
    plt.show()


# **Results**

In [None]:
# Training configuration: number of epochs and the checkpoint directory
epochs = 10
save_model = './ResNet18'
os.makedirs(save_model, exist_ok = True)

# Train, then keep the returned model (best checkpoint loaded) and the per-epoch metrics
ResNet18, metrics = train(
    model = model,
    model_name = 'ResNet18Classification',
    save_model = save_model,
    optim = optim,
    criterion = criterion,
    train_set = train_loader,
    val_set = val_loader,
    num_epochs = epochs,
    device = device,
)

In [None]:
def visualize_predictions(model, dataloader, num_images=10):
    """Show up to ``num_images`` samples in a two-column grid with their predictions.

    Each title reads "Predicted: <class>, Actual: <class>" and is purple for a
    correct prediction, red otherwise. The figure is left open so the caller can
    call ``plt.show()``.

    Args:
        model: Classifier producing one score per class; switched to eval mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        num_images: Maximum number of samples to draw. Nothing is drawn for
            values <= 0.
    """
    if num_images <= 0:
        return

    model.eval()
    class_names = ["cat", "dog"]
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Round up so an odd num_images still fits in the two-column grid
    num_rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure(figsize=(10, 8))

    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            _, preds = torch.max(outputs, 1)

            # Convert once per batch instead of once per drawn image
            images_cpu = images.cpu()
            preds = preds.cpu()
            labels = labels.cpu()

            for j in range(images_cpu.size(0)):
                images_so_far += 1
                pred = preds[j].item()
                label = labels[j].item()

                ax = plt.subplot(num_rows, 2, images_so_far)
                ax.axis("off")
                ax.set_title(
                    f"Predicted: {class_names[pred]}, Actual: {class_names[label]}",
                    fontdict={
                        "fontsize": 10,
                        "color": "purple" if pred == label else "red",
                    },
                )
                # Denormalize and display the image (CHW -> HWC first)
                img = images_cpu[j].numpy().transpose((1, 2, 0))
                img = np.clip(std * img + mean, 0, 1)
                ax.imshow(img)

                if images_so_far == num_images:
                    plt.tight_layout()
                    return

    # The loader ran out before num_images samples were drawn
    if images_so_far > 0:
        plt.tight_layout()


# Draw the prediction grid for the validation data, then render the figure
visualize_predictions(model=model, dataloader=val_loader)
plt.show()

In [None]:
def plot_confusion_matrix(model, dataloader):
    """Plot the cat/dog confusion matrix of ``model`` over ``dataloader``.

    Args:
        model: Classifier producing one score per class; switched to eval mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
    """
    class_names = ['cat', 'dog']
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    # Pin the label set so the matrix is always 2x2 and lines up with the tick
    # labels, even when a class is absent from the data or other label values
    # (such as -1) occur.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.show()

# Confusion matrix of the trained model on the validation data
plot_confusion_matrix(model=model, dataloader=val_loader)

In [None]:
# Accuracy and loss curves collected during training
plot_result(
    num_epochs=epochs,
    train_accuracy=metrics["Train Accuracy"],
    eval_accuracy=metrics["Eval Accuracy"],
    train_losses=metrics["Train Loss"],
    eval_losses=metrics["Eval Loss"],
)