In [11]:
from PepitoDataset import PepitoDataset
from Models import PepitoModel
from torch.utils.data import DataLoader
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np

# Data

In [12]:
# Load the dataset
dataset = PepitoDataset("./dataset")

# Split the dataset into train (80%) and test (20%) and create the dataloaders.
# The split is driven by a seeded generator so that re-running this cell yields
# the same partition instead of silently producing (and saving) a new one.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [0.8, 0.2], generator=split_generator
)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable.
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

# Save the split indexes to reproduce the same split later
np.save("train_indexes.npy", train_dataset.indices)
np.save("test_indexes.npy", test_dataset.indices)

# Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Assuming PepitoModel, train_dataloader, and test_dataloader are already defined

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# PepitoModel is built with 2 outputs (one per class) and moved to the chosen device.
model = PepitoModel(2).to(device)

# Cross-entropy loss over the two class scores.
criterion = nn.CrossEntropyLoss()

# Adam with a small weight decay acting as regularization.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-5)

# Multiply the learning rate by 0.1 when the monitored value (mode "min") has
# not improved for 5 consecutive scheduler steps.
# NOTE(review): `verbose` is reported as deprecated by recent PyTorch releases;
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.1,
    patience=5,
    verbose=True,
)



In [None]:
def train():
    """Run one optimisation pass over ``train_dataloader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. Each batch unpacks into ``(inputs, _, labels)``; the middle
    element is ignored. The progress bar shows the running mean of the
    per-batch loss, and the accuracy over all seen samples is printed at the end.
    """
    print("Training")
    model.train()
    n_correct, n_seen, loss_sum = 0, 0, 0.0

    progress = tqdm(train_dataloader)
    for step, (inputs, _, labels) in enumerate(progress, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Predicted class is the index of the largest score along dim 1.
        predicted = torch.max(outputs.data, 1).indices
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        loss_sum += loss.item()

        # `step` is 1-based, so this is the mean loss over the batches so far.
        progress.set_postfix(loss=loss_sum / step)

    print(f"Training Accuracy: {100 * n_correct / n_seen:.2f}%")


def test():
    """Evaluate the notebook-level ``model`` on ``test_dataloader``.

    Runs with gradients disabled. Each batch unpacks into
    ``(inputs, _, labels)``; the middle element is ignored.

    Returns:
        tuple: ``(running_loss, accuracy)`` where ``running_loss`` is the SUM
        of the per-batch losses (not their mean) and ``accuracy`` is a
        percentage in the 0-100 range.
    """
    print("Testing")
    model.eval()
    n_correct, n_seen, loss_sum = 0, 0, 0.0

    with torch.no_grad():
        progress = tqdm(test_dataloader)
        for step, (inputs, _, labels) in enumerate(progress, start=1):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)

            loss_sum += criterion(outputs, labels).item()

            # Predicted class is the index of the largest score along dim 1.
            predicted = torch.max(outputs.data, 1).indices
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            # `step` is 1-based, so this is the mean loss over the batches so far.
            progress.set_postfix(loss=loss_sum / step)

    accuracy = 100 * n_correct / n_seen
    print(f"Testing Accuracy: {accuracy:.2f}%")
    return loss_sum, accuracy


# Train for five epochs, evaluating on the test loader after each one.
for epoch in range(5):
    print(f"Epoch {epoch + 1}")
    train()
    # test() returns (summed batch loss, accuracy %); the loss drives the
    # plateau scheduler, which lowers the learning rate when it stops improving.
    val_loss, val_accuracy = test()
    scheduler.step(val_loss)
    print("--------------------------------------------------")

# Persist only the weights (state dict), not the whole module object.
torch.save(model.state_dict(), "final_model_random.pt")

Epoch 1
Training


100%|██████████| 502/502 [02:07<00:00,  3.93it/s, loss=0.242]


Training Accuracy: 89.98%
Testing


100%|██████████| 126/126 [00:17<00:00,  7.37it/s, loss=0.14] 


Testing Accuracy: 92.15%
--------------------------------------------------
Epoch 2
Training


100%|██████████| 502/502 [02:06<00:00,  3.97it/s, loss=0.15] 


Training Accuracy: 94.73%
Testing


100%|██████████| 126/126 [00:12<00:00,  9.74it/s, loss=0.0836]


Testing Accuracy: 96.31%
--------------------------------------------------
Epoch 3
Training


100%|██████████| 502/502 [02:03<00:00,  4.06it/s, loss=0.0837]


Training Accuracy: 96.45%
Testing


100%|██████████| 126/126 [00:16<00:00,  7.75it/s, loss=0.0539]


Testing Accuracy: 97.73%
--------------------------------------------------
Epoch 4
Training


100%|██████████| 502/502 [02:01<00:00,  4.12it/s, loss=0.0933] 


Training Accuracy: 96.38%
Testing


100%|██████████| 126/126 [00:16<00:00,  7.81it/s, loss=0.0589]


Testing Accuracy: 97.21%
--------------------------------------------------
Epoch 5
Training


100%|██████████| 502/502 [02:04<00:00,  4.03it/s, loss=0.0921]


Training Accuracy: 95.93%
Testing


100%|██████████| 126/126 [00:12<00:00,  9.78it/s, loss=0.0523]


Testing Accuracy: 97.56%
--------------------------------------------------


In [None]:
import torch
import matplotlib.pyplot as plt
from torchvision import transforms
from PIL import Image

def compute_saliency_maps(X, y, model):
    """Compute a gradient-based saliency map for every sample in a batch.

    For each sample, the score of its own highest-scoring class is
    differentiated with respect to the input. The saliency is the maximum
    absolute gradient taken over dim 1 of the input.

    Args:
        X: Input batch tensor fed directly to ``model``; dim 0 is the batch.
            It is not modified: gradients are taken w.r.t. a detached copy.
        y: Labels. Currently unused; kept so existing callers keep working.
        model: Module mapping ``X`` to a 2-D tensor of per-class scores
            (one row per sample).

    Returns:
        Tensor shaped like ``X`` with dim 1 reduced away.
    """
    model.eval()
    # Fresh leaf tensor: avoids mutating the caller's tensor and avoids
    # gradients accumulating in ``X.grad`` across repeated calls.
    X = X.detach().requires_grad_(True)

    # enable_grad makes the function usable from inside a no_grad block.
    with torch.enable_grad():
        scores = model(X)
        # Per-row maximum: each sample contributes only its own top score.
        # (Indexing ``scores[:, argmax]`` would pick whole columns and mix
        # gradients of different classes for batches larger than one.)
        score_max = scores.max(dim=1).values
        # Samples are independent, so the gradient of the sum is the
        # per-sample gradient. autograd.grad leaves parameter .grad untouched.
        (input_grad,) = torch.autograd.grad(score_max.sum(), X)

    saliency, _ = torch.max(input_grad.abs(), dim=1)
    return saliency

def show_saliency_maps(data_loader, model):
    """Plot each image next to its saliency map for the first two batches.

    Batches from ``data_loader`` unpack into ``(inputs, _, labels)``; the
    middle element is ignored. Tensors are moved to the notebook-level
    ``device`` before the saliency maps are computed.

    Args:
        data_loader: Iterable yielding ``(inputs, _, labels)`` batches.
        model: Model handed to ``compute_saliency_maps``.
    """
    model.eval()
    for batch_idx, (inputs, _, labels) in enumerate(data_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        saliency = compute_saliency_maps(inputs, labels, model).cpu().numpy()

        for sample_idx in range(inputs.size(0)):
            # Channel-first -> channel-last so imshow can render the image.
            image = inputs[sample_idx].detach().cpu().permute(1, 2, 0)
            panels = (
                (image, {}),
                (saliency[sample_idx], {"cmap": plt.cm.hot}),
            )
            for position, (picture, imshow_kwargs) in enumerate(panels, start=1):
                plt.subplot(1, 2, position)
                plt.imshow(picture, **imshow_kwargs)
                plt.axis('off')
            plt.show()

        # Stop after the second batch (index 1).
        if batch_idx == 1:
            break

# Rebuild the classifier and restore the weights saved after training.
model = PepitoModel(2).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint written
# on a GPU machine can still be loaded when only the CPU is available.
model.load_state_dict(torch.load("final_model_random.pt", map_location=device))
# show_saliency_maps(test_dataloader, model)

<All keys matched successfully>

# Analysis

In [16]:
from PepitoDataset import PepitoDataset
from Models import PepitoModel
from torch.utils.data import DataLoader
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
from ultralytics import YOLO

# Load the dataset
dataset = PepitoDataset("./dataset")

# Split the dataset into train and test from the saved indexes.
# tolist() turns the NumPy integers into plain Python ints before they are
# handed to Subset and, through it, to the dataset's indexing.
train_indexes = np.load("train_indexes.npy").tolist()
test_indexes = np.load("test_indexes.npy").tolist()
train_dataset = torch.utils.data.Subset(dataset, train_indexes)
test_dataset = torch.utils.data.Subset(dataset, test_indexes)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
# No shuffling for evaluation: a fixed order makes the misclassification
# report come out in the same order on every run.
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

# Assuming PepitoModel, train_dataloader, and test_dataloader are already defined

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the models
model = PepitoModel(2).to(device)  # Output layer should have 2 neurons for two classes
box_model = YOLO("yolo11n.pt").to(device)  # load an official model

In [None]:
from PepitoDataset import LABEL_MAP
import matplotlib.pyplot as plt
import cv2
from torchvision import transforms

# Map numeric class ids back to their label names for printing.
reversed_label_map = {v: k for k, v in LABEL_MAP.items()}
# map_location remaps the stored tensors onto `device`, so a checkpoint written
# on a GPU machine can still be loaded when only the CPU is available.
model.load_state_dict(torch.load("final_model_random.pt", map_location=device))
model.eval()

# Number of test samples seen so far.
total = 0

# Misclassified samples, bucketed by what the object detector found in them.
misclassified_dict = {
    "no_objects": 0,
    "unsure_objects": 0,
    "cat_detected": 0,
}

# Loop-invariant: build the resize transform once instead of once per sample.
resize_for_detector = transforms.Resize((640, 640))

for i, data in enumerate(test_dataloader):
    inputs, _, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    # Classification only needs the scores, so skip building an autograd graph.
    with torch.no_grad():
        outputs = model(inputs)
    _, predicted = torch.max(outputs, 1)

    for j in range(inputs.size(0)):
        total += 1
        if predicted[j] != labels[j]:
            # Compute saliency map
            saliency = compute_saliency_maps(inputs[j].unsqueeze(0), labels[j].unsqueeze(0), model)
            saliency = saliency.cpu().numpy()

            # Convert tensor to a channel-last uint8 array.
            # NOTE(review): assumes pixel values are in [0, 1] - confirm against the dataset.
            img = inputs[j].detach().cpu().permute(1, 2, 0).numpy()
            img = (img * 255).astype(np.uint8)

            # # Plot original image
            # plt.subplot(1, 3, 1)
            # plt.imshow(img)
            # plt.axis('off')
            # plt.title("Original Image")


            # Plot image with bounding boxes
            bbox_img = resize_for_detector(inputs[j].unsqueeze(0))
            bbox_results = box_model(bbox_img)
            img_with_boxes = bbox_img.squeeze(0).permute(1, 2, 0).cpu().numpy()
            img_with_boxes = (img_with_boxes * 255).astype(np.uint8)
            img_with_boxes = cv2.cvtColor(img_with_boxes, cv2.COLOR_RGB2BGR)  # Convert to BGR format for OpenCV

            # Collected across every result so the report below never reads
            # values left over from a previous sample.
            names, confs = [], []
            for result in bbox_results:
                xyxy = result.boxes.xyxy.cpu().numpy()  # top-left-x, top-left-y, bottom-right-x, bottom-right-y
                result_names = [result.names[cls.item()] for cls in result.boxes.cls.int()]  # class name of each box
                result_confs = result.boxes.conf.cpu().numpy()  # confidence score of each box
                for box, name, conf in zip(xyxy, result_names, result_confs):
                    cv2.rectangle(img_with_boxes, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)
                    cv2.putText(img_with_boxes, f"{name} {conf:.2f}", (int(box[0]), int(box[1]) + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
                names.extend(result_names)
                confs.extend(result_confs)


            # plt.subplot(1, 3, 2)
            # plt.imshow(img_with_boxes)
            # plt.axis('off')
            # plt.title("Image with Bounding Boxes")

            # # Plot saliency map
            # plt.subplot(1, 3, 3)
            # plt.imshow(saliency[0], cmap=plt.cm.hot)
            # plt.axis('off')
            # plt.title("Saliency Map")

            # plt.show()
            print(f"True label: {reversed_label_map[labels[j].item()]}, Predicted: {reversed_label_map[predicted[j].item()]}")

            print(f"Detected objects: {', '.join([f'{name} {conf:.2f}' for name, conf in zip(names, confs)])}")

            # Bucket the error: nothing detected, a cat detected, or only other objects.
            if len(names) == 0:
                misclassified_dict["no_objects"] += 1
            elif "cat" in names:
                misclassified_dict["cat_detected"] += 1
            else:
                misclassified_dict["unsure_objects"] += 1
            print("--------------------------------------------------")




0: 640x640 (no detections), 6.8ms
Speed: 0.1ms preprocess, 6.8ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)
True label: out, Predicted: in
Detected objects: 
--------------------------------------------------

0: 640x640 (no detections), 7.2ms
Speed: 0.0ms preprocess, 7.2ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)
True label: out, Predicted: in
Detected objects: 
--------------------------------------------------

0: 640x640 (no detections), 8.3ms
Speed: 0.0ms preprocess, 8.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)
True label: out, Predicted: in
Detected objects: 
--------------------------------------------------

0: 640x640 (no detections), 7.0ms
Speed: 0.0ms preprocess, 7.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)
True label: out, Predicted: in
Detected objects: 
--------------------------------------------------

0: 640x640 2 tvs, 19.6ms
Speed: 0.0ms preprocess, 19.6ms inference, 7.5

In [21]:
# Every entry in misclassified_dict counts wrongly classified samples, so the
# sum of its values is the total number of errors over the `total` samples seen.
total_misclassified = sum(misclassified_dict.values())
correct_count = total - total_misclassified
print(f"Accuracy: {100 * correct_count / total:.2f}%")
print(f"Total misclassified: {total_misclassified}")
# One indented line per error bucket.
for key in misclassified_dict:
    print(f"\t{key}: {misclassified_dict[key]}")
print(f"Total : {total}")

Accuracy: 97.56%
Total misclassified: 98
	no_objects: 64
	unsure_objects: 21
	cat_detected: 13
Total : 4015
