<a href="https://colab.research.google.com/github/ThanuMahee12/ayush-vision/blob/ssd/Algorithums/detection/SSD/SSDCustom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
root="/content/drive/MyDrive/AyushVision/ssd/dataset/ayushvision_root"

In [4]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.models.detection import ssd300_vgg16
from torchvision.transforms import functional as F
from torchvision.datasets import VOCDetection
import torchvision.transforms as T

In [5]:
# Custom transform for resizing, normalization, etc.
class TransformWrapper:
    def __call__(self, image, target):
        # Convert image to tensor and resize
        image = F.to_tensor(image)
        image = F.resize(image, (300, 300))  # SSD300 requires image size 300x300

        # Process target
        boxes = []
        labels = []

        for obj in target['annotation']['object']:
            # Get bounding box coordinates
            bndbox = obj['bndbox']
            boxes.append([
                float(bndbox['xmin']),
                float(bndbox['ymin']),
                float(bndbox['xmax']),
                float(bndbox['ymax'])
            ])

            # Example label, replace with actual label parsing
            labels.append(1)  # Replace 1 with actual label if available

        # Convert lists to tensors
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels
        }

        return image, target

In [6]:
# Use a pre-trained SSD300 model
model = ssd300_vgg16(pretrained=True)
model = model.eval()  # Set to evaluation mode (for inference)

Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /root/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:04<00:00, 29.9MB/s]


In [7]:
transform = TransformWrapper()

In [8]:
# Paths to the datasets
train_dataset = VOCDetection(root='/content/drive/MyDrive/AyushVision/ssd/dataset/ayushvision_root', year='2012', image_set='train', download=False, transforms=transform)

In [9]:
test_dataset = VOCDetection(root='/content/drive/MyDrive/AyushVision/ssd/dataset/ayushvision_root', year='2012', image_set='val', download=False, transforms=transform)

In [10]:
valid_dataset = VOCDetection(root='/content/drive/MyDrive/AyushVision/ssd/dataset/ayushvision_root', year='2012', image_set='trainval', download=False, transforms=transform)

In [11]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))
valid_loader = DataLoader(valid_dataset, batch_size=8, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# Check if the dataset is loaded properly
for images, targets in train_loader:
    print(f"Loaded batch of images: {len(images)}")
    break


Loaded batch of images: 8


In [12]:
def train(model, dataloader, optimizer, device):
    model.train()
    for images, targets in dataloader:
        images = list(image.to(device) for image in images)

        # Process targets
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backprop
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    return losses.item()

In [13]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

SSD(
  (backbone): SSDFeatureExtractorVGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=

In [14]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
for epoch in range(10):  # 10 epochs
    loss = train(model, train_loader, optimizer, device)
    print(f"Epoch [{epoch+1}/10], Loss: {loss:.4f}")


In [None]:
from torchvision.ops import box_iou

def evaluate(model, dataloader, device):
    model.eval()
    all_iou = []
    with torch.no_grad():
        for images, targets in dataloader:
            images = list(img.to(device) for img in images)
            outputs = model(images)

            for i, output in enumerate(outputs):
                target_boxes = targets[i]['boxes'].to(device)
                pred_boxes = output['boxes']

                # Compute IoU for each image
                iou = box_iou(pred_boxes, target_boxes).mean().item()
                all_iou.append(iou)

    avg_iou = sum(all_iou) / len(all_iou)
    print(f"Average IoU: {avg_iou:.4f}")
    return avg_iou

# Evaluate on the test set
evaluate(model, test_loader, device)


AttributeError: 'Tensor' object has no attribute 'items'