# Single Shot Detection

## Setup Collab

### Install nessary packages

#### torchvision install

In [3]:
! pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

#### albumentations install

In [4]:
! pip install -U albumentations

Collecting albumentations
  Downloading albumentations-1.4.11-py3-none-any.whl (165 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.3/165.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-image>=0.21.0 (from albumentations)
  Downloading scikit_image-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.9/14.9 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-learn>=1.3.2 (from albumentations)
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
Collecting albucore>=0.0.11 (from albumentations)
  Downloading albucore-0.0.12-py3-none-any.whl (8.4 kB)
Collecting eval-type-backport (from albumentations)
  Downloading eval_type_backport-0.2.0-py3-none-any.whl (5.9 kB)
Collecting im

#### opencv-python-headless install

In [5]:
! pip install opencv-python-headless



## Libraries

### import

#### basic Imports

In [6]:
import numpy as np

In [7]:
import cv2


In [8]:
import matplotlib.pyplot as plt


In [9]:
import os

#### Google

In [1]:
from google.colab import files
from google.colab import drive

####  torch

In [10]:
import torch

In [11]:
from torch.utils.data import Dataset, DataLoader

#### torchvision

In [12]:
import torchvision


In [13]:
import torchvision.transforms as transforms

In [15]:
from torchvision.transforms import functional as F

#### Albumentations

In [16]:
import albumentations as A

In [17]:
from albumentations.pytorch import ToTensorV2

#### ElementTree

In [18]:
import xml.etree.ElementTree as ET

### Connect with Google

In [2]:
drive.mount('/content/drive') # drive activate for Use

Mounted at /content/drive


#### Device

In [27]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


## Custom Functions

### Dataset Handle

#### Custom Dataset Class

In [19]:
class CustomDataset(Dataset):
    def __init__(self, img_dir, ann_dir, transform=None):
        self.img_dir = img_dir
        self.ann_dir = ann_dir
        self.transform = transform
        self.imgs = list(sorted(os.listdir(img_dir)))
        self.anns = list(sorted(os.listdir(ann_dir)))

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.imgs[idx])
        ann_path = os.path.join(self.ann_dir, self.anns[idx])

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        boxes = []
        labels = []

        tree = ET.parse(ann_path)
        root = tree.getroot()

        for member in root.findall('object'):
            labels.append(int(member.find('name').text))
            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            boxes.append([xmin, ymin, xmax, ymax])

        boxes = np.array(boxes)
        labels = np.array(labels)

        if self.transform:
            transformed = self.transform(image=img, bboxes=boxes, labels=labels)
            img = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['labels']

        target = {}
        target['boxes'] = torch.tensor(boxes, dtype=torch.float32)
        target['labels'] = torch.tensor(labels, dtype=torch.int64)

        return img, target




#### Transform

In [20]:
transform = A.Compose([
    A.Resize(300, 300),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

#### collat function

In [21]:
collate_function=lambda x: tuple(zip(*x))

#### model Evalutation

In [22]:
def evaluate(model, val_loader, device):
    model.eval()
    val_loss = 0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for images, targets in val_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()

            outputs = model(images)
            for i, output in enumerate(outputs):
                pred_boxes = output['boxes']
                pred_labels = output['labels']
                gt_boxes = targets[i]['boxes']
                gt_labels = targets[i]['labels']

                for box, label in zip(pred_boxes, pred_labels):
                    if label in gt_labels:
                        correct_predictions += 1
                total_predictions += len(gt_labels)

    val_loss /= len(val_loader)
    val_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
    return val_loss, val_accuracy

#### model Traing

In [28]:
def modelTraing(model,num_epochs,train_loader,val_loader):
    train_losses=[]
    val_losses=[]
    val_accuracies=[]
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for images, targets in train_loader:
          images = list(image.to(device) for image in images)
          targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

          loss_dict = model(images, targets)
          losses = sum(loss for loss in loss_dict.values())
          train_loss += losses.item()

          optimizer.zero_grad()
          losses.backward()
          optimizer.step()

        train_loss /= len(train_loader)
        val_loss, val_accuracy = evaluate(model, val_loader, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
    return {
        'val_loss':val_losses,
        'val_accuracy':val_accuracies,
        'train_losses':train_losses
    }

#### Predict

In [29]:
def predict(model, image_path, device):
    model.eval()
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_transformed = transform(image=img)['image']
    img_transformed = img_transformed.to(device)
    with torch.no_grad():
        prediction = model([img_transformed])[0]

    pred_boxes = prediction['boxes'].cpu().numpy().astype(np.int32)
    pred_labels = prediction['labels'].cpu().numpy()

    for i in range(len(pred_boxes)):
        box = pred_boxes[i]
        label = pred_labels[i]
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
        cv2.putText(img, f'Class {label}', (box[0], box[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    plt.imshow(img)
    plt.show()




## Dataset

### DataSet Dir

####  Dataset dir

##### train path

In [30]:
trainData_set_dir={
    'image':'',
    'annotate':''
}

##### test path

In [31]:
testData_set_dir={
    'image':'',
    'annotate':''
}

##### Validation Path

In [32]:
valData_set_dir={
    'image':'',
    'annotate':''
}

#### Dataset

##### train

In [33]:
train_dataset=CustomDataset(img_dir=trainData_set_dir['image'], ann_dir=trainData_set_dir['annotate'], transform=transform)

FileNotFoundError: [Errno 2] No such file or directory: ''

##### test

In [None]:
test_dataset=CustomDataset(img_dir=testData_set_dir['image'], ann_dir=testData_set_dir['annotate'], transform=transform)

##### validation

In [None]:
val_dataset=CustomDataset(img_dir=valData_set_dir['image'], ann_dir=valData_set_dir['annotate'], transform=transform)

#### Dataset Loader

##### train

In [None]:
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_function)

##### test

In [None]:
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True, collate_fn=collate_function)

##### val

In [None]:
validation_loader = DataLoader(val_dataset, batch_size=4, shuffle=True, collate_fn=collate_function)

## Model

In [34]:
model = torchvision.models.detection.ssd300_vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /root/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:01<00:00, 118MB/s]


In [35]:

num_classes = 32  # Replace with the number of classes in your dataset, including background
model.head.classification_head.num_classes = num_classes



In [39]:
in_features = model.head.classification_head.cls_logits.in_channels
out_channels = model.head.classification_head.num_classes



AttributeError: 'SSDClassificationHead' object has no attribute 'cls_logits'

In [None]:
cls_logits = torch.nn.Conv2d(in_features, out_channels, kernel_size=3, padding=1)
model.head.classification_head.cls_logits = cls_logits

## Training

In [None]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

### Train

num_epoches=3

In [None]:
accur_loss=modelTraing(model,num_epoches)

#### visiblize

In [None]:
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(range(num_epoches), accur_loss['train_losses'], label='Train Loss')
plt.plot(range(num_epoches), accur_loss['val_losses'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Loss Curve')

In [None]:
plt.subplot(1, 2, 2)
plt.plot(range(num_epoches), accur_loss['val_accuracies'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Accuracy Curve')
plt.show()

## Predict