In [96]:
import torch
import torchvision
import torch.nn as nn
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torch.optim.lr_scheduler import MultiStepLR
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.datasets import CocoDetection
import torch.optim as optim
from torch.utils.data import DataLoader
import xml.etree.ElementTree as ET
import matplotlib.patches as patches
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [128]:
def collate_fn(batch):
    """
    To handle the data loading as different images may have different number 
    of objects and to handle varying size tensors as well.
    """
    return tuple(zip(*batch))


def visualize_image_with_boxes(image, bounding_boxes):
    # Create a Matplotlib figure and axis
    fig, ax = plt.subplots(1)
    # Display the image
    image_data = np.transpose(image, (1, 2, 0))
    ax.imshow(image_data)  

    # Add bounding boxes to the image
    #print("in the visual:",bounding_boxes)
    #for box in bounding_boxes['boxes']:
    for box in bounding_boxes:
         #Note: box['boxes'][0] changed to box['boxes'][0][0] accounting for tensor in epoch loop, change if needed
        #print(box)
        xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
        #label = box['label']
        # Create a Rectangle patch
        rect = patches.Rectangle(
            (xmin, ymin), 
            xmax-xmin, 
            ymax-ymin, 
            linewidth=1, 
            edgecolor='r', 
            facecolor='none', 
            label=0
        )

        # Add the rectangle to the axis
        ax.add_patch(rect)

    # Show the image with bounding boxes
    plt.show()

In [98]:
def parse_annotation(annotation_folder):
    final_annotation =[]
    count=1
    for annotation_file in os.listdir(annotation_folder):
        tree = ET.parse(os.path.join(annotation_folder,annotation_file))
        root = tree.getroot()
        parse_annotate_file = []
        for obj in root.findall('object'):
            xmin = float(obj.find('bndbox/xmin').text)
            ymin = float(obj.find('bndbox/ymin').text)
            xmax = float(obj.find('bndbox/xmax').text)
            ymax = float(obj.find('bndbox/ymax').text)
            
            bounding_boxes=[
                xmin,
                ymin,
                xmax,
                ymax
            ]
            parse_annotate_file.append({'bbox':bounding_boxes,'category_id': 1})
        final_annotation.append({'image_id':count,'file_name':annotation_file.replace('.xml', '.png'),'annotations':parse_annotate_file})
        count+=1
    return final_annotation

In [130]:
def adjust_bounding_boxes(bounding_boxes, resize_scale_w, resize_scale_h):
    temp_box = []
    for box in bounding_boxes:
        xmin, ymin, xmax, ymax = box['bbox'][0], box['bbox'][1], box['bbox'][2], box['bbox'][3]
        xmin *= 1/resize_scale_w
        ymin *= 1/resize_scale_h
        xmax *= 1/resize_scale_w
        ymax *= 1/resize_scale_h
        adjusted_boxes=[
            xmin,
            ymin,
            xmax,
            ymax,
        ]
        temp_box.append({'bbox':adjusted_boxes,'category_id': 1})
    return temp_box

def resize_and_pad_image(image, target_size):
    # Calculate the aspect ratio
    aspect_ratio = image.shape[1] / image.shape[0]

    # Determine which dimension (width or height) to fix and resize
    if aspect_ratio > 1:  # Image is wider
        new_width = target_size
        new_height = int(target_size / aspect_ratio)
    else:  # Image is taller or square
        new_height = target_size
        new_width = int(target_size * aspect_ratio)

    # Resize the image while preserving aspect ratio
    resized_image = cv2.resize(image, (new_width, new_height))

    # Pad the image to the desired dimensions
    pad_x = max(0, target_size - new_width)
    pad_y = max(0, target_size - new_height)
    padded_image = cv2.copyMakeBorder(resized_image, 0, pad_y, 0, pad_x, cv2.BORDER_CONSTANT, value=0)

    return padded_image

In [100]:
class CustomCocoDetection(CocoDetection):
    def __init__(self, root,custom_annotations,transforms=None):
        self.transforms = transforms
        self.root = root
        self.custom_annotations = parse_annotation(custom_annotations)
        self.transforms = transforms
        self.adjusted = 0
    
    def __len__(self):
        return len(self.custom_annotations)
    
    def __getitem__(self, index):
        img = cv2.imread(os.path.join(self.root, self.custom_annotations[index]['file_name']))
        #img = Image.open(os.path.join(self.root, self.custom_annotations[index]['file_name']))
        #plt.imshow(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        #img = np.array(img)
        custom_annotations = self.custom_annotations[index].copy()
        target_size = 300  # Desired input size (e.g., 400x400)
        #1 = width, 0 = height
        aspect_ratio = img.shape[1] / img.shape[0]
        # Calculate aspect ratio of the original image
        resize_scale_w=1
        resize_scale_h=1
        # Determine which dimension to fix (width or height)
        if aspect_ratio >= 1:  # Image is wider
            resize_scale_w = img.shape[1]/target_size
            resize_scale_h = img.shape[1]/target_size # Scale height by the same factor
        elif aspect_ratio < 1:  # Image is taller or square
            resize_scale_h = img.shape[0]/target_size
            resize_scale_w = img.shape[0]/target_size
        img = resize_and_pad_image(img,target_size=target_size)
        if(self.adjusted==0):
            custom_annotations['annotations'] = adjust_bounding_boxes(custom_annotations['annotations'],resize_scale_w,resize_scale_h)
            self.adjusted=1
        elif(self.adjusted==1):
            custom_annotations = self.custom_annotations[index].copy()
            custom_annotations['annotations'] = adjust_bounding_boxes(custom_annotations['annotations'],resize_scale_w,resize_scale_h)
        #TODO: rescale and pad needed
        # Convert (x1, y1, x2, y2) format to (x, y, width, height) format
        coco_annotations = []
        labels =[]
        boxes=[]
        for bbox in custom_annotations['annotations']:
            x1, y1, x2, y2 = bbox['bbox']
            x_2 = float(x2)
            y_2 = float(y2)
            x = float(x1)
            y = float(y1)
            #print(x,y,x_2,y_2)
            if x_2 >= target_size:
                x_2 = target_size-1
            if y_2 >= target_size:
                y_2 = target_size-1
            #print("========")
            #print(x, y, x_2, y_2)
            if x_2 <= x or y_2 <= y:
                # Handle the invalid bounding box, e.g., by skipping or correcting it.
                continue
            else:
                boxes.append([x, y, x_2, y_2])
                labels.append(1)
        #TODO: AREA,
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) if len(boxes) > 0 \
            else torch.as_tensor(boxes, dtype=torch.float32)
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        # Convert custom annotations to COCO format
        #print("in loader:",boxes)
        coco_target = {
            "boxes": boxes,
            'labels':labels,
            'area':area,
            'iscrowd':iscrowd,
            "image_id": torch.as_tensor(index,dtype=torch.int64),
            #"file_name": self.custom_annotations[index]['file_name'],
        }
        img /=255.0
        if self.transforms:
            sample = self.transforms(image = img,
                                     bboxes = coco_target['boxes'],
                                     labels = labels)
            image_resized = sample['image']
            coco_target['boxes'] = torch.Tensor(sample['bboxes'])
        if np.isnan((coco_target['boxes']).numpy()).any() or coco_target['boxes'].shape == torch.Size([0]):
            coco_target['boxes'] = torch.zeros((0, 4), dtype=torch.int64)
        if index ==1:
            print(self.custom_annotations[index])
        return image_resized, coco_target

In [101]:
def get_train_transform():
    return A.Compose([
        ToTensorV2(p=1.0),
    ], bbox_params={
        'format': 'pascal_voc',
        'label_fields': ['labels']
    })

In [125]:
dataset = CustomCocoDetection('./Dataset/images','./Dataset/annotations/',transforms=get_train_transform())
val_set = CustomCocoDetection('./Validate/images','./Validate/annotations/',transforms=get_train_transform())

In [103]:
#print(dataset.test(0)[0])
#temp,temp1 =val_set.__getitem__(1) 
#TODO: Error karena variable copy nge edit variable original, need rework and refactoring.
#print(temp1['file_name'])
#print(temp.shape)
#visualize_image_with_boxes(temp,temp1)
#visualize_image_with_boxes(dataset.__getitem__(1)[0],dataset.__getitem__(1)[1]['annotations'])

In [104]:
class PotholeDetector(nn.Module):
    def __init__(self):
        super(PotholeDetector, self).__init__()
        self.base_model = ssdlite320_mobilenet_v3_large(num_classes=2,pretrained=False)


    def forward(self, image,target):
        #print(image.shape)
        return self.base_model(image,target)

In [126]:
trainloader = DataLoader(dataset,batch_size=10,shuffle=True,collate_fn=collate_fn)
val_loader = DataLoader(val_set,batch_size=10,shuffle=True,collate_fn=collate_fn)

In [106]:
model = PotholeDetector()
model.train()
model = model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
        params, lr=0.001, momentum=0.9, nesterov=True
    )
scheduler = MultiStepLR(
        optimizer=optimizer, milestones=[45], gamma=0.1, verbose=True
    )




Adjusting learning rate of group 0 to 1.0000e-03.


In [107]:
for epoch in range(0,200):  # loop over the dataset multiple times
    print(f"Epoch [{epoch}]")
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        optimizer.zero_grad()
        images, labels = data
        images = list(image.to(device) for image in images)

        targets = [{k: v.to(device) for k, v in t.items()} for t in labels]

        outputs= model(images,targets)
        #print('pass')
        losses = sum(loss for loss in outputs.values())

        losses.backward()
        optimizer.step()

        # print statistics
        running_loss += losses.item()
        #print(f"Iteration [{i}]: Loss = {running_loss}\n")
        if (i % 50 == 0 and i!=0):    # print every 2000 mini-batches
            print(f'iter [{i}]| loss: {running_loss / 50:.20f} ')
            running_loss = 0.0
    scheduler.step()
    print()

Epoch [0]
{'image_id': 2, 'file_name': 'potholes101.png', 'annotations': [{'bbox': [158.0, 164.0, 235.0, 185.0], 'category_id': 1}, {'bbox': [319.0, 186.0, 386.0, 211.0], 'category_id': 1}]}
iter [50]| loss: 7.50062845230102581695 
Adjusting learning rate of group 0 to 1.0000e-03.

Epoch [1]
{'image_id': 2, 'file_name': 'potholes101.png', 'annotations': [{'bbox': [158.0, 164.0, 235.0, 185.0], 'category_id': 1}, {'bbox': [319.0, 186.0, 386.0, 211.0], 'category_id': 1}]}
iter [50]| loss: 6.14656947135925335601 
Adjusting learning rate of group 0 to 1.0000e-03.

Epoch [2]
{'image_id': 2, 'file_name': 'potholes101.png', 'annotations': [{'bbox': [158.0, 164.0, 235.0, 185.0], 'category_id': 1}, {'bbox': [319.0, 186.0, 386.0, 211.0], 'category_id': 1}]}
iter [50]| loss: 5.63475920677185015961 
Adjusting learning rate of group 0 to 1.0000e-03.

Epoch [3]
{'image_id': 2, 'file_name': 'potholes101.png', 'annotations': [{'bbox': [158.0, 164.0, 235.0, 185.0], 'category_id': 1}, {'bbox': [319.0, 18

In [134]:
model.eval()
target = []
preds = []
#args = vars(parser.parse_args())
for i, data in enumerate(val_loader, 0):
    images,targets = data
    images = list(image.to(device) for image in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    with torch.no_grad():
            outputs = model(images, targets)
    
    images = list(image.to('cpu') for image in images)
    outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
    #iou_threshold = 0.5
    #print(outputs)
    #print("test")
    if len(outputs[0]['boxes']) != 0:
        for i,output in enumerate(outputs,0):
                #print(output)
                boxes = output['boxes'].data.numpy()
                scores = output['scores'].data.numpy()
                # Filter out boxes according to `detection_threshold`.
                boxes = boxes[scores >= 0.25].astype(np.int32)
                draw_boxes = boxes.copy()
                #print(draw_boxes)
                #print(output)
                #print("===")
                #keep_indices = torchvision.ops.nms(output['boxes'], output['scores'], iou_threshold)
                #selected_boxes = output
                #print(len(selected_boxes['boxes']))
                #print("===========================")
                #selected_boxes['boxes']=selected_boxes['boxes'][keep_indices]
                #print(len(selected_boxes['boxes']))
                #visualize_image_with_boxes(images[i],draw_boxes)
                #temp = input("hello")

# Perform NMS
    #keep_indices = torchvision.ops.nms(boxes, scores, iou_threshold)
    for i in range(len(images)):
            true_dict = dict()
            preds_dict = dict()
            true_dict['boxes'] = targets[i]['boxes'].detach().cpu()
            true_dict['labels'] = targets[i]['labels'].detach().cpu()
            preds_dict['boxes'] = outputs[i]['boxes'].detach().cpu()
            preds_dict['scores'] = outputs[i]['scores'].detach().cpu()
            preds_dict['labels'] = outputs[i]['labels'].detach().cpu()
            preds.append(preds_dict)
            target.append(true_dict)
metric = MeanAveragePrecision()
metric.update(preds, target)
metric_summary = metric.compute()
print(f"mAP_50: {metric_summary['map_50']*100:.3f}")
print(f"mAP_50_95: {metric_summary['map']*100:.3f}")

test
test
test
{'image_id': 2, 'file_name': 'potholes11.png', 'annotations': [{'bbox': [139.0, 153.0, 260.0, 237.0], 'category_id': 1}, {'bbox': [36.0, 170.0, 67.0, 199.0], 'category_id': 1}]}
test
test
test
test
test
test
test


ModuleNotFoundError: `MAP` metric requires that `pycocotools` installed. Please install with `pip install pycocotools` or `pip install torchmetrics[detection]`

In [None]:
#TODO: Cleanup code, new optimizer, losses, etc. create evaluator