In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so the archives read below and the
# checkpoint destination under /content/drive/MyDrive are reachable.
drive.mount("/content/drive")

In [None]:
# Install the third-party packages this notebook needs on a fresh Colab runtime.
# pycocotools provides the COCO annotation reader imported by the dataset cell.
# NOTE(review): versions are not pinned, so reruns may pick up newer releases.
!pip install pycocotools
!pip install vision-transformers

In [None]:
# Unpack the archives stored on Google Drive into the Colab working directory.
# fasterrcnn.zip and CRAG_JSON.zip are extracted in place; CRAG_JSON_2.zip is
# extracted into ./CRAG_JSON_2 (the directory referenced by `paths_2` below).
# NOTE(review): fasterrcnn.zip presumably ships the model-builder modules
# imported in the training cell — confirm against the archive contents.
!unzip /content/drive/MyDrive/fasterrcnn.zip
!unzip /content/drive/MyDrive/CRAG_JSON_2.zip -d CRAG_JSON_2
!unzip /content/drive/MyDrive/CRAG_JSON.zip

In [None]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO
import numpy as np
import matplotlib.pyplot as plt
import cv2

# adapted from https://medium.com/fullstackai/how-to-train-an-object-detector-with-your-own-coco-dataset-in-pytorch-319e7090da5

# data loader following torch.utils.data.Dataset
class ObjectDetectionDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a COCO-format annotation file.

    Each item is an ``(image, target)`` pair where ``image`` is a float tensor
    produced by ``torchvision.transforms.ToTensor`` and ``target`` is a dict of
    tensors. Images without any annotation are never returned; a randomly
    chosen annotated image is served instead.

    Args:
        x_path: Directory containing the image files.
        y_path: Path to the COCO annotation JSON.
        transforms: Optional callable applied to the image tensor only. The
            targets are not transformed, so geometric augmentations would
            de-synchronise image and boxes.
        annotation_style: ``"rectangular"`` (reads ``bbox``) or ``"circular"``
            (reads ``circle_center`` / ``circle_radius``).
        info: When true, print debugging information for every item.
        format: ``"faster rcnn"`` yields boxes as [xmin, ymin, xmax, ymax],
            ``"normal"`` keeps the COCO layout [x, y, width, height].
    """

    def __init__(
        self,
        x_path: str,
        y_path: str,
        transforms=None,
        annotation_style="rectangular",
        info: bool = False,
        format: str = "faster rcnn",
    ) -> None:
        super().__init__()
        self.annotation_style = annotation_style
        self.format = format
        assert self.annotation_style in ["rectangular", "circular"]
        # faster rcnn requires [xmin, ymin, xmax, ymax], normal is [x, y, width, height]
        assert self.format in ["faster rcnn", "normal"]

        self.x_path = x_path
        self.coco = COCO(y_path)
        self.transforms = transforms
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.info = info
        # positions in self.ids that own at least one annotation (built lazily)
        self._annotated_indices = None

    def _annotated_sample(self, index: int):
        """Return ``(index, annotations)`` for an image that has annotations.

        If ``index`` points at an unannotated image, a random annotated image
        is substituted. Raises ``RuntimeError`` when no image in the dataset
        has annotations (the previous retry loop would spin forever).
        """
        annotations = self.coco.loadAnns(self.coco.getAnnIds(self.ids[index]))
        if len(annotations) > 0:
            return index, annotations
        if getattr(self, "_annotated_indices", None) is None:
            self._annotated_indices = [
                position
                for position, image_id in enumerate(self.ids)
                if len(self.coco.getAnnIds(image_id)) > 0
            ]
        if len(self._annotated_indices) == 0:
            raise RuntimeError("the annotation file contains no annotated images")
        pick = np.random.randint(0, len(self._annotated_indices))
        index = self._annotated_indices[pick]
        return index, self.coco.loadAnns(self.coco.getAnnIds(self.ids[index]))

    def _rectangular_target(self, annotations, image_id):
        """Build the box target dict, dropping boxes without positive extent."""
        if self.format == "normal":
            # "normal" format is [x, y, width, height]
            boxes = torch.as_tensor(
                [ann["bbox"] for ann in annotations], dtype=torch.float32
            ).reshape(-1, 4)
            keep = (boxes[:, 2] > 0) & (boxes[:, 3] > 0)
        else:
            # faster rcnn format is [xmin, ymin, xmax, ymax]
            boxes = torch.as_tensor(
                [
                    [
                        ann["bbox"][0],
                        ann["bbox"][1],
                        ann["bbox"][0] + ann["bbox"][2],
                        ann["bbox"][1] + ann["bbox"][3],
                    ]
                    for ann in annotations
                ],
                dtype=torch.float32,
            ).reshape(-1, 4)
            keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        labels = torch.as_tensor(
            [ann["category_id"] for ann in annotations], dtype=torch.int64
        )
        area = torch.as_tensor([ann["area"] for ann in annotations], dtype=torch.float32)
        iscrowd = torch.as_tensor(
            [ann["iscrowd"] for ann in annotations], dtype=torch.int64
        )
        # the integrity mask is applied to every per-box field so they stay aligned
        return {
            "boxes": boxes[keep],
            "labels": labels[keep],
            "image_id": torch.tensor([image_id]),
            "area": area[keep],
            "iscrowd": iscrowd[keep],
        }

    def _circular_target(self, annotations, image_id):
        """Build the circle target dict, or ``None`` if no circle has radius > 0."""
        if len(annotations) == 0:
            return None
        circle_center = torch.as_tensor(
            [ann["circle_center"] for ann in annotations], dtype=torch.float32
        )  # center of the circle is given as [x, y]
        circle_radius = torch.as_tensor(
            [ann["circle_radius"] for ann in annotations], dtype=torch.float32
        )
        labels = torch.as_tensor(
            [ann["category_id"] for ann in annotations], dtype=torch.int64
        )
        area = torch.as_tensor([ann["area"] for ann in annotations], dtype=torch.float32)
        iscrowd = torch.as_tensor(
            [ann["iscrowd"] for ann in annotations], dtype=torch.int64
        )
        # one flag per annotation, whatever the per-annotation radius shape is
        keep = (circle_radius.reshape(len(annotations), -1) > 0).all(dim=1)
        if not bool(keep.any()):
            return None
        return {
            "circle_center": circle_center[keep],
            "circle_radius": circle_radius[keep],
            "labels": labels[keep],
            "image_id": torch.tensor([image_id]),
            "area": area[keep],
            "iscrowd": iscrowd[keep],
        }

    def __getitem__(self, index: int):
        """Load one sample.

        Returns ``(image, target)``. In circular mode ``None`` is returned when
        the image has no circle with a positive radius, so callers/collate
        functions must be prepared to skip ``None``.
        """
        index, annotations = self._annotated_sample(index)
        image_id = self.ids[index]

        # prepare the image; the context manager releases the file handle
        file_name = self.coco.loadImgs(image_id)[0]["file_name"]
        with Image.open(os.path.join(self.x_path, file_name)) as handle:
            image = torchvision.transforms.ToTensor()(handle.convert("RGB"))
        # augment the image
        if self.transforms is not None:
            image = self.transforms(image)

        # prepare the target
        target = annotations
        if self.annotation_style == "rectangular":
            target = self._rectangular_target(annotations, image_id)
            if self.info:
                print(len(target), type(target), target)
        elif self.annotation_style == "circular":
            target = self._circular_target(annotations, image_id)
            if target is None:
                return None

        if self.info:
            print(
                len(target), image.shape, np.min(image.numpy()), np.max(image.numpy())
            )
        return image, target

    # return the number of samples in the dataset
    def __len__(self) -> int:
        return len(self.ids)


# file locations of the first CRAG export (bbox_*.json annotations)
paths = dict(
    train_y_c="/content/CRAG_JSON/train/C_Json/bbox_train.json",
    train_x="/content/CRAG_JSON/train/Images/",
    train_y="/content/CRAG_JSON/train/Json/bbox_train.json",
    test_y="/content/CRAG_JSON/test/Json/bbox_test.json",
    test_x="/content/CRAG_JSON/test/Images/",
)

# file locations of the second CRAG export (instances_*.json annotations)
paths_2 = dict(
    train_y_c="/content/CRAG_JSON_2/CRAG/train/C_JSON/instances_train.json",
    train_x="/content/CRAG_JSON_2/CRAG/train/Images/",
    train_y="/content/CRAG_JSON_2/CRAG/train/JSON/instances_train.json",
    test_y="/content/CRAG_JSON_2/CRAG/test/JSON/instances_test.json",
    test_x="/content/CRAG_JSON_2/CRAG/test/Images/",
)

# build the datasets from the second CRAG export:
# box annotations for train/test and circle annotations for train
train_dataset = ObjectDetectionDataset(
    x_path=paths_2["train_x"],
    y_path=paths_2["train_y"],
    annotation_style="rectangular",
)
test_dataset = ObjectDetectionDataset(
    x_path=paths_2["test_x"],
    y_path=paths_2["test_y"],
    annotation_style="rectangular",
)
train_dataset_c = ObjectDetectionDataset(
    x_path=paths_2["train_x"],
    y_path=paths_2["train_y_c"],
    annotation_style="circular",
)

# collate function for data loader to return a tuple of images and targets
def collate_fn(batch):
    """Transpose a list of ``(image, target)`` samples into ``(images, targets)``.

    Samples that are ``None`` (the dataset returns ``None`` for images without
    a usable circular annotation) are skipped instead of crashing ``zip``.
    If every sample is ``None`` the result is an empty tuple.
    """
    samples = [sample for sample in batch if sample is not None]
    return tuple(zip(*samples))


# number of samples per batch shared by all loaders
BATCH_SIZE = 8

# training batches (box annotations), reshuffled every epoch
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    collate_fn=collate_fn,
    batch_size=BATCH_SIZE,
    num_workers=0,
    shuffle=True,
)
# evaluation batches (box annotations), kept in dataset order
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    collate_fn=collate_fn,
    batch_size=BATCH_SIZE,
    num_workers=0,
    shuffle=False,
)
# training batches (circle annotations), reshuffled every epoch
train_dataloader_c = torch.utils.data.DataLoader(
    dataset=train_dataset_c,
    collate_fn=collate_fn,
    batch_size=BATCH_SIZE,
    num_workers=0,
    shuffle=True,
)

# check data integrity
def _info_ex(
    data_loader,
    return_keys: bool = True,
    num_examples: int = 1,
    print_info: bool = False,
    annotation_style: str = "rectangular",
):
    assert annotation_style in ["rectangular", "circular"]
    # circular annotation style has different keys
    # it is following circle net format: https://github.com/hrlblab/CircleNet/tree/master
    target_info = []
    keys = []  # Initialize an empty list to store keys
    for i, (images, targets) in enumerate(data_loader):
        if annotation_style == "rectangular":
            target_info.append(targets[0]["boxes"].shape), target_info.append(
                targets[0]["labels"].shape
            )
        else:
            target_info.append(targets[0]["circle_center"].shape), target_info.append(
                targets[0]["circle_radius"].shape
            )
        if print_info:
            target_shapes = ", ".join([str(shape) for shape in target_info])
            print(
                f"Data Info: \n Shapes -> Image: {images[0].shape}, Target: {target_shapes}, Image Min: {torch.min(images[0])}, Image Max: {torch.max(images[0])}\n "
                f"Types -> Image: {images[0].dtype}, {type(images)} Target: {[target.dtype for target in targets[0].values()]}, {type(targets)}"
            )
        if i == num_examples - 1:
            break
    if return_keys:
        keys = [key for key in targets[0].keys()]  # Collect keys after loop completion
        return keys


# sanity check: print shapes, value range and dtypes of the first training batch
_info_ex(train_dataloader, annotation_style="rectangular", print_info=True)

# display some examples
def _ex(data_loader, annotaitons_style:str = "rectangular"):
    """Plot the first sample of the next batch together with its annotations.

    Args:
        data_loader: Iterable yielding ``(images, targets)`` batches.
        annotaitons_style: ``"rectangular"`` draws the ``boxes`` of the target;
            any other value draws ``circle_center`` / ``circle_radius``.

    Box layout is taken from ``data_loader.dataset.format`` when available:
    ``"faster rcnn"`` means [xmin, ymin, xmax, ymax]; otherwise boxes are
    treated as [x, y, width, height].
    """
    images, targets = next(iter(data_loader))
    if annotaitons_style == "rectangular":
        # matplotlib expects RGB, which is what the dataset's PIL loader provides,
        # so no channel swap is applied; cv2 needs a contiguous array to draw on
        image = np.ascontiguousarray(images[0].permute(1, 2, 0).cpu().numpy())
        box_format = getattr(getattr(data_loader, "dataset", None), "format", "normal")
        # float images live in [0, 1], uint8 images in [0, 255]
        green = (0, 255, 0) if image.dtype == np.uint8 else (0.0, 1.0, 0.0)

        for bbox in targets[0]['boxes']:
            x1, y1 = int(bbox[0]), int(bbox[1])
            if box_format == "faster rcnn":
                x2, y2 = int(bbox[2]), int(bbox[3])
            else:
                x2, y2 = x1 + int(bbox[2]), y1 + int(bbox[3])
            cv2.rectangle(image, (x1, y1), (x2, y2), green, 2)

        plt.figure(figsize=(10,10))
        plt.imshow(image)
        plt.show()

    else:
        circle_center = targets[0]['circle_center']
        circle_radius = targets[0]['circle_radius']
        for i in range(len(circle_center)):
            x, y = circle_center[i]
            r = circle_radius[i]
            # first figure: centre only
            plt.imshow(images[0].permute(1, 2, 0))
            plt.plot([x], [y], 'o')
            plt.show()
            # second figure: centre plus the circle outline
            plt.imshow(images[0].permute(1, 2, 0))
            plt.plot([x], [y], 'o')
            circle = plt.Circle((x, y), r, color='r', fill=False)
            plt.gca().add_patch(circle)
            plt.show()

#_ex(train_dataloader, annotaitons_style="rectangular")

In [None]:
# torch faster rcnn models
# by @sovit-123 https://github.com/sovit-123
# code: https://github.com/sovit-123/fasterrcnn-pytorch-training-pipeline/tree/main

""" from fasterrcnn_mobilevit_xss import create_model as fasterrcnn_mobilevit_xss
from fasterrcnn_vitdet_tiny import create_model as fasterrcnn_vitdet_tiny
from fasterrcnn_mbv3_nano import create_model as fasterrcnn_mbv3_nano
from fasterrcnn_convnext_tiny import create_model as fasterrcnn_convnext_tiny
from fasterrcnn_effb0 import create_model as fasterrcnn_effb0
from nano_fasterrcnn import create_model as nano_fasterrcnn
from fasterrcnn_mini_squeezenet1_1_tiny_head import (
    create_model as fasterrcnn_mini_squeezenet1_1_tiny_head,
)
from fasterrcnn_regnet_y_400mf import create_model as fasterrcnn_regnet_y_400mf
from fasterrcnn_resnet50_fpn_v2 import create_model as fasterrcnn_resnet50_fpn_v2
from fasterrcnn_resnet18 import create_model as fasterrcnn_resnet18
from fasterrcnn_mini_darknet_nano_head import (
    create_model as fasterrcnn_mini_darknet_nano_head,
)
from fasterrcnn_darknet import create_model as fasterrcnn_darknet
from fasterrcnn_convnext_small import create_model as fasterrcnn_convnext_small
from fasterrcnn_custom_resnet import create_model as fasterrcnn_custom_resnet """

from fasterrcnn_resnet101 import create_model as fasterrcnn_resnet101

from utils import eval_forward
import numpy as np
import torch
from torch.optim.optimizer import Optimizer
import numpy as np
import time
import csv
import torch.nn.functional as F
import datetime


class Lion(Optimizer):
    """Sign-based optimizer with a single momentum buffer per parameter.

    Each step applies decoupled weight decay, moves the parameter by
    ``lr * sign(interpolate(exp_avg, grad, 1 - beta1))`` and then refreshes the
    buffer with ``interpolate(exp_avg, grad, 1 - beta2)``.

    Args:
        params: Parameters (or parameter groups) to optimize.
        lr: Step size; must be positive.
        betas: Two interpolation coefficients, each in [0, 1].
        weight_decay: Decoupled weight-decay factor.
    """

    def __init__(
        self,
        params,
        lr: float = 1e-4,
        betas: tuple = (0.9, 0.99),
        weight_decay: float = 0.0,
    ):
        assert lr > 0.0
        assert all([0.0 <= beta <= 1.0 for beta in betas])
        super().__init__(params, dict(lr=lr, betas=betas, weight_decay=weight_decay))

    @torch.no_grad()
    def step(self, closure=None):
        """Run one optimization step; returns the closure's loss if one is given."""
        loss = None
        if closure is not None:
            # the closure re-evaluates the model, so it needs gradients enabled
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue
                gradient = param.grad
                step_size = group["lr"]
                decay = group["weight_decay"]
                beta1, beta2 = group["betas"]
                state = self.state[param]
                if len(state) == 0:
                    state["exp_avg"] = torch.zeros_like(param)
                momentum = state["exp_avg"]

                # decoupled weight decay
                param.data.mul_(1 - step_size * decay)
                # signed step along the interpolated direction
                direction = momentum.clone().lerp_(gradient, 1 - beta1)
                param.add_(torch.sign(direction), alpha=-step_size)
                # momentum refresh uses the second coefficient
                momentum.lerp_(gradient, 1 - beta2)
        return loss

# write training and validation losses to csv
def write_to_csv(
    loss_sum: float,
    losses: dict,
    epoch: int,
    header_written: bool = False,
    filename: str = f"train_{datetime.date.today()}.csv",
):
    """Append one row of losses to ``filename``.

    The row is ``epoch + 1``, the total loss and then one column per entry of
    ``losses``. When ``header_written`` is false a header row
    (``epoch, loss, <loss names...>``) is written first.

    Args:
        loss_sum: Total loss (number or one-element tensor).
        losses: Mapping from loss name to value (number or one-element tensor).
        epoch: Zero-based epoch index.
        header_written: Whether the header row already exists.
        filename: Target CSV file, opened in append mode. Note that the default
            is evaluated once, when the function is defined.

    Returns:
        ``True`` after the call, i.e. the value to pass as ``header_written``
        on the next call.
    """

    def _as_number(value):
        # one-element tensors become plain floats; anything else is written as-is
        try:
            return float(value)
        except (TypeError, ValueError, RuntimeError):
            return value

    # newline="" lets the csv module control line endings
    with open(filename, "a", newline="") as f:
        writer = csv.writer(f)
        if not header_written:
            # one column per loss name instead of a single "dict_keys(...)" cell
            writer.writerow(["epoch", "loss", *losses.keys()])
            header_written = True
        writer.writerow(
            [epoch + 1, _as_number(loss_sum), *(_as_number(v) for v in losses.values())]
        )
    return header_written

# train and test function
def obj_inference(
    model,
    train_generator,
    test_generator,
    device: str,
    num_epochs: int = 5,
    test_after_epoch: bool = True,
    save_model: bool = True,
    save_path: str = "model_epoch_",
    save_to_csv: bool = True,
    optimizer=None,
):
    """Train ``model`` and, optionally, validate it after every epoch.

    Args:
        model: Detection model; called as ``model(images, targets)`` in train
            mode and expected to return a dict of loss tensors.
        train_generator: Sized iterable of ``(images, targets)`` batches.
        test_generator: Iterable of ``(images, targets)`` validation batches.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``train_generator``.
        test_after_epoch: Run validation after each epoch when true.
        save_model: Save the state dict (``save_path + epoch``) and the whole
            model (``save_path + "cm_" + epoch``) after each epoch.
        save_path: Prefix of the checkpoint file names.
        save_to_csv: Log losses through ``write_to_csv``.
        optimizer: Optimizer to step. When omitted, the module-level
            ``optimizer`` is used, which is how earlier versions worked.

    Raises:
        ValueError: If no optimizer is passed and no global one exists.

    If the validation loss rises two epochs in a row, learning rate and weight
    decay of every parameter group are multiplied by 0.1.
    """
    if optimizer is None:
        optimizer = globals().get("optimizer")
        if optimizer is None:
            raise ValueError(
                "no optimizer passed and no module-level `optimizer` is defined"
            )

    start = time.time()
    lr_checker = []  # validation loss per epoch, as plain floats
    # one header per CSV file; the flags persist across batches and epochs
    train_header_written = False
    val_header_written = False
    num_train_batches = len(train_generator)

    for epoch in range(num_epochs):
        model.train()
        epoch_start = time.time()
        train_loss_oa = 0.0
        if len(lr_checker) > 2:
            # Check if validation loss increased for the last two epochs
            if lr_checker[-1] > lr_checker[-2] and lr_checker[-2] > lr_checker[-3]:
                print("Learning rate reduced")
                for param_group in optimizer.param_groups:
                    # scale learning rate and weight decay by a factor of 0.1
                    param_group["lr"] = param_group["lr"] * 0.1
                    if "weight_decay" in param_group:
                        param_group["weight_decay"] = param_group["weight_decay"] * 0.1

        for batch, (x, y) in enumerate(train_generator, start=1):
            x = [image.to(device) for image in x]
            y = [{k: v.to(device) for k, v in target.items()} for target in y]
            optimizer.zero_grad()
            loss_dict = model(x, y)
            train_loss = sum(loss for loss in loss_dict.values())
            train_loss.backward()
            optimizer.step()
            # accumulate a float so no autograd history is kept alive
            batch_loss = float(train_loss)
            train_loss_oa += batch_loss  # overall loss
            now = time.time()
            elapsed_time = now - start
            # the estimate is per epoch, so it is based on this epoch's timing only
            avg_time_per_batch = (now - epoch_start) / batch
            remaining_time = avg_time_per_batch * (num_train_batches - batch)
            print(
                f"\rEpoch: {epoch + 1}, Batch: {batch}/{num_train_batches}, Train Loss: {batch_loss:.4f}, Elapsed Time: {elapsed_time:.2f}s/{elapsed_time/60:.2f}m, Remaining Time (epoch): {remaining_time:.2f}s/{remaining_time/60:.2f}m",
                end="",
            )
            if save_to_csv:
                train_header_written = write_to_csv(
                    batch_loss,
                    {name: float(value) for name, value in loss_dict.items()},
                    epoch,
                    header_written=train_header_written,
                )
        print("\n")
        print(
            f"Epoch: {epoch + 1}, Train Loss: {train_loss_oa / max(num_train_batches, 1):.4f}"
        )
        print("\n")
        if save_model:
            torch.save(model.state_dict(), save_path + str(epoch + 1))
            torch.save(model, save_path + "cm_" + str(epoch + 1))

        if not test_after_epoch:
            continue

        model.eval()
        val_loss_oa = 0.0
        val_component_sums = {}
        num_val_batches = 0
        with torch.no_grad():
            for x, y in test_generator:
                x = [image.to(device) for image in x]
                y = [{k: v.to(device) for k, v in target.items()} for target in y]
                # eval_forward comes from the project's utils module; it is used
                # here as returning (loss dict, detections)
                loss_dict, detections = eval_forward(model, x, y)
                val_loss = float(sum(loss for loss in loss_dict.values()))
                val_loss_oa += val_loss
                num_val_batches += 1
                for name, value in loss_dict.items():
                    val_component_sums[name] = val_component_sums.get(name, 0.0) + float(value)
                print(
                    f"\rValidation Loss: {val_loss:.4f}, Overall Losses: {loss_dict}",
                    end="",
                )
        print("\n")
        if num_val_batches == 0:
            print("Validation skipped: test_generator yielded no batches\n")
            continue
        # average over the batches actually seen, not over a global loader
        validation_loss = val_loss_oa / num_val_batches
        lr_checker.append(validation_loss)
        print(f"Validation Loss: {validation_loss:.4f}\n")
        if save_to_csv:
            # log per-component means so they match the averaged total
            val_header_written = write_to_csv(
                validation_loss,
                {
                    name: total / num_val_batches
                    for name, total in val_component_sums.items()
                },
                epoch,
                header_written=val_header_written,
                filename=f"test_{datetime.date.today()}.csv",
            )


# two-class detector (num_classes=2) with its Lion optimizer
model = fasterrcnn_resnet101(num_classes=2, pretrained=True, coco_model=False)
optimizer = Lion(model.parameters(), lr=1e-4, weight_decay=1e-5)

# prefer the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
num_epochs = 30

# run training with per-epoch validation
obj_inference(
    model,
    train_dataloader,
    test_dataloader,
    device,
    num_epochs=num_epochs,
)

In [None]:
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt

# load the trained model weights
# NOTE: "/path/to/saved/model.pth" is a placeholder and must point at a real checkpoint
model.load_state_dict(
    torch.load("/path/to/saved/model.pth", map_location=torch.device("cpu"))
)
model.eval() # model needs to be set to eval mode for it to be able to only take in an image and return predictions
test_images, ground_truth = next(iter(test_dataloader))
test_images = list(image.to(device) for image in test_images)
# inference only: skip autograd bookkeeping to save memory
with torch.no_grad():
    test_outputs = model(test_images)
# copy every prediction tensor to the CPU for plotting
test_output = [
    {k: v.to(torch.device("cpu")) for k, v in t.items()} for t in test_outputs
]
# print(test_output)

# show predictions and ground truth for one image of a batch
def display_test(test_images, predictions, ground_truth, image_index):
    """Plot three views of ``test_images[image_index]``.

    1. predictions (green) together with ground truth (red),
    2. ground truth only,
    3. predictions only (thin outline).

    Args:
        test_images: Sequence of CHW image tensors (any device).
        predictions: Sequence of dicts with a ``"boxes"`` tensor in
            [x1, y1, x2, y2] layout.
        ground_truth: Sequence of dicts with a ``"boxes"`` tensor in the same
            layout.
        image_index: Position of the image to display.
    """

    def _fresh_canvas():
        # .cpu() makes this work for CUDA tensors; cv2 needs a contiguous HWC array.
        # The image stays RGB because matplotlib displays RGB.
        chw = test_images[image_index].detach().cpu().numpy()
        return np.ascontiguousarray(chw.transpose((1, 2, 0)))

    def _draw(canvas, boxes, color, thickness):
        # float canvases live in [0, 1], so 0-255 colours are rescaled for them
        if canvas.dtype != np.uint8:
            color = tuple(channel / 255.0 for channel in color)
        for bbox in boxes:
            x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
            cv2.rectangle(canvas, (x1, y1), (x2, y2), color, thickness)

    pred_boxes = predictions[image_index]["boxes"].detach().cpu().numpy()
    gt_boxes = ground_truth[image_index]["boxes"].detach().cpu().numpy()
    green, red = (0, 255, 0), (255, 0, 0)

    # each entry describes the overlays of one figure: (boxes, colour, thickness)
    figures = (
        ((pred_boxes, green, 2), (gt_boxes, red, 2)),
        ((gt_boxes, red, 2),),
        ((pred_boxes, green, 1),),
    )
    for overlays in figures:
        plt.figure(figsize=(12, 6))
        canvas = _fresh_canvas()
        for boxes, color, thickness in overlays:
            _draw(canvas, boxes, color, thickness)
        plt.imshow(canvas)
        plt.show()


# render the three comparison figures for every image of the sampled batch
for image_index in range(len(test_images)):
    display_test(
        test_images=test_images,
        predictions=test_output,
        ground_truth=ground_truth,
        image_index=image_index,
    )


def IOU(box1, box2):
    """Intersection over union of two boxes given as (x, y, width, height).

    Returns 0 when the boxes do not overlap, otherwise
    ``intersection / (area1 + area2 - intersection)``.
    """
    left_a, top_a, width_a, height_a = box1[0], box1[1], box1[2], box1[3]
    left_b, top_b, width_b, height_b = box2[0], box2[1], box2[2], box2[3]

    # extent of the overlap along each axis (non-positive means no overlap)
    overlap_w = min(left_a + width_a, left_b + width_b) - max(left_a, left_b)
    overlap_h = min(top_a + height_a, top_b + height_b) - max(top_a, top_b)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection = overlap_w * overlap_h
    union = width_a * height_a + width_b * height_b - intersection
    return intersection / union


""" for i in range(len(ground_truth[1]['boxes'])):
    box1 = test_outputs[1]['boxes'][i].detach().numpy()
    box1 = box1[0], box1[1], box1[2]-box1[0], box1[3]-box1[1]

    box2 = ground_truth[1]['boxes'][i].detach().numpy()
    box2 = box2[0], box2[1], box2[2]-box2[0], box2[3]-box2[1]

    print(IOU(box1, box2)) """

In [None]:
import shutil

# persist the epoch-4 checkpoint by moving it from the Colab runtime to Google Drive
shutil.move(
    src="/content/model_epoch_4",
    dst="/content/drive/MyDrive/model_epoch_4_24.08_fasterrcnn_2.pth",
)
# shutil.move("/content/model_epoch_7", "/content/drive/MyDrive/model_epoch_7_22.08_fasterrcnn_vitdet_tiny.pth")