<a href="https://colab.research.google.com/github/NamishBansal15/transformer-modeling-25/blob/main/T30Faster_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install pycocotools
!pip install albumentations
!pip install matplotlib seaborn

Looking in indexes: https://download.pytorch.org/whl/cu121


In [1]:
# ------------------------------
# Imports
# ------------------------------
import os
import json
import random
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision import transforms as T
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tqdm import tqdm
import numpy as np

# ------------------------------
# Paths
# ------------------------------
images_dir = "/content/images/"
annotations_file = "/content/_annotations.coco.json"
output_dir = "/content/"

# ------------------------------
# Step 1: Split COCO JSON into train/val/test
# ------------------------------
with open(annotations_file, "r") as f:
    coco = json.load(f)

images = coco["images"]
annotations = coco["annotations"]
categories = coco["categories"]

random.shuffle(images)
N = len(images)
train_cut = int(0.7 * N)
val_cut = int(0.9 * N)

train_images = images[:train_cut]
val_images = images[train_cut:val_cut]
test_images = images[val_cut:]

def filter_annotations(img_list):
    img_ids = set(img["id"] for img in img_list)
    return [ann for ann in annotations if ann["image_id"] in img_ids]

splits = {
    "train": (train_images, filter_annotations(train_images)),
    "val": (val_images, filter_annotations(val_images)),
    "test": (test_images, filter_annotations(test_images)),
}

minimal_info = {
    "info": {"description": "Custom dataset", "version": "1.0"},
    "licenses": [{"id": 1, "name": "None", "url": "None"}]
}

for split_name, (img_list, ann_list) in splits.items():
    out_json = {
        **minimal_info,
        "images": img_list,
        "annotations": ann_list,
        "categories": categories
    }
    path = os.path.join(output_dir, f"instances_{split_name}.json")
    with open(path, "w") as f:
        json.dump(out_json, f, indent=2)
    print(f"{split_name} split saved: {len(img_list)} images, {len(ann_list)} annotations")

# ------------------------------
# Step 2: Transforms
# ------------------------------
def get_transform(train=True):
    t = [T.ToTensor()]
    if train:
        t.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(t)

# ------------------------------
# Step 3: CocoDetection wrapper with transform
# ------------------------------
class CocoDetectionTransformed(CocoDetection):
    def __init__(self, root, annFile, transform=None):
        super().__init__(root, annFile)
        self.transform = transform

    def __getitem__(self, idx):
        img, target = super().__getitem__(idx)
        if self.transform:
            img = self.transform(img)
        return img, target

# ------------------------------
# Step 4: DataLoaders
# ------------------------------
def collate_fn(batch):
    return tuple(zip(*batch))

train_dataset = CocoDetectionTransformed(images_dir, os.path.join(output_dir,"instances_train.json"), transform=get_transform(True))
val_dataset = CocoDetectionTransformed(images_dir, os.path.join(output_dir,"instances_val.json"), transform=get_transform(False))
test_dataset = CocoDetectionTransformed(images_dir, os.path.join(output_dir,"instances_test.json"), transform=get_transform(False))

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

# ------------------------------
# Step 5: Faster R-CNN model
# ------------------------------
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  # 1 class + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# ------------------------------
# Step 6: Optimizer
# ------------------------------
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 50

# ------------------------------
# Step 7: Helper to transform targets
# ------------------------------
def transform_targets(targets):
    transformed = []
    for t in targets:
        boxes = []
        labels = []
        image_id = t[0]["image_id"] if t else 0
        for ann in t:
            x, y, w, h = ann["bbox"]
            boxes.append([x, y, x+w, y+h])
            labels.append(1)  # single class
        if boxes:
            boxes = torch.tensor(boxes, dtype=torch.float32)
            labels = torch.tensor(labels, dtype=torch.int64)
        else:
            boxes = torch.zeros((0,4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
        transformed.append({"boxes": boxes, "labels": labels, "image_id": torch.tensor([image_id])})
    return transformed

# ------------------------------
# Step 8: Training Loop
# ------------------------------
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images = list(img.to(device) for img in images)
        targets = transform_targets(targets)
        targets = [{k:v.to(device) for k,v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        running_loss += losses.item()
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/len(train_loader):.4f}")

# ------------------------------
# Step 9: Evaluation
# ------------------------------
def evaluate_model_rf(model, data_loader, device, save_json="preds.json"):
    model.eval()
    results = []
    with torch.no_grad():
        for images, targets in tqdm(data_loader):
            images = [img.to(device) for img in images]
            outputs = model(images)
            for target, output in zip(targets, outputs):
                image_id = int(target[0]["image_id"]) if target else 0
                boxes = output["boxes"].cpu().numpy()
                scores = output["scores"].cpu().numpy()
                labels = output["labels"].cpu().numpy()
                for box, score, label in zip(boxes, scores, labels):
                    x1,y1,x2,y2 = box
                    results.append({
                        "image_id": image_id,
                        "category_id": 1,
                        "bbox": [float(x1), float(y1), float(x2-x1), float(y2-y1)],
                        "score": float(score)
                    })
    with open(save_json, "w") as f:
        json.dump(results, f, indent=2)
    return save_json

def compute_rf_metrics(gt_json, pred_json, iou_thresh=0.5):
    coco_gt = COCO(gt_json)
    coco_dt = coco_gt.loadRes(pred_json)
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.params.iouThrs = np.array([iou_thresh])
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    # Extract Roboflow-style metrics
    mAP_05 = coco_eval.stats[0]  # AP at IoU=0.5:0.95, but with iouThrs set to 0.5 only
    precision = coco_eval.stats[1]  # Precision at IoU=0.5
    recall = coco_eval.stats[8]     # Recall at IoU=0.5
    return mAP_05, precision, recall

# ------------------------------
# Step 10: Run evaluation
# ------------------------------
print("Running Roboflow-style evaluation on validation set...")
pred_val = evaluate_model_rf(model, val_loader, device, save_json="val_preds.json")
mAP_05_val, precision_val, recall_val = compute_rf_metrics(os.path.join(output_dir,"instances_val.json"), "val_preds.json")
print(f"Validation - mAP@0.5: {mAP_05_val:.3f}, Precision: {precision_val:.3f}, Recall: {recall_val:.3f}")

print("Running Roboflow-style evaluation on test set...")
pred_test = evaluate_model_rf(model, test_loader, device, save_json="test_preds.json")
mAP_05_test, precision_test, recall_test = compute_rf_metrics(os.path.join(output_dir,"instances_test.json"), "test_preds.json")
print(f"Test - mAP@0.5: {mAP_05_test:.3f}, Precision: {precision_test:.3f}, Recall: {recall_test:.3f}")

train split saved: 21 images, 60 annotations
val split saved: 6 images, 21 annotations
test split saved: 3 images, 14 annotations
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!




Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 192MB/s]
Epoch 1/50: 100%|██████████| 6/6 [00:06<00:00,  1.01s/it]


Epoch 1/50 - Loss: 0.6626


Epoch 2/50: 100%|██████████| 6/6 [00:03<00:00,  1.53it/s]


Epoch 2/50 - Loss: 0.5856


Epoch 3/50: 100%|██████████| 6/6 [00:03<00:00,  1.53it/s]


Epoch 3/50 - Loss: 0.5164


Epoch 4/50: 100%|██████████| 6/6 [00:03<00:00,  1.50it/s]


Epoch 4/50 - Loss: 0.5248


Epoch 5/50: 100%|██████████| 6/6 [00:03<00:00,  1.52it/s]


Epoch 5/50 - Loss: 0.5085


Epoch 6/50: 100%|██████████| 6/6 [00:03<00:00,  1.51it/s]


Epoch 6/50 - Loss: 0.4968


Epoch 7/50: 100%|██████████| 6/6 [00:04<00:00,  1.48it/s]


Epoch 7/50 - Loss: 0.4901


Epoch 8/50: 100%|██████████| 6/6 [00:04<00:00,  1.49it/s]


Epoch 8/50 - Loss: 0.5437


Epoch 9/50: 100%|██████████| 6/6 [00:04<00:00,  1.49it/s]


Epoch 9/50 - Loss: 0.4426


Epoch 10/50: 100%|██████████| 6/6 [00:04<00:00,  1.47it/s]


Epoch 10/50 - Loss: 0.5420


Epoch 11/50: 100%|██████████| 6/6 [00:04<00:00,  1.47it/s]


Epoch 11/50 - Loss: 0.3951


Epoch 12/50: 100%|██████████| 6/6 [00:04<00:00,  1.46it/s]


Epoch 12/50 - Loss: 0.4527


Epoch 13/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 13/50 - Loss: 0.4788


Epoch 14/50: 100%|██████████| 6/6 [00:04<00:00,  1.45it/s]


Epoch 14/50 - Loss: 0.3784


Epoch 15/50: 100%|██████████| 6/6 [00:04<00:00,  1.44it/s]


Epoch 15/50 - Loss: 0.3344


Epoch 16/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 16/50 - Loss: 0.2966


Epoch 17/50: 100%|██████████| 6/6 [00:04<00:00,  1.44it/s]


Epoch 17/50 - Loss: 0.2526


Epoch 18/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 18/50 - Loss: 0.2693


Epoch 19/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 19/50 - Loss: 0.2193


Epoch 20/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 20/50 - Loss: 0.2239


Epoch 21/50: 100%|██████████| 6/6 [00:04<00:00,  1.39it/s]


Epoch 21/50 - Loss: 0.2192


Epoch 22/50: 100%|██████████| 6/6 [00:04<00:00,  1.41it/s]


Epoch 22/50 - Loss: 0.1979


Epoch 23/50: 100%|██████████| 6/6 [00:04<00:00,  1.41it/s]


Epoch 23/50 - Loss: 0.1727


Epoch 24/50: 100%|██████████| 6/6 [00:04<00:00,  1.40it/s]


Epoch 24/50 - Loss: 0.1578


Epoch 25/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 25/50 - Loss: 0.1709


Epoch 26/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 26/50 - Loss: 0.1646


Epoch 27/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 27/50 - Loss: 0.1994


Epoch 28/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 28/50 - Loss: 0.1571


Epoch 29/50: 100%|██████████| 6/6 [00:04<00:00,  1.44it/s]


Epoch 29/50 - Loss: 0.1568


Epoch 30/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 30/50 - Loss: 0.2010


Epoch 31/50: 100%|██████████| 6/6 [00:04<00:00,  1.44it/s]


Epoch 31/50 - Loss: 0.2056


Epoch 32/50: 100%|██████████| 6/6 [00:04<00:00,  1.44it/s]


Epoch 32/50 - Loss: 0.1935


Epoch 33/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 33/50 - Loss: 0.1825


Epoch 34/50: 100%|██████████| 6/6 [00:04<00:00,  1.44it/s]


Epoch 34/50 - Loss: 0.1828


Epoch 35/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 35/50 - Loss: 0.1337


Epoch 36/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 36/50 - Loss: 0.1170


Epoch 37/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 37/50 - Loss: 0.1296


Epoch 38/50: 100%|██████████| 6/6 [00:04<00:00,  1.41it/s]


Epoch 38/50 - Loss: 0.1156


Epoch 39/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 39/50 - Loss: 0.1180


Epoch 40/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 40/50 - Loss: 0.1241


Epoch 41/50: 100%|██████████| 6/6 [00:04<00:00,  1.41it/s]


Epoch 41/50 - Loss: 0.1213


Epoch 42/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 42/50 - Loss: 0.1307


Epoch 43/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 43/50 - Loss: 0.1368


Epoch 44/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 44/50 - Loss: 0.1253


Epoch 45/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 45/50 - Loss: 0.1045


Epoch 46/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 46/50 - Loss: 0.0930


Epoch 47/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 47/50 - Loss: 0.0871


Epoch 48/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 48/50 - Loss: 0.0909


Epoch 49/50: 100%|██████████| 6/6 [00:04<00:00,  1.43it/s]


Epoch 49/50 - Loss: 0.1044


Epoch 50/50: 100%|██████████| 6/6 [00:04<00:00,  1.42it/s]


Epoch 50/50 - Loss: 0.1268
Running Roboflow-style evaluation on validation set...


100%|██████████| 6/6 [00:00<00:00, 10.98it/s]


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.50 | area=   all | maxDets=100 ] = 0.195
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.195
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.50 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.50 | area=medium | maxDets=100 ] = 0.275
 Average Precision  (AP) @[ IoU=0.50:0.50 | area= large | maxDets=100 ] = 0.149
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets=  1 ] = 0.095
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets= 10 ] = 0.381
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDe

100%|██████████| 3/3 [00:00<00:00, 11.07it/s]

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.00s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.50 | area=   all | maxDets=100 ] = 0.079
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.079
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.50 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.50 | area=medium | maxDets=100 ] = 0.109
 Average Precision  (AP) @[ IoU=0.50:0.50 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets=  1 ] = 0.071
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDets= 10 ] = 0.071
 Average Recall     (AR) @[ IoU=0.50:0.50 | area=   all | maxDe




In [None]:
import os
import shutil

folder = '/content/images/'
for filename in os.listdir(folder):
    file_path = os.path.join(folder, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print(f'Failed to delete {file_path}. Reason: {e}')