In [1]:
import os
import sys

# Make the project's .py modules importable from this notebook: the working
# directory is put at the front of sys.path unless it is already listed.
project_root = os.getcwd()
root_on_path = project_root in sys.path

print("Working dir:", project_root)
print("Python Path Contains project roor:", root_on_path)

if not root_on_path:
    sys.path.insert(0, project_root)

Working dir: /home/toji339/Documents/Sem-5/Computer Vision/Road-Lane-Detection-and-Object-Detection
Python Path Contains project roor: False


In [2]:
# 1. imports and device
import importlib
import time
from types import SimpleNamespace

import torch

# Prefer the GPU when torch can see one; otherwise everything runs on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# import your modules (one per line, same order as before)
# NOTE(review): `test` is also the name of a standard-library package; this
# presumably resolves to the project's test.py because the working directory
# was inserted at sys.path[0] in the first cell — confirm.
import drive_dataloader
import drive_object_detection_model
import utils
import train
import test
import losses

# Reload every project module so edits made to the .py files are picked up
# without restarting the kernel.
for _module in (drive_dataloader, drive_object_detection_model, utils, train, test):
    importlib.reload(_module)
# Kept as the final bare expression so the cell still echoes the reloaded module.
importlib.reload(losses)


Using device: cpu


<module 'losses' from '/home/toji339/Documents/Sem-5/Computer Vision/Road-Lane-Detection-and-Object-Detection/losses.py'>

In [3]:
# Build the train/val/test loaders from the Drive_India folder under the
# current working directory, then pull one batch as a smoke test.
# NOTE(review): the batch_size/input_size/num_workers used here (4/640/2)
# differ from the values the cfg cell sets further down (8/512/0), and the
# model is later built from cfg — confirm which settings are intended.
dataset_root = os.path.join(os.getcwd(), "Datasets", "Drive_India")
loader_kwargs = dict(batch_size=4, input_size=640, num_workers=2)
train_loader, val_loader, test_loader = drive_dataloader.make_loaders(
    root=dataset_root, **loader_kwargs
)
print("Train samples:", len(train_loader.dataset))

first_batch = next(iter(train_loader))
images, targets = first_batch
print("Batch images:", images.shape, "num targets:", len(targets))


Train samples: 8333
Batch images: torch.Size([4, 3, 640, 640]) num targets: 4


In [4]:
# 2. hyperparams & paths (edit these)
# All tunables live on one namespace object; attribute order is kept so the
# printed repr is unchanged.
cfg = SimpleNamespace(
    data_root="./Datasets/Drive_India",     # path to dataset
    batch_size=8,
    input_size=512,
    num_workers=0,
    num_classes=24,
    epochs=1,
    lr=0.01,
    step_step=8,                            # passed as StepLR step_size in the model cell
    gamma=0.1,
    log_dir="./runs",
    ckpt_dir="./checkpoints",
    test_after_epoch=False,                 # set True to evaluate test each epoch (slower)
    seed=42,
)

# reproducibility (not perfect but helps): seed every RNG this notebook touches
import random
import numpy as np

random.seed(cfg.seed)
np.random.seed(cfg.seed)
torch.manual_seed(cfg.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(cfg.seed)

print(cfg)


namespace(data_root='./Datasets/Drive_India', batch_size=8, input_size=512, num_workers=0, num_classes=24, epochs=1, lr=0.01, step_step=8, gamma=0.1, log_dir='./runs', ckpt_dir='./checkpoints', test_after_epoch=False, seed=42)


In [5]:
# 4. create model, anchors, optimizer
# The detector is built from cfg and moved to the selected device.
detector_cls = drive_object_detection_model.DriveSingleShotModel
net = detector_cls(num_classes=cfg.num_classes, input_size=cfg.input_size).to(device)

# Anchors are generated for this model at the configured input size.
anchors = utils.build_anchors_for_model(net, input_size=cfg.input_size)   # normalized anchors tensor

# SGD with momentum and weight decay; StepLR multiplies the learning rate by
# cfg.gamma every cfg.step_step scheduler steps.
sgd_options = dict(lr=cfg.lr, momentum=0.9, weight_decay=5e-4)
optimizer = torch.optim.SGD(net.parameters(), **sgd_options)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=cfg.step_step, gamma=cfg.gamma
)

param_count = sum(weight.numel() for weight in net.parameters())
print("Model created. #params: %.2f M" % (param_count / 1e6))
print("Anchors:", anchors.shape)


Model created. #params: 6.14 M
Anchors: torch.Size([64512, 4])


In [6]:
# one_image_train.py (notebook cell)
# Smoke test: run one optimizer step on a single training image with a fresh
# model and print the loss before and after the update.
#
# This cell deliberately uses its own names (`single_anchors`,
# `single_optimizer`, `single_images`, `single_targets`) so it does not
# overwrite the notebook-level `anchors`, `optimizer`, `images` and `targets`
# created by earlier cells. `anchors` belongs to `net`, and `scheduler` wraps
# the original `optimizer` object; rebinding those names here would leave the
# evaluation cell decoding `net` outputs with anchors built for a different
# model and input size.
import os, torch
from torch.utils.data import Subset, DataLoader
from drive_dataloader import DriveIndiaDataset, collate_fn, move_batch_to_device
from drive_object_detection_model import DriveSingleShotModel
from utils import build_anchors_for_model
from losses import compute_losses

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ROOT = os.path.join(os.getcwd(), "Datasets", "Drive_India")

# Settings for this experiment only (independent of `cfg`).
SINGLE_INPUT_SIZE = 640
SINGLE_NUM_CLASSES = 24

# 1) build dataset and pick one index (e.g., first image of train1)
dataset = DriveIndiaDataset(ROOT, split="train1", input_size=SINGLE_INPUT_SIZE, augment=False)
single_idx = 0   # change if you want another image
single_ds = Subset(dataset, [single_idx])

loader = DataLoader(single_ds, batch_size=1, shuffle=False, num_workers=0,
                    pin_memory=False, collate_fn=collate_fn)

# 2) model, anchors, optimizer (all private to this cell)
model = DriveSingleShotModel(num_classes=SINGLE_NUM_CLASSES, input_size=SINGLE_INPUT_SIZE).to(device)
single_anchors = build_anchors_for_model(model, input_size=SINGLE_INPUT_SIZE)
single_optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

# 3) single training step on the one and only batch
model.train()
raw_images, raw_targets = next(iter(loader))
single_images, single_targets = move_batch_to_device((raw_images, raw_targets), device)

outputs = model(single_images)
loss, parts = compute_losses(outputs, single_anchors, single_targets,
                             model.input_size, num_classes=SINGLE_NUM_CLASSES)
print("Loss before step:", loss.item(), parts)

single_optimizer.zero_grad()
loss.backward()
single_optimizer.step()

print("Step done — loss after update (recompute):")
# The model is intentionally left in train mode for the recompute, matching
# the forward pass used for the first loss; only gradient tracking is disabled.
with torch.no_grad():
    outputs2 = model(single_images)
    loss2, parts2 = compute_losses(outputs2, single_anchors, single_targets,
                                   model.input_size, num_classes=SINGLE_NUM_CLASSES)
print("Loss after step:", loss2.item(), parts2)


Loss before step: 390941.84375 {'cls': 390366.15625, 'box': 287.8451843261719, 'pos': 151}
Step done — loss after update (recompute):
Loss after step: 3923.146484375 {'cls': 3397.6669921875, 'box': 262.73968505859375, 'pos': 151}


In [None]:
# Evaluate current `net` on a small subset and on full val/test if desired
# NOTE(review): the output currently saved under this cell prints timings this
# code never emits and ends in a RuntimeError, so it appears to come from an
# earlier version of the cell — re-run to refresh it.
import time
from itertools import islice

# `move_batch_to_device` is imported from drive_dataloader, the same module the
# one-image training cell imports it from.
from drive_dataloader import move_batch_to_device
from utils import build_anchors_for_model, decode_outputs, compute_map50
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)
net.eval()

# Anchors built specifically for `net`. The notebook-level `anchors` name is
# rebound by the one-image training cell to anchors for a different model, so
# it is not relied on here.
net_anchors = build_anchors_for_model(net, input_size=net.input_size)


def evaluate_loader(loader, max_batches=None, conf_thresh=0.05, model=None, anchors=None):
    """Run detection over `loader` and score the predictions with mAP@0.5.

    Args:
        loader: iterable yielding `(images, targets)` batches.
        max_batches: if given, evaluate only this many batches; `None` means
            the whole loader.
        conf_thresh: confidence threshold forwarded to `decode_outputs`.
        model: detector to evaluate; defaults to the notebook-level `net`.
        anchors: anchors matching `model`; defaults to the anchors built for
            `net` in this cell.

    Returns:
        `(mAP50, per_class_ap, seconds)` — the two values returned by
        `compute_map50` plus the wall-clock time spent in this call.
    """
    model = net if model is None else model
    anchors = net_anchors if anchors is None else anchors

    # Work out how many batches will really be consumed so the progress bar
    # total is accurate; loaders without a length simply get no total.
    try:
        n_total = len(loader)
    except TypeError:
        n_total = None
    if max_batches is not None:
        max_batches = max(max_batches, 0)
        n_total = max_batches if n_total is None else min(n_total, max_batches)

    preds, gts = [], []
    was_training = model.training
    model.eval()
    t0 = time.time()
    try:
        with torch.no_grad():
            # islice stops after `max_batches` without fetching one extra batch.
            batches = islice(loader, max_batches)
            for images, targets in tqdm(batches, total=n_total, desc="Eval loader"):
                images, targets = move_batch_to_device((images, targets), device)
                outputs = model(images)
                batch_preds = decode_outputs(outputs, anchors, input_size=model.input_size,
                                             conf_thresh=conf_thresh, iou_thres=0.5)
                preds.extend(batch_preds)
                gts.extend(targets)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    mAP50, per_class_ap = compute_map50(preds, gts, num_classes=cfg.num_classes)
    return mAP50, per_class_ap, time.time() - t0


# Quick smoke: evaluate on 10 val batches (fast)
quick_map, per_class, sec = evaluate_loader(val_loader, max_batches=10)
print(f"Quick val (10 batches) mAP50: {quick_map:.4f}  time: {sec:.1f}s")

# Full val (uncomment when ready; may be slow)
#full_val_map, _, sec_full = evaluate_loader(val_loader, max_batches=None)
#print(f"Full val mAP50: {full_val_map:.4f}  time: {sec_full:.1f}s")

# Quick test (10 batches)
quick_test_map, _, sec_test = evaluate_loader(test_loader, max_batches=10)
print(f"Quick test (10 batches) mAP50: {quick_test_map:.4f}  time: {sec_test:.1f}s")


Device: cpu
Forward+backward time: 0.432s | loss: 88903.359 | parts: {'cls': 88757.5625, 'box': 72.89772033691406, 'pos': 39}
Fast decode time: 0.011s | detections in batch[0]: 200


RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.