This codebase can be used for many purposes, including:

1. **Object Detection Pipeline:**
- Train/evaluate models (Faster R-CNN, Mask R-CNN, etc.)
- Process COCO-format datasets
- Calculate evaluation metrics (mAP, IoU)

2. **Key Components Available:**

Data Preparation:
- Image transforms/normalization
- Annotation processing
- Dataset utilities

Training Infrastructure:
- Distributed training setup
- Learning rate scheduling
- Loss tracking

Evaluation Tools:
- COCO metric calculation
- Prediction visualization
- Model performance analysis

3. **Experiment Ideas:**
- Transfer Learning:
  - Fine-tune pretrained models on custom data
  - Modify head architectures
- Data Augmentation:
  - Test different transform combinations
  - Visualize augmented samples

4. **Performance Optimization:**
- Benchmark training speed
- Profile memory usage
- Test mixed-precision

5. **Visualization Capabilities:**
- Plot training metrics
- Visualize predictions vs ground truth
- Generate confusion matrices

In [None]:
# In vision_core.ipynb
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import Dataset, DataLoader
import os
import sys
import math # <<<<------ ADDED IMPORT MATH

# ------------------- PATH SETUP -------------------
# Resolve the directory that holds this notebook/script.
# A notebook kernel normally does not define ``__file__``, so fall back to the
# current working directory in that case. The value of ``__file__`` itself is
# used when it exists (not the literal string "__file__"), and the working
# directory is used as-is rather than being passed through dirname().
_this_file = globals().get("__file__")
if _this_file:
    notebook_dir = os.path.dirname(os.path.abspath(_this_file))
else:
    notebook_dir = os.getcwd()

# Assuming 'vision_core' is a subdirectory within 'lectures'
# and the notebook is also in 'lectures/vision_core/'
# So we need to go up one level from notebook_dir to get to 'lectures'
# then 'vision_core' can be imported.
parent_dir_of_package_root = os.path.dirname(notebook_dir)  # This should be 'lectures'
if parent_dir_of_package_root not in sys.path:
    sys.path.append(parent_dir_of_package_root)

from vision_core import utils, engine

# ------------------- DEBUG ANOMALY (Optional) -------------------
# Uncomment the next line for detailed error messages when NaNs first appear.
# This will slow down training, so use it only for debugging.
# torch.autograd.set_detect_anomaly(True)

# ------------------- DEVICE SETUP -------------------
# Use CUDA when torch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# ------------------- CUSTOM COLLATE FUNCTION FOR DETECTION -------------------
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    ``batch`` is a sequence of equally structured samples (here
    ``(image, target)`` pairs). The result is a tuple holding one tuple per
    field, e.g. ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so
    fields may differ in size between samples. An empty batch yields ``()``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# ------------------- MOCK DATASET (Improved Box Generation) -------------------
class FakeDataset(Dataset):
    """Synthetic detection dataset producing random images and random boxes.

    Every item is an ``(image, target)`` pair. ``image`` is a uniform-random
    float tensor of shape ``image_size``; ``target`` is a dict with the keys
    ``boxes`` (float32, ``[x1, y1, x2, y2]`` rows), ``labels`` (int64),
    ``image_id`` (int64, one element), ``area`` and ``iscrowd`` (uint8 zeros).

    Samples are drawn from torch's global RNG on every access, so the same
    index returns different data on repeated calls unless the RNG is re-seeded.

    Args:
        num_samples: Number of items reported by ``len()``.
        image_size: ``(channels, height, width)`` of the generated images.
        num_classes: Exclusive upper bound for generated labels; labels are
            drawn from ``[1, num_classes)``. Must be at least 2.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """

    # Smallest side length (in pixels) requested for a generated box.
    _MIN_BOX_DIM = 10

    def __init__(self, num_samples=100, image_size=(3, 300, 400), num_classes=91):
        if num_classes < 2:
            raise ValueError("num_classes must be >= 2 so that label 1 exists")
        self.num_samples = num_samples
        self.image_size = image_size
        self.img_height = image_size[1]
        self.img_width = image_size[2]
        self.num_classes = num_classes

    def __len__(self):
        return self.num_samples

    @staticmethod
    def _clamped_span(start, length, extent, min_dim):
        """Clamp the 1-D span ``[start, start + length]`` into ``[0, extent - 1]``.

        Returns ``(lo, hi)``. When clamping collapses the span (only possible
        for axes that are too short for ``min_dim``), the fallbacks below try
        to re-open it so that ``lo < hi`` whenever the axis allows it.
        """
        half = min_dim / 2
        lo = max(0, start)
        hi = min(extent - 1, start + length)  # -1 keeps the box strictly inside

        if hi <= lo:
            # Push the upper edge out by half the minimum size, or to the border.
            hi = lo + half if lo + half < extent else extent - 1
        if hi <= lo:
            # Still collapsed: pull the lower edge back instead.
            lo = hi - half if hi - half > 0 else 0
        if lo >= hi:
            lo = hi - 1
        if lo < 0:
            lo = 0
        return lo, hi

    def __getitem__(self, idx):
        """Return a freshly sampled ``(image, target)`` pair for ``idx``.

        Negative indices count from the end. Raises ``IndexError`` for indices
        outside the dataset so that plain iteration over the dataset stops
        after ``len(self)`` items instead of running forever.
        """
        if not -self.num_samples <= idx < self.num_samples:
            raise IndexError(
                f"index {idx} is out of range for FakeDataset of size {self.num_samples}"
            )
        if idx < 0:
            idx += self.num_samples

        image = torch.rand(self.image_size)
        num_boxes = torch.randint(1, 6, (1,)).item()  # 1 to 5 boxes
        min_dim = self._MIN_BOX_DIM

        boxes_list = []
        labels_list = []
        for _ in range(num_boxes):
            # Draw order (x, y, width, height, label) is kept fixed so results
            # are reproducible for a given RNG seed.
            x_start = torch.rand(1).item() * (self.img_width - min_dim - 1)
            y_start = torch.rand(1).item() * (self.img_height - min_dim - 1)
            width = torch.rand(1).item() * (self.img_width - x_start - min_dim) + min_dim
            height = torch.rand(1).item() * (self.img_height - y_start - min_dim) + min_dim

            x1, x2 = self._clamped_span(x_start, width, self.img_width, min_dim)
            y1, y2 = self._clamped_span(y_start, height, self.img_height, min_dim)

            boxes_list.append([x1, y1, x2, y2])
            labels_list.append(torch.randint(1, self.num_classes, (1,)).item())

        boxes_tensor = torch.tensor(boxes_list, dtype=torch.float32)
        box_widths = boxes_tensor[:, 2] - boxes_tensor[:, 0]
        box_heights = boxes_tensor[:, 3] - boxes_tensor[:, 1]
        if bool(torch.all(box_widths * box_heights > 0)):
            labels_tensor = torch.tensor(labels_list, dtype=torch.int64)
        else:
            # A zero-area box slipped through: replace the whole sample's
            # annotations with a single fixed box.
            boxes_tensor = torch.tensor([[10.0, 10.0, 50.0, 50.0]], dtype=torch.float32)
            labels_tensor = torch.tensor([1], dtype=torch.int64)

        target = {
            "boxes": boxes_tensor,
            "labels": labels_tensor,
            "image_id": torch.tensor([idx], dtype=torch.int64),
            "area": (boxes_tensor[:, 3] - boxes_tensor[:, 1])
            * (boxes_tensor[:, 2] - boxes_tensor[:, 0]),
            "iscrowd": torch.zeros((boxes_tensor.shape[0],), dtype=torch.uint8),
        }
        return image, target

    def get_coco_api(self):
        """Build a pycocotools ``COCO`` object describing this dataset.

        The object lists one image record per sample (ids ``0..num_samples-1``),
        one category record per label id (``1..num_classes-1``) and no
        annotations, because boxes are generated on the fly.
        """
        from pycocotools.coco import COCO

        mock_coco = COCO()
        mock_coco.dataset = {
            'images': [{'id': i} for i in range(self.num_samples)],
            # NOTE(review): 'categories' is supplied so category queries on the
            # returned object have a list to read; confirm against the
            # evaluation code that consumes this object.
            'categories': [{'id': c} for c in range(1, self.num_classes)],
            'annotations': [],
        }
        mock_coco.createIndex()
        return mock_coco

# ------------------- MODEL AND OPTIMIZER -------------------
# Build the detector and move it to the selected device in one expression
# (Module.to returns the module itself).
model = fasterrcnn_resnet50_fpn(pretrained=True, num_classes=91).to(device)

# Only parameters that require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.001,  # Further reduced LR
    momentum=0.9,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# ------------------- DATA LOADERS -------------------
# Synthetic train/validation splits; both loaders load in the main process
# (num_workers=0) and use the detection collate function.
train_dataset = FakeDataset(num_samples=200)
val_dataset = FakeDataset(num_samples=50)

train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn,
)

# ------------------- TRAINING AND VALIDATION LOOP (WITH GRADIENT CLIPPING) -------------------
num_epochs = 5
print(f"Starting training for {num_epochs} epochs...")

for epoch in range(num_epochs):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = f"Epoch: [{epoch}]"

    # A per-iteration warmup scheduler exists only during the first epoch.
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(train_loader) - 1)
        current_lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
    else:
        current_lr_scheduler = None

    logged_batches = metric_logger.log_every(train_loader, 10, header)
    for i, (images, targets) in enumerate(logged_batches):
        # Move the whole batch (images and every target tensor) to the device.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model is called with targets and returns a dict
        # of losses; their sum is the quantity that is back-propagated.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        # Reduced copies are used for the finiteness check and for logging.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        # Abort on NaN/inf before the optimizer can apply a broken update.
        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value} at epoch {epoch}, iteration {i}, stopping training")
            print(loss_dict_reduced)
            # Optionally, save the problematic batch for inspection
            # torch.save({'images': images, 'targets': targets}, 'problem_batch.pth')
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()

        # Clip the global gradient norm to 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        if current_lr_scheduler is not None:
            current_lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # The epoch-level scheduler is skipped for epoch 0, where warmup is active.
    if epoch > 0 and lr_scheduler is not None:
        lr_scheduler.step()

    engine.evaluate(model, val_loader, device=device)

print("Training finished.")

Using device: cpu
Starting training for 5 epochs...


In [None]:
# Example of making a prediction (after training)
# Switch to eval mode, then run one validation image through the model
# with gradient tracking disabled.
model.eval()
sample_image = val_dataset[0][0]
inference_batch = [sample_image.to(device)]
with torch.no_grad():
    prediction = model(inference_batch)
print("\nExample prediction:")
print(prediction)