In [12]:
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision.datasets import VOCDetection
import os
import json
from PIL import Image
import numpy as np
from roboflow import Roboflow

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [13]:
from roboflow import Roboflow
# The Roboflow API key is a credential and must not be committed with the
# notebook: read it from the ROBOFLOW_API_KEY environment variable instead.
# NOTE(review): the key that was previously hardcoded in this cell should be
# revoked, since it has been exposed in the notebook's history.
_roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY")
if not _roboflow_api_key:
    raise RuntimeError(
        "ROBOFLOW_API_KEY is not set. Export your Roboflow API key in the "
        "environment before running this cell."
    )

# Download version 1 of the "vehicles-openimages" project in COCO format and
# remember where it was placed on disk.
rf = Roboflow(api_key=_roboflow_api_key)
project = rf.workspace("roboflow-gw7yv").project("vehicles-openimages")
version = project.version(1)
dataset = version.download("coco")
datasetPath = dataset.location

In [14]:
import json
import os

# Each split directory ("train", "valid", "test") holds its own COCO annotation file.
train_annotations, val_annotations, test_annotations = (
    os.path.join(datasetPath, split_dir, "_annotations.coco.json")
    for split_dir in ("train", "valid", "test")
)

# The category list is read from the training split's annotation file.
with open(train_annotations, 'r') as f:
    train_coco = json.load(f)

categories = train_coco['categories']

# Class names ordered by ascending category id.
class_names = list(
    map(lambda cat: cat['name'], sorted(categories, key=lambda cat: cat['id']))
)

# Lookups in both directions between category id and category name.
id_to_class = dict((cat['id'], cat['name']) for cat in categories)
class_to_id = dict((cat['name'], cat['id']) for cat in categories)

In [15]:
import torch
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms
import json
import os

class VehicleDataset(Dataset):
    """Object-detection dataset backed by a COCO-format annotation file.

    Each item is an ``(image, target)`` pair. ``target`` is a dict with the keys
    ``boxes`` (float32, one ``[x_min, y_min, x_max, y_max]`` row per object),
    ``labels`` (int64 category ids), ``image_id``, ``area`` and ``iscrowd``.

    Only images that have at least one entry in the annotation list are
    indexed; images without annotations are not part of the dataset.
    """

    def __init__(self, root_dir, annotation_file, transform=None):
        """Load the annotation file and index annotations by image id.

        Args:
            root_dir: Directory that the ``file_name`` of each image is joined to.
            annotation_file: Path to the COCO JSON annotation file.
            transform: Optional callable applied to the PIL image in
                ``__getitem__`` (the target is not transformed).
        """
        self.root_dir = root_dir
        self.transform = transform

        with open(annotation_file, 'r') as f:
            self.coco_data = json.load(f)

        self.images = {img['id']: img for img in self.coco_data['images']}
        self.categories = {cat['id']: cat['name'] for cat in self.coco_data['categories']}

        # Group annotations per image, keeping the order in which image ids
        # first appear in the annotation list.
        self.image_annotations = {}
        for ann in self.coco_data['annotations']:
            self.image_annotations.setdefault(ann['image_id'], []).append(ann)

        self.image_ids = list(self.image_annotations.keys())

    def __len__(self):
        """Return the number of images that have at least one annotation."""
        return len(self.image_ids)

    def _build_target(self, img_id, annotations):
        """Convert COCO annotations of one image into a detection target dict.

        COCO boxes are ``[x, y, width, height]``; they are converted to
        ``[x_min, y_min, x_max, y_max]``. Boxes with non-positive width or
        height are skipped because torchvision's detection models reject
        degenerate boxes during training.

        Args:
            img_id: Id of the image the annotations belong to.
            annotations: List of annotation dicts with ``bbox`` and ``category_id``.

        Returns:
            Dict with ``boxes`` (N, 4), ``labels`` (N,), ``image_id`` (1,),
            ``area`` (N,) and ``iscrowd`` (N,) tensors, where N is the number
            of boxes that were kept (possibly 0).
        """
        boxes = []
        labels = []

        for ann in annotations:
            x, y, w, h = ann['bbox']
            if w <= 0 or h <= 0:
                continue
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])

        # reshape keeps the (N, 4) layout even when no box survived, so the
        # column indexing below and downstream consumers still work.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([img_id]),
            'area': areas,
            'iscrowd': torch.zeros((len(labels),), dtype=torch.int64)
        }

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair at position ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one was given.
        """
        img_id = self.image_ids[idx]
        img_info = self.images[img_id]
        img_path = os.path.join(self.root_dir, img_info['file_name'])

        # convert() returns an independent, fully loaded image, so the
        # underlying file can be closed immediately instead of leaking a handle.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        target = self._build_target(img_id, self.image_annotations[img_id])

        if self.transform:
            image = self.transform(image)

        return image, target

# The only preprocessing step converts the PIL image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# One VehicleDataset per split; each split lives in its own sub-directory of
# the downloaded dataset and has its own annotation file.
train_dataset, val_dataset, test_dataset = (
    VehicleDataset(
        root_dir=os.path.join(datasetPath, split_dir),
        annotation_file=split_annotations,
        transform=transform,
    )
    for split_dir, split_annotations in (
        ('train', train_annotations),
        ('valid', val_annotations),
        ('test', test_annotations),
    )
)

def custom_collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    A batch of ``(image, target)`` pairs becomes ``(images, targets)``, where
    each element is a tuple; nothing is stacked into a single tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# All loaders use custom_collate_fn, which keeps images and target dicts as
# tuples instead of stacking them. Only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=2,
    collate_fn=custom_collate_fn,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=2,
    collate_fn=custom_collate_fn,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
    collate_fn=custom_collate_fn,
)

In [17]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torchvision.transforms.functional as F
import random

# Label id -> display name used when drawing boxes.
# NOTE(review): presumably mirrors the category ids of the COCO annotation
# file (see id_to_class) - confirm they stay in sync.
vehicle_classes = dict(enumerate(
    ['vehicles', 'Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck']
))

# Label id -> matplotlib colour for that class's boxes and captions.
class_colors = dict(enumerate(
    ['red', 'blue', 'green', 'orange', 'purple', 'brown']
))

def visualize_sample(dataset, indices, title="Sample Images with Ground Truth"):
    """Show up to six dataset samples with their ground-truth boxes on a 2x3 grid.

    Args:
        dataset: Indexable dataset returning ``(image, target)``, where
            ``target`` has ``boxes`` (x_min, y_min, x_max, y_max) and ``labels``.
        indices: Dataset indices to display; entries beyond the sixth are ignored.
        title: Figure-level title.

    Class names and colours come from the module-level ``vehicle_classes`` and
    ``class_colors`` mappings.
    """
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.ravel()

    # Pairing with the six axes caps the number of displayed samples at six.
    for ax, sample_idx in zip(axes, indices):
        image, target = dataset[sample_idx]
        ax.imshow(F.to_pil_image(image))

        sample_labels = target['labels']
        for box, label in zip(target['boxes'], sample_labels):
            left, top, right, bottom = box
            label_id = label.item()
            caption = vehicle_classes[label_id]
            box_color = class_colors[label_id]

            outline = patches.Rectangle(
                (left, top), right - left, bottom - top,
                linewidth=2, edgecolor=box_color, facecolor='none'
            )
            ax.add_patch(outline)

            # Caption sits slightly above the top-left corner of the box.
            ax.text(
                left, top - 5, caption,
                color=box_color, fontsize=10, weight='bold',
                bbox=dict(facecolor='white', alpha=0.8, edgecolor=box_color)
            )

        ax.set_title(f'Image {sample_idx} - {len(sample_labels)} objects')
        ax.axis('off')

    # Hide grid cells that received no image.
    for spare_ax in axes[len(indices):]:
        spare_ax.axis('off')

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()

# Fixed sample positions to preview from each split.
train_indices = [0, 10, 25, 50, 75, 100]
val_indices = [0, 5, 10, 15, 20, 25]

visualize_sample(
    dataset=train_dataset,
    indices=train_indices,
    title="Training Set - Ground Truth Annotations",
)
visualize_sample(
    dataset=val_dataset,
    indices=val_indices,
    title="Validation Set - Ground Truth Annotations",
)

In [18]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Number of output classes of the replacement box-predictor head.
num_classes = 6
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)

# Replace the box predictor with a new head sized for num_classes; the new head
# takes the same number of input features as the one it replaces.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

# Freeze every parameter first, then re-enable gradients only for the new box
# predictor and the region proposal network. The order matters: the blanket
# freeze must come before the selective unfreeze.
model.requires_grad_(False)
model.rpn.requires_grad_(True)
model.roi_heads.box_predictor.requires_grad_(True)

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)

# Label id -> display name, and the inverse lookup from name to label id.
vehicle_classes = dict(enumerate(
    ['vehicles', 'Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck']
))

reverse_vehicle_classes = dict(zip(vehicle_classes.values(), vehicle_classes.keys()))

In [19]:
from tqdm import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision

def train_one_epoch(model, dataloader, optimizer, device):
    """Train ``model`` for one pass over ``dataloader`` and return the mean loss.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode.
        dataloader: Iterable yielding ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts of tensors.
        optimizer: Optimizer updating the model's trainable parameters.
        device: Device the images and target tensors are moved to.

    Returns:
        The summed loss averaged over the batches that were actually
        processed, or ``float('nan')`` when the dataloader yielded no batch
        (instead of raising ``ZeroDivisionError``).
    """
    model.train()
    total_loss = 0.0
    # Count batches as they are consumed rather than relying on
    # len(dataloader), which is undefined for some iterables and would
    # divide by zero for an empty loader.
    batches_seen = 0

    progress_bar = tqdm(dataloader, desc="Training")

    for images, targets in progress_bar:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in target.items()} for target in targets]

        # In training mode the model returns its individual loss terms;
        # they are summed into the single scalar that is optimised.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_loss = losses.item()
        total_loss += batch_loss
        batches_seen += 1

        progress_bar.set_postfix({
            'Loss': f'{batch_loss:.4f}',
            'Avg Loss': f'{total_loss / batches_seen:.4f}'
        })

    if batches_seen == 0:
        return float('nan')
    return total_loss / batches_seen

def validate_model(model, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` and return mean-average-precision metrics.

    The model is switched to eval mode and run without gradient tracking.
    Predictions (``boxes``, ``scores``, ``labels``) and targets (``boxes``,
    ``labels``) are moved to the CPU and accumulated in a
    ``MeanAveragePrecision`` metric evaluated at IoU thresholds from 0.5 to
    0.95 in steps of 0.05.

    Args:
        model: Detection model returning one prediction dict per image.
        dataloader: Iterable yielding ``(images, targets)`` batches.
        device: Device the images are moved to before inference.

    Returns:
        Whatever ``MeanAveragePrecision.compute()`` returns for the
        accumulated predictions.
    """
    model.eval()

    iou_grid = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
    metric = MeanAveragePrecision(iou_thresholds=iou_grid)

    progress_bar = tqdm(dataloader, desc="Validating")

    with torch.no_grad():
        for images, targets in progress_bar:
            outputs = model([img.to(device) for img in images])

            # Keep only the fields the metric consumes, moved to the CPU.
            preds_cpu = [
                {field: output[field].cpu() for field in ('boxes', 'scores', 'labels')}
                for output in outputs
            ]
            targets_cpu = [
                {field: truth[field].cpu() for field in ('boxes', 'labels')}
                for truth in targets
            ]

            metric.update(preds_cpu, targets_cpu)

    return metric.compute()

# Only parameters that still require gradients are handed to the optimizer.
optimizer = torch.optim.AdamW(
    [param for param in model.parameters() if param.requires_grad],
    weight_decay=0.0005,
    lr=0.0001,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [20]:
import time

num_epochs = 5
# Start below every possible mAP value so the first epoch always writes a
# checkpoint. With an initial value of 0.0, a run whose mAP never rose above 0
# would save nothing and the later torch.load of the checkpoint would fail.
best_map = float('-inf')
# Per-epoch metrics, appended to once per epoch and stored in the checkpoint.
training_history = {
    'train_loss': [],
    'val_map': [],
    'val_map_50': [],
    'val_map_75': []
}

start_time = time.time()

for epoch in range(num_epochs):
    epoch_start = time.time()

    train_loss = train_one_epoch(model, train_loader, optimizer, device)
    val_metrics = validate_model(model, val_loader, device)

    # Read the learning rate this epoch trained with *before* the scheduler
    # advances it; reading it afterwards would report the next epoch's rate.
    current_lr = optimizer.param_groups[0]['lr']
    scheduler.step()

    val_map = val_metrics['map'].item()
    val_map_50 = val_metrics['map_50'].item()
    val_map_75 = val_metrics['map_75'].item()

    training_history['train_loss'].append(train_loss)
    training_history['val_map'].append(val_map)
    training_history['val_map_50'].append(val_map_50)
    training_history['val_map_75'].append(val_map_75)

    epoch_time = time.time() - epoch_start

    print(f"\nEpoch {epoch + 1} Results:")
    print(f"  Train Loss: {train_loss:.4f}")
    print(f"  Val mAP@0.5:0.95: {val_map:.4f}")
    print(f"  Val mAP@0.5: {val_map_50:.4f}")
    print(f"  Val mAP@0.75: {val_map_75:.4f}")
    print(f"  Learning Rate: {current_lr:.6f}")
    print(f"  Epoch Time: {epoch_time:.1f}s")

    # Keep only the checkpoint with the best validation mAP@0.5:0.95 so far.
    if val_map > best_map:
        best_map = val_map
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_map': best_map,
            'training_history': training_history
        }, 'best_vehicle_detector.pth')
        print("  ✅ New best mAP! Model saved.")

total_time = time.time() - start_time
print("\nTraining completed!")
print(f"Total training time: {total_time:.1f}s ({total_time/60:.1f} minutes)")
print(f"Best validation mAP@0.5:0.95: {best_map:.4f}")

# Three side-by-side panels: training loss, validation mAP curves, and both
# together with the loss scaled by its maximum.
fig, (loss_ax, map_ax, progress_ax) = plt.subplots(1, 3, figsize=(15, 5))
epoch_numbers = range(1, num_epochs + 1)

# Panel 1: raw training loss per epoch.
loss_ax.plot(epoch_numbers, training_history['train_loss'], 'b-', marker='o')
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.grid(True)

# Panel 2: the three validation mAP variants.
map_ax.plot(epoch_numbers, training_history['val_map'], 'r-', marker='o', label='mAP@0.5:0.95')
map_ax.plot(epoch_numbers, training_history['val_map_50'], 'g-', marker='s', label='mAP@0.5')
map_ax.plot(epoch_numbers, training_history['val_map_75'], 'm-', marker='^', label='mAP@0.75')
map_ax.set_title('Validation mAP')
map_ax.set_xlabel('Epoch')
map_ax.set_ylabel('mAP')
map_ax.legend()
map_ax.grid(True)

# Panel 3: loss divided by its largest value, plotted next to the main mAP.
normalized_loss = [
    loss_value / max(training_history['train_loss'])
    for loss_value in training_history['train_loss']
]
progress_ax.plot(epoch_numbers, normalized_loss, 'b-', marker='o', label='Train Loss (norm)')
progress_ax.plot(epoch_numbers, training_history['val_map'], 'r-', marker='o', label='Val mAP@0.5:0.95')
progress_ax.set_title('Training Progress')
progress_ax.set_xlabel('Epoch')
progress_ax.set_ylabel('Normalized Value')
progress_ax.legend()
progress_ax.grid(True)

plt.tight_layout()
plt.show()

In [21]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torchvision.transforms.functional as F

# Restore the best checkpoint written by the training loop. map_location
# remaps the stored tensors onto the current device, so a checkpoint saved on
# a GPU can still be loaded on a CPU-only machine.
checkpoint = torch.load('best_vehicle_detector.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

test_metrics = validate_model(model, test_loader, device)

# .item() converts the metric tensors to plain floats before formatting,
# matching how the validation metrics are reported during training.
print("Test Set Results:")
print(f"  mAP@0.5:0.95: {test_metrics['map'].item():.4f}")
print(f"  mAP@0.5: {test_metrics['map_50'].item():.4f}")
print(f"  mAP@0.75: {test_metrics['map_75'].item():.4f}")

def visualize_predictions(model, dataset, indices, confidence_threshold=0.5):
    """Plot ground truth (red) and model predictions (green) for up to six samples.

    Args:
        model: Detection model; it is put into eval mode and called on one
            image at a time on the module-level ``device``.
        dataset: Indexable dataset returning ``(image, target)`` with
            ``target['boxes']`` and ``target['labels']``.
        indices: Dataset indices to display; entries beyond the sixth are ignored.
        confidence_threshold: Predictions with a score not strictly above
            this value are hidden.

    Class names come from the module-level ``vehicle_classes`` mapping.
    """

    def annotate(ax, box, caption, color, above):
        """Draw one box outline plus its caption above or below the box."""
        left, top, right, bottom = box
        ax.add_patch(patches.Rectangle(
            (left, top), right - left, bottom - top,
            linewidth=2, edgecolor=color, facecolor='none'
        ))
        # Ground-truth captions go above the box, prediction captions below it,
        # so the two do not overlap.
        caption_y = top - 20 if above else bottom + 5
        ax.text(
            left, caption_y, caption,
            color=color, fontsize=9, weight='bold',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor=color)
        )

    model.eval()

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.ravel()

    with torch.no_grad():
        # Pairing with the six axes caps the number of displayed samples at six.
        for ax, sample_idx in zip(axes, indices):
            image, target = dataset[sample_idx]

            # The model expects a batch, so add a leading batch dimension.
            output = model(image.unsqueeze(0).to(device))[0]

            confident = output['scores'] > confidence_threshold
            pred_boxes = output['boxes'][confident].cpu()
            pred_labels = output['labels'][confident].cpu()
            pred_scores = output['scores'][confident].cpu()

            ax.imshow(F.to_pil_image(image))

            gt_labels = target['labels']
            for box, label in zip(target['boxes'], gt_labels):
                annotate(ax, box, f'GT: {vehicle_classes[label.item()]}', 'red', above=True)

            for box, label, score in zip(pred_boxes, pred_labels, pred_scores):
                annotate(
                    ax, box,
                    f'Pred: {vehicle_classes[label.item()]} ({score:.2f})',
                    'green', above=False
                )

            ax.set_title(f'Test Image {sample_idx}\nGT: {len(gt_labels)} objects, Pred: {len(pred_labels)} objects')
            ax.axis('off')

    # Hide grid cells that received no image.
    for spare_ax in axes[len(indices):]:
        spare_ax.axis('off')

    plt.suptitle(f'Predictions vs Ground Truth (Confidence > {confidence_threshold})', fontsize=16)
    plt.tight_layout()
    plt.show()

# Show predictions against ground truth for a fixed set of test samples.
test_indices = [0, 5, 10, 15, 20, 25]
visualize_predictions(model, test_dataset, test_indices, confidence_threshold=0.5)

# Compare how many detections survive at different confidence thresholds on a
# single test image: one panel per threshold.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

test_idx = 10
image, target = test_dataset[test_idx]
img_pil = F.to_pil_image(image)

thresholds = [0.3, 0.5, 0.7]

model.eval()
with torch.no_grad():
    # Run the model once; the same prediction is re-filtered for each threshold.
    image_tensor = image.unsqueeze(0).to(device)
    prediction = model(image_tensor)[0]
    
    for i, threshold in enumerate(thresholds):
        ax = axes[i]
        ax.imshow(img_pil)
        
        # Keep only detections whose score is strictly above the threshold.
        valid_scores = prediction['scores'] > threshold
        pred_boxes = prediction['boxes'][valid_scores].cpu()
        pred_labels = prediction['labels'][valid_scores].cpu()
        pred_scores = prediction['scores'][valid_scores].cpu()
        
        for box, label, score in zip(pred_boxes, pred_labels, pred_scores):
            # Boxes are (x_min, y_min, x_max, y_max); Rectangle wants origin + size.
            x_min, y_min, x_max, y_max = box
            width = x_max - x_min
            height = y_max - y_min
            
            class_name = vehicle_classes[label.item()]
            
            rect = patches.Rectangle(
                (x_min, y_min), width, height,
                linewidth=2, edgecolor='green', facecolor='none'
            )
            ax.add_patch(rect)
            
            # Caption with class name and score, just above the box.
            ax.text(
                x_min, y_min - 5, f'{class_name} ({score:.2f})',
                color='green', fontsize=10, weight='bold',
                bbox=dict(facecolor='white', alpha=0.8, edgecolor='green')
            )
        
        ax.set_title(f'Threshold: {threshold}\nDetections: {len(pred_labels)}')
        ax.axis('off')

plt.suptitle('Effect of Confidence Threshold', fontsize=16)
plt.tight_layout()
plt.show()