In [81]:
import os
from PIL import Image
import torch
import json
from torchvision.transforms import ToTensor

class CustomDataset(torch.utils.data.Dataset):
    """Object-detection dataset pairing images with per-image JSON annotations.

    Directory layout used by this class::

        root_dir/images/<name>.<ext>
        root_dir/labels/<name>.json

    Each JSON file must contain an ``"Annotation"`` list. Every item needs a
    ``"Coordinate"`` entry, which is handled as ``[x, y, width, height]`` and
    converted to ``[x1, y1, x2, y2]``, and a ``"Label"`` entry that is a key
    of ``label_map``.

    Args:
        root_dir: Directory containing the ``images`` and ``labels`` folders.
        transform: Optional callable applied to the RGB PIL image. When
            omitted, the image is converted with ``ToTensor()``.

    NOTE(review): ``label_map`` starts at 0. torchvision detection models
    appear to reserve class 0 for background -- confirm whether these ids
    should be shifted by one (together with ``num_classes``).
    """

    # Only directory entries with these suffixes are treated as images, so
    # stray files (e.g. ``.DS_Store``) do not break ``Image.open``.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.image_dir = os.path.join(root_dir, "images")
        self.label_dir = os.path.join(root_dir, "labels")
        self.transform = transform
        self.image_files = sorted(
            name
            for name in os.listdir(self.image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

        # Built once here instead of on every __getitem__ call.
        self.label_map = {
            "car": 0,
            "truck": 1,
            "bus": 2,
            "special_vehicle": 3,
            "motorcycle": 4,
            "bicycle": 5,
            "pedestrian": 6,
            "traffic_sign": 7,
            "traffic_light": 8,
            "none": 9
        }

    def __len__(self):
        """Return the number of image files found under ``image_dir``."""
        return len(self.image_files)

    def _label_path(self, image_file):
        """Return the JSON annotation path that belongs to ``image_file``."""
        # Derive the path from label_dir and the file stem. String-replacing
        # "images" in the full path would also rewrite a root directory that
        # happens to contain that word.
        stem = os.path.splitext(image_file)[0]
        return os.path.join(self.label_dir, stem + ".json")

    def _load_target(self, label_path):
        """Read one annotation file and build the detection target dict.

        Returns:
            dict with ``"boxes"`` (float32, shape ``(N, 4)``, ``x1, y1, x2, y2``)
            and ``"labels"`` (int64, shape ``(N,)``).

        Raises:
            KeyError: if the JSON lacks an expected key or a label is not in
                ``label_map``.
        """
        with open(label_path, 'r') as file:
            label_data = json.load(file)
        annotations = label_data['Annotation']

        boxes = [anno['Coordinate'] for anno in annotations]
        names = [anno['Label'] for anno in annotations]

        try:
            labels = [self.label_map[name] for name in names]
        except KeyError as exc:
            raise KeyError(f"Unknown label {exc.args[0]!r} in {label_path}") from exc

        # [x, y, w, h] -> [x1, y1, x2, y2]
        converted_boxes = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in boxes]

        target = {}
        # reshape keeps the (N, 4) layout even when there are no annotations;
        # a bare torch.tensor([]) would have shape (0,).
        target["boxes"] = torch.tensor(converted_boxes, dtype=torch.float32).reshape(-1, 4)
        target["labels"] = torch.tensor(labels, dtype=torch.int64)
        return target

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``."""
        image_file = self.image_files[idx]
        img_path = os.path.join(self.image_dir, image_file)

        # Force three channels so grayscale / RGBA / palette files produce the
        # same tensor layout as RGB ones; the context manager closes the file.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)
        else:
            image = ToTensor()(image)

        target = self._load_target(self._label_path(image_file))
        return image, target


In [82]:
from torch.utils.data import DataLoader

def detection_collate(batch):
    """Turn a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    Samples are kept as tuples instead of being stacked, so the items in a
    batch do not need matching shapes. A named function is used (rather than
    a repeated lambda) so the loaders stay usable if ``num_workers`` is raised
    on platforms that pickle the collate function.
    """
    return tuple(zip(*batch))


# Only the training split is shuffled; validation and test keep file order.
train_dataset = CustomDataset(root_dir="2DB/training")
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=detection_collate)

val_dataset = CustomDataset(root_dir="2DB/validation")
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=detection_collate)

test_dataset = CustomDataset(root_dir="2DB/test")
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=detection_collate)

In [83]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn

def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) with a new box-prediction head.

    Args:
        num_classes: Number of outputs of the replacement classification head.

    Returns:
        The model created by ``fasterrcnn_resnet50_fpn(pretrained=True)`` with
        ``roi_heads.box_predictor`` swapped for a fresh ``FastRCNNPredictor``.
    """
    # Imported here because this cell never imports `torchvision` itself;
    # relying on a later cell's import raises NameError on a fresh kernel.
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

    # NOTE(review): newer torchvision releases prefer the `weights=` argument
    # over `pretrained=True` -- confirm against the installed version.
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

# NOTE(review): torchvision detection heads appear to treat class 0 as
# background, while label_map assigns 0 to "car" -- confirm whether the ids
# should be shifted and num_classes raised to 11.
model = get_model(num_classes=10)  # 10 outputs, one per label_map entry (ids 0-9)


In [84]:
# Sanity check: walk the training split with CustomDataset and report the
# smallest and largest class id that actually occurs in the targets.
dataset_path = "2DB/training"  # You'll need to replace this with your actual dataset path
dataset = CustomDataset(dataset_path)

max_label = -float('inf')
min_label = float('inf')

# Iterate through the dataset to find max and min labels
for _, target in dataset:
    labels = target["labels"]
    if labels.numel() == 0:
        # An image without annotations yields an empty tensor, and
        # max()/min() raise on empty tensors -- skip it.
        continue
    max_label = max(max_label, labels.max().item())
    min_label = min(min_label, labels.min().item())

min_label, max_label


(0, 9)

In [None]:
import torchvision
import json
from torch.optim import SGD
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Train on GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = get_model(num_classes=10)
model.to(device)

# Optimize only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training ----
    total_loss = 0.0
    num_batches = 0

    model.train()
    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss terms.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()
        num_batches += 1

        # Optionally print batch-level metrics
        # print(f"Epoch {epoch+1}, Batch {num_batches}, Loss: {losses.item():.4f}")

    # Report NaN instead of dividing by zero when the loader yields no batches.
    avg_train_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}")

    # ---- Validation ----
    total_val_loss = 0.0
    num_val_batches = 0

    # torchvision detection models return the loss dict only in training
    # mode; after model.eval() they return a list of predictions, so calling
    # `.values()` on the result fails. The model is therefore left in
    # training mode here, and torch.no_grad() ensures no gradients are
    # computed and no optimizer step is taken.
    # NOTE(review): assumes train mode does not change the forward pass in a
    # way that matters for this model (e.g. frozen batch-norm) -- confirm.
    model.train()
    with torch.no_grad():
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            val_loss_dict = model(images, targets)
            val_losses = sum(loss for loss in val_loss_dict.values())

            total_val_loss += val_losses.item()
            num_val_batches += 1

    # Leave the model in eval mode after each epoch, as the original loop did,
    # so later inference cells see the same state.
    model.eval()

    avg_val_loss = total_val_loss / num_val_batches if num_val_batches else float('nan')
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}")
