In [1]:
import os

import torch
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Paths
# The dataset root can be overridden with the COCO_DATA_DIR environment
# variable so the notebook is not tied to a single machine; the fallback is
# the original local checkout.
data_dir = os.environ.get(
    "COCO_DATA_DIR",
    r"C:\Users\alway\OneDrive\Documents\GitHub\Applied-AI\hw2\datasets\coco_dataset",
)
train_images_dir = f"{data_dir}/train2017"
train_annotations_file = f"{data_dir}/annotations/instances_train2017.json"
val_images_dir = f"{data_dir}/val2017"
val_annotations_file = f"{data_dir}/annotations/instances_val2017.json"

# Data transforms (applied to the image only; targets are passed through)
transform = T.Compose([
    T.ToTensor()
])


def collate_batch(batch):
    """Regroup a list of (image, target) samples into (images, targets).

    The samples are kept as tuples instead of being stacked into one tensor,
    which is what the default collate function would attempt.
    """
    return tuple(zip(*batch))


# Load datasets
# NOTE: constructing CocoDetection parses the whole annotation JSON into
# memory. The saved output of this cell shows a MemoryError right after the
# "loading annotations into memory..." message, so on a RAM-constrained
# machine consider experimenting with the (much smaller) val2017 annotations.
train_dataset = CocoDetection(root=train_images_dir, annFile=train_annotations_file, transform=transform)
val_dataset = CocoDetection(root=val_images_dir, annFile=val_annotations_file, transform=transform)

# Data loaders
# On Windows, DataLoader workers are separate spawned processes that must
# unpickle the collate function. Callables defined inside a notebook (lambdas
# included) cannot be resolved there, so load in the main process on Windows.
num_workers = 0 if os.name == "nt" else 4
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=collate_batch,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_batch,
)

# Load the pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box-prediction head with a freshly initialised one.
# COCO has 80 object categories, but their ids are not contiguous: they run
# from 1 to 90 with gaps. Using the raw ids as labels therefore needs
# 90 + 1 outputs (index 0 is the background class).
num_classes = 91
in_features = model.roi_heads.box_predictor.cls_score.in_features
# The RoI head expects a predictor that returns both class scores and box
# regressions; a bare nn.Linear only produces one of the two outputs.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move to the device only after the head swap so the new head is moved too.
model.to(device)

loading annotations into memory...


MemoryError: 

In [None]:

# Replace the final classifier with a new one for your number of classes.
# COCO category ids are not contiguous, so when the raw ids are used as labels
# the head needs (largest id + 1) outputs; index 0 is the background class.
# `coco.cats` is keyed by category id, so max() yields the largest id.
num_classes = max(train_dataset.coco.cats) + 1

# Read the feature width from whichever head is currently installed: the stock
# predictor exposes it via `.cls_score`, a plain nn.Linear exposes it directly.
current_head = model.roi_heads.box_predictor
in_features = getattr(current_head, "cls_score", current_head).in_features

# The RoI head needs both class scores and box regressions from its predictor,
# which FastRCNNPredictor provides (a bare nn.Linear does not).
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# The new head is created on the CPU; move the whole model to the target device.
model.to(device)

# Set up the optimizer (only over parameters that are actually trainable)
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.005, momentum=0.9, weight_decay=0.0005)

def _to_detection_target(annotations, device):
    """Convert one image's COCO annotation list into a Faster R-CNN target.

    Args:
        annotations: list of COCO annotation dicts for a single image, each
            with a "bbox" in [x, y, width, height] form and a "category_id".
        device: device on which the resulting tensors are created.

    Returns:
        dict with "boxes" (float32, shape [N, 4], [x1, y1, x2, y2]) and
        "labels" (int64, shape [N]); N may be 0 for an image with no usable
        annotations.
    """
    boxes = []
    labels = []
    for ann in annotations:
        # Crowd regions mark groups of objects rather than single instances.
        if ann.get("iscrowd", 0):
            continue
        x, y, w, h = ann["bbox"]
        # Zero-area boxes are rejected by the model, so drop them here.
        if w <= 0 or h <= 0:
            continue
        boxes.append([x, y, x + w, y + h])
        labels.append(ann["category_id"])
    return {
        # reshape keeps the [N, 4] layout even when there are no boxes
        "boxes": torch.tensor(boxes, dtype=torch.float32, device=device).reshape(-1, 4),
        "labels": torch.tensor(labels, dtype=torch.int64, device=device),
    }


# Training loop
num_epochs = 10
model.train()  # the model only returns a loss dict in training mode
for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0
    for images, annotations in train_loader:
        # Move images to the appropriate device (GPU/CPU) and build the
        # per-image target dicts from the raw COCO annotation lists.
        images = [image.to(device) for image in images]
        targets = [_to_detection_target(image_anns, device) for image_anns in annotations]

        # Forward pass
        loss_dict = model(images, targets)

        # Calculate total loss
        losses = sum(loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    # Report the mean loss over the epoch; guard against an empty loader.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch}, Loss: {mean_loss}")
