In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CocoDetection

# Path to your COCO dataset
data_root = './data-coco/CoCO-dataset/coco2017'

# Define transforms
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize for faster experimentation; adjust as needed
    transforms.ToTensor()
])

# Load training and validation sets
train_dataset = CocoDetection(root=f"{data_root}/train2017", 
                              annFile=f"{data_root}/annotations/instances_train2017.json", 
                              transform=transform)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=False, sampler=None,
           batch_sampler=None, num_workers=0, collate_fn=None,
           pin_memory=False, drop_last=True, timeout=0,
           worker_init_fn=None)


loading annotations into memory...
Done (t=13.26s)
creating index...
index created!


In [2]:
import torch.nn as nn

class LightNN(nn.Module):
    def __init__(self, num_classes=80):  # COCO has 80 classes
        super(LightNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 16 * 16, 256),  # adjust input size if image resolution changes
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

model = LightNN(num_classes=80).cuda()  # Move model to GPU


In [3]:
import torch.optim as optim
import torch.nn.functional as F

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()  # For classification tasks
num_epochs=10
# Training loop
for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = images.cuda()
        labels = torch.tensor([target[0]['category_id'] for target in targets]).cuda()  # Assumes single label per image

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


RuntimeError: each element in list of batch should be of equal size