In [None]:
import os

import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
# Number of outputs of the new classification head.
# NOTE(review): the note that used to sit here read "1 class (person) +
# background", which would be 2 outputs, not 3 — confirm the intended class
# count against the labels found in the annotation file.
num_classes = 3

# Faster R-CNN (ResNet-50 + FPN) loaded with pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box predictor with a fresh head sized for `num_classes`,
# reusing the input width of the head that ships with the model.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

In [None]:
# Smoke test: push a dummy batch of two 3x800x800 images through the model in
# inference mode to check that the replaced head is wired up correctly.
# NOTE(review): on old torchvision releases a CPU forward pass made before
# moving the model to CUDA may leave cached anchors on the CPU — possibly
# related to the RuntimeError recorded further down; confirm.
model.eval()

# No gradients are needed for this check, so avoid building (and keeping
# alive through `pred`) an autograd graph for the whole detector.
with torch.no_grad():
    pred = model(torch.ones([2, 3, 800, 800]))

In [None]:
class BBoxDataset(Dataset):
    """
    PyTorch dataset for object detection.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with
    the keys ``'boxes'`` (float32) and ``'labels'`` (int64), the target format
    torchvision detection models expect.

    Parameters
    ----------
    images : list of str
        Paths of the image files.

    bboxes: list of torch.Tensor
        One tensor per image, laid out as
        [[x0, y0, x1, y1], [x0, y0, x1, y1], ...]

    labels: list of torch.Tensor
        One 1-D tensor of class indices per image,
        e.g. [] or [0, 3] or [0, 4, 5] ...

    transform: torchvision.transforms
        Callable applied to the RGB PIL image; its result is returned as the
        image of the sample.

    Notes
    -----
    The boxes are returned exactly as given. If ``transform`` changes the
    image geometry (e.g. a resize), the caller must supply boxes that are
    already expressed in the transformed image's coordinates.
    """

    def __init__(self, images, bboxes, labels, transform):
        self.images = images
        self.bboxes = bboxes
        self.labels = labels
        self.transform = transform

    def _path_to_tensor(self, path):
        """Load the image at ``path`` as RGB and apply ``self.transform``."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving it to garbage collection.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        return self.transform(img)

    def __len__(self):
        """Number of samples (one per entry of ``images``)."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``."""
        img = self._path_to_tensor(self.images[idx])
        bbox = self.bboxes[idx]
        label = self.labels[idx]
        # torchvision detection models require int64 labels; an int32 tensor
        # (what `.int()` produces) is rejected when the losses are computed.
        target = {'boxes': bbox.float(), 'labels': label.long()}
        return img, target

In [None]:
# IMG_DIR = './PennFudanPed/PNGImages/'
# Annotation table; the columns used in this notebook are 'filename',
# 'label', 'x_min', 'y_min', 'x_max' and 'y_max'.
df = pd.read_csv('annotation.csv')

# Map each distinct label to an integer index, in order of first appearance.
# NOTE(review): indices start at 0, and torchvision detection models reserve
# class 0 for background — confirm whether the first label in the file is
# meant to be background, otherwise the enumeration should start at 1.
class_to_idx = {label: idx for idx, label in enumerate(df['label'].unique())}

# One entry per image file. The table can hold several rows (one per box) for
# the same file, so duplicates are dropped while keeping first-appearance
# order; otherwise an image would be listed once per annotated box.
images = df['filename'].drop_duplicates().tolist()

In [None]:
# Display the label -> index mapping built from the annotation table.
class_to_idx

In [None]:
# Collect, for every entry of `images`, the tensor of its boxes and the tensor
# of its class indices (both lists are aligned with `images`).
groupby_image = df.groupby('filename')

bboxes = []
labels = []
for img in images:
    # All annotation rows that belong to this image file.
    rows = groupby_image.get_group(img)

    # Bounding boxes
    x_min = rows['x_min'].values
    y_min = rows['y_min'].values
    x_max = rows['x_max'].values
    y_max = rows['y_max'].values
    # Stack along axis=1 so every row is one box [x_min, y_min, x_max, y_max].
    # Stacking along axis 0 and reshaping to (-1, 4) interleaves coordinates
    # of different boxes as soon as an image has more than one annotation.
    bbox = np.stack([x_min, y_min, x_max, y_max], axis=1)
    bbox = torch.from_numpy(bbox)
    bboxes.append(bbox)

    # Labels: class names translated to their integer index, flattened to 1-D.
    label = rows['label'].map(class_to_idx).values
    label = torch.from_numpy(label).view(-1)
    labels.append(label)

In [None]:
# Preprocessing applied to every image: resize to 800x800, then convert the
# PIL image to a tensor.
# NOTE(review): the resize changes the image geometry, but nothing visible in
# this notebook rescales the boxes accordingly — confirm that the annotation
# coordinates already refer to 800x800 images.
transform = transforms.Compose([
    transforms.Resize(size=(800, 800)),
    transforms.ToTensor(),
])

# Detection dataset pairing each image path with its boxes and labels.
dataset = BBoxDataset(
    images=images,
    bboxes=bboxes,
    labels=labels,
    transform=transform,
)

In [None]:
def bbox_collate_fn(batch):
    """Collate ``(image, target)`` samples into one detection batch.

    The images are stacked along a new leading dimension into a single
    tensor; the targets are returned untouched as a list, in sample order.
    """
    stacked_images = torch.stack([image for image, _ in batch], dim=0)
    target_list = [target for _, target in batch]
    return stacked_images, target_list

In [None]:
# Shuffled batches of four samples; an incomplete final batch is dropped and
# samples are merged with the detection-specific collate function.
loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    drop_last=True,
    collate_fn=bbox_collate_fn,
)

In [11]:
# One pass over `loader`, training the detector on the GPU when available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
model.train()

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

for i, (inputs, targets) in enumerate(loader):
    # Images and every tensor of every target dict must live on the same
    # device as the model.
    inputs = inputs.to(device)
    targets = [{k: v.to(device) for k, v in target.items()} for target in targets]

    optimizer.zero_grad()

    # In training mode the model returns a dict of loss tensors.
    outputs = model(inputs, targets)

    # Optimise every loss the model reports. Summing only the classifier and
    # box-regression terms leaves the region proposal network without any
    # training signal while the shared backbone keeps changing underneath it.
    loss = sum(outputs.values())
    loss.backward()
    print(loss)

    optimizer.step()

RuntimeError: expected backend CUDA and dtype Float but got backend CPU and dtype Float

In [None]:
class Trainer:
    """Trainer for FasterRCNN.

    Parameters
    ----------
    model: nn.Module
        A model for object detection. It is called as
        ``model(images, targets)`` in training mode and must then return a
        dict of loss tensors.

    optimizer: torch.optim.Optimizer
        PyTorch Optimizer.

    criterion: nn.Module
        Loss function module. Stored on the instance; not used by any method
        of this class because the model computes its own losses.

    device: str or torch.device
        Device the model and every batch are moved to. Defaults to 'cpu'.
    """
    def __init__(self, model, optimizer, criterion=None, device='cpu'):
        self.device = device
        self.model = model.to(self.device)
        self.optimizer = optimizer
        self.criterion = criterion

    def _targets_to_device(self, targets):
        """Return copies of the target dicts with every tensor on ``self.device``."""
        return [
            {key: value.to(self.device) for key, value in target.items()}
            for target in targets
        ]

    @staticmethod
    def _total_loss(loss_dict):
        """Sum every loss term reported by the model into one scalar tensor."""
        # All terms are included (RPN losses as well as the box-head losses)
        # so that no part of the detector is left without a training signal.
        return sum(loss_dict.values())

    def epoch_train(self, train_loader):
        """Train for one pass over ``train_loader``.

        Returns
        -------
        float
            Mean of the per-batch total loss.
        """
        self.model.train()
        epoch_loss = 0

        for inputs, targets in train_loader:
            inputs = inputs.to(self.device)
            targets = self._targets_to_device(targets)

            self.optimizer.zero_grad()

            loss = self._total_loss(self.model(inputs, targets))
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()

        return epoch_loss / len(train_loader)

    def epoch_eval(self, eval_loader):
        """Compute the mean per-batch total loss over ``eval_loader``.

        No gradients are computed and no parameters are updated. The model is
        left in evaluation mode when this method returns.

        Returns
        -------
        float
            Mean of the per-batch total loss.
        """
        # The detection model only returns its loss dict in training mode (in
        # evaluation mode it returns predictions, which have no loss keys), so
        # the forward passes run in training mode with gradients disabled.
        # Caveat: mode-dependent layers therefore behave as during training.
        self.model.train()
        epoch_loss = 0

        try:
            with torch.no_grad():
                for inputs, targets in eval_loader:
                    inputs = inputs.to(self.device)
                    targets = self._targets_to_device(targets)

                    loss = self._total_loss(self.model(inputs, targets))
                    epoch_loss += loss.item()
        finally:
            # Leave the model in evaluation mode, as callers of an evaluation
            # routine expect.
            self.model.eval()

        return epoch_loss / len(eval_loader)

In [None]:
# Rebuild the detector from scratch for the Trainer-based run.
# NOTE(review): the note that used to sit next to `num_classes` read "1 class
# (person) + background", which would be 2 outputs, not 3 — confirm the
# intended class count against the labels found in the annotation file.
num_classes = 3
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# New box predictor sized for `num_classes`, fed by the same number of input
# features as the head that ships with the model.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

# No external loss module is configured.
criterion = None

In [None]:
# Wrap the model and its optimizer in the training helper.
trainer = Trainer(model=model, optimizer=optimizer)

In [None]:
num_epochs = 10

# Run `num_epochs` rounds of training followed by evaluation, reporting both
# mean losses after each round.
# NOTE(review): evaluation runs on the same loader as training, so the value
# reported as VALID_LOSS is not measured on held-out data — confirm whether a
# separate validation split is intended.
for epoch in range(1, num_epochs + 1):
    train_loss = trainer.epoch_train(loader)
    valid_loss = trainer.epoch_eval(loader)

    print(
        f'EPOCH: [{epoch}/{num_epochs}]',
        f'TRAIN_LOSS: {train_loss}, VALID_LOSS: {valid_loss}',
        sep='\n',
    )