In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from dataset import TrainDataset
from tqdm import tqdm

```python
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# For training
images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
labels = torch.randint(1, 91, (4, 11))
images = list(image for image in images)
targets = []
for i in range(len(images)):
    d = {}
    d['boxes'] = boxes[i]
    d['labels'] = labels[i]
    targets.append(d)
output = model(images, targets)
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)

# optionally, if you want to export the model to ONNX:
torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)
```

In [2]:
import torch
import torchvision
from torch.utils.data import DataLoader

In [3]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    print(model, file=open('Faster-RCNN.txt', 'w'))

    return model

def get_optimizer(model):
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=1e-4)
    return optimizer

In [4]:
def collate_fn(batch):
    return tuple(zip(*batch))

def load_model():
    model = torch.load(os.path.join('model', 'faster-rcnn-1580.pkl'))
    return model

def train():
    mat_path = os.path.join('..', 'data', "train answer", 'digitStruct.mat')
    print(f'mat_path: {mat_path}')
    image_dir = os.path.join('..', 'data', 'train')
    print(f'image_dir: {image_dir}')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    dataset = TrainDataset(mat_path, image_dir)
    # data_loader = torch.utils.data.DataLoader(
    #     dataset, batch_size=2, shuffle=False, num_workers=4,
    #     collate_fn=collate_fn)
    data_loader = DataLoader(dataset, batch_size=4, collate_fn=collate_fn)

    # model = get_model(num_classes=11)
    model = load_model()
    model.train()
    model.to(device)

    optimizer = get_optimizer(model)

    best_losses = None

    epochs = 10
    print(f'epochs: {epochs}')
    for epoch in range(epochs):
        print(f'epoch {epoch} strat!')
        epoch_losses = None

        pbar = tqdm(data_loader)

        # print(f'len dataset: {len(dataset)}')
        for i, (images, targets) in enumerate(pbar):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)

            loss_message = []
            for k in loss_dict.keys():
                loss_message.append(f'{k}: {loss_dict[k]:.4f}')

            losses = sum(loss for loss in loss_dict.values())
            if epoch_losses is None:
                epoch_losses = losses
            else:
                epoch_losses += losses

            loss_message.append(f'epoch_losses: {epoch_losses/i+1:.4f}')

            pbar.set_description(', '.join(loss_message))

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
        
        if best_losses is None or best_losses > epoch_losses:
            best_losses = epoch_losses
            torch.save(model, os.path.join('model', f'faster-rcnn-{best_losses:.0f}.pkl'))




In [5]:
train()

mat_path: ..\data\train answer\digitStruct.mat
image_dir: ..\data\train
device: cuda


  0%|          | 0/8351 [00:00<?, ?it/s]

epochs: 10
epoch 0 strat!


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
loss_classifier: 0.0310, loss_box_reg: 0.1111, loss_objectness: 0.0002, loss_rpn_box_reg: 0.0067, epoch_losses: 1.1870: 100%|██████████| 8351/8351 [1:02:08<00:00,  2.24it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 1 strat!


loss_classifier: 0.0432, loss_box_reg: 0.0908, loss_objectness: 0.0001, loss_rpn_box_reg: 0.0068, epoch_losses: 1.1860: 100%|██████████| 8351/8351 [1:04:00<00:00,  2.17it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 2 strat!


loss_classifier: 0.0334, loss_box_reg: 0.1079, loss_objectness: 0.0006, loss_rpn_box_reg: 0.0055, epoch_losses: 1.1817: 100%|██████████| 8351/8351 [1:04:38<00:00,  2.15it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 3 strat!


loss_classifier: 0.0419, loss_box_reg: 0.1152, loss_objectness: 0.0008, loss_rpn_box_reg: 0.0057, epoch_losses: 1.1768: 100%|██████████| 8351/8351 [1:04:27<00:00,  2.16it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 4 strat!


loss_classifier: 0.0360, loss_box_reg: 0.0893, loss_objectness: 0.0000, loss_rpn_box_reg: 0.0068, epoch_losses: 1.1727: 100%|██████████| 8351/8351 [1:04:50<00:00,  2.15it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 5 strat!


loss_classifier: 0.0459, loss_box_reg: 0.0894, loss_objectness: 0.0004, loss_rpn_box_reg: 0.0073, epoch_losses: 1.1725: 100%|██████████| 8351/8351 [1:04:51<00:00,  2.15it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 6 strat!


loss_classifier: 0.0282, loss_box_reg: 0.1152, loss_objectness: 0.0052, loss_rpn_box_reg: 0.0058, epoch_losses: 1.1715: 100%|██████████| 8351/8351 [1:05:09<00:00,  2.14it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 7 strat!


loss_classifier: 0.0274, loss_box_reg: 0.0892, loss_objectness: 0.0002, loss_rpn_box_reg: 0.0058, epoch_losses: 1.1717: 100%|██████████| 8351/8351 [1:05:09<00:00,  2.14it/s]
  0%|          | 0/8351 [00:00<?, ?it/s]

epoch 8 strat!


loss_classifier: 0.0384, loss_box_reg: 0.0826, loss_objectness: 0.0030, loss_rpn_box_reg: 0.0052, epoch_losses: 1.1656: 100%|██████████| 8351/8351 [1:05:24<00:00,  2.13it/s]