In [1]:
import numpy as np
import cv2
import torch 
import torchvision 
from engine import train_one_epoch, evaluate
from data_reading import WindowDataset
import utils
from torch.utils.data import DataLoader
import itertools
from torch.utils.tensorboard import SummaryWriter


In [2]:
def get_model(num_classes=2):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box-prediction head.

    Follows the torchvision detection fine-tuning tutorial:
    https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

    Args:
        num_classes: Number of classes predicted by the replacement box
            predictor, background included. Defaults to 2
            (window + background).

    Returns:
        The detector after ``.double()`` has been applied, i.e. with its
        floating-point parameters and buffers cast to float64.
    """
    # Start from the pretrained detector; only the box predictor is swapped out below.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Number of input features for the classifier of the existing head.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
        in_features, num_classes
    )
    # NOTE(review): float64 is presumably needed to match the dtype of the
    # images produced by WindowDataset -- confirm; float32 would be faster.
    return model.double()

In [3]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'running on device {device}')

# Build the detector and move it onto the selected device. The trailing
# expression is left bare, so the notebook echoes the module repr.
model = get_model()
model.to(device)

running on device cuda


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [4]:
# Hand the optimizer only those parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))

optimizer = torch.optim.Adam(params, lr=1e-5)
# NOTE(review): the training cell steps this scheduler once per epoch, so the
# learning rate is multiplied by 0.1 every 3 epochs -- over 40 epochs it
# becomes vanishingly small. Confirm this schedule is intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [5]:
# Training split: WindowDataset is constructed with its default arguments.
dataset_train = WindowDataset()

# Validation and test splits point at their own image folders and annotations.
dataset_val = WindowDataset(
    images_folder="data/val/images/",
    labels_path="data/val/anno",
)
dataset_test = WindowDataset(
    images_folder="data/test/images/",
    labels_path="data/test/anno",
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [6]:
# All three loaders share the same settings: one sample per batch, shuffled,
# and collated with the project's utils.collate_fn.
loader_options = dict(batch_size=1, shuffle=True, collate_fn=utils.collate_fn)

train = DataLoader(dataset_train, **loader_options)
# Sanity check: dump a single training batch.
print(next(iter(train)))

val = DataLoader(dataset_val, **loader_options)
test = DataLoader(dataset_test, **loader_options)

((tensor([[[0.9922, 0.9961, 0.9804,  ..., 0.9020, 0.5843, 0.7686],
         [0.9922, 0.9961, 0.9843,  ..., 0.9216, 0.6549, 0.7255],
         [0.9882, 0.9961, 0.9882,  ..., 0.9373, 0.7569, 0.6549],
         ...,
         [0.1529, 0.1412, 0.1373,  ..., 0.5451, 0.5373, 0.5451],
         [0.1451, 0.1412, 0.1333,  ..., 0.5412, 0.5294, 0.5412],
         [0.1490, 0.1451, 0.1333,  ..., 0.5373, 0.5294, 0.5373]],

        [[0.9961, 1.0000, 0.9922,  ..., 0.9255, 0.6078, 0.7922],
         [0.9961, 1.0000, 0.9961,  ..., 0.9451, 0.6784, 0.7490],
         [0.9922, 1.0000, 1.0000,  ..., 0.9608, 0.7804, 0.6784],
         ...,
         [0.2275, 0.2157, 0.2118,  ..., 0.5882, 0.5804, 0.5882],
         [0.2196, 0.2157, 0.2078,  ..., 0.5843, 0.5725, 0.5843],
         [0.2235, 0.2196, 0.2078,  ..., 0.5804, 0.5725, 0.5804]],

        [[0.9647, 0.9686, 0.9647,  ..., 0.8863, 0.5686, 0.7529],
         [0.9647, 0.9686, 0.9686,  ..., 0.9059, 0.6392, 0.7098],
         [0.9608, 0.9686, 0.9725,  ..., 0.9216, 0.7412, 

In [7]:
# Draw one training batch and report the dtype of the first target's labels.
sample_batch = next(iter(train))
sample_targets = sample_batch[1]
print(sample_targets[0]['labels'].dtype)

torch.int64


In [8]:
# Baseline: run the evaluation routine over the training loader before any
# fine-tuning, reporting progress every 10 iterations.
print(device)
evaluate(
    model,
    train,
    device,
    print_freq=10,
)

cuda
Test:  [  0/612]  eta: 0:38:39  model_time: 3.7879 (3.7879)  evaluator_time: 0.0000 (0.0000)  time: 3.7899  data: 0.0020  max mem: 999
Test:  [ 10/612]  eta: 0:24:27  model_time: 2.2966 (2.4327)  evaluator_time: 0.0000 (0.0036)  time: 2.4381  data: 0.0018  max mem: 999
Test:  [ 20/612]  eta: 0:23:25  model_time: 2.2968 (2.3687)  evaluator_time: 0.0000 (0.0040)  time: 2.3029  data: 0.0009  max mem: 999
Test:  [ 30/612]  eta: 0:22:49  model_time: 2.3006 (2.3477)  evaluator_time: 0.0000 (0.0042)  time: 2.3056  data: 0.0000  max mem: 999
Test:  [ 40/612]  eta: 0:22:19  model_time: 2.3006 (2.3362)  evaluator_time: 0.0000 (0.0038)  time: 2.3066  data: 0.0009  max mem: 999
Test:  [ 50/612]  eta: 0:21:52  model_time: 2.2995 (2.3303)  evaluator_time: 0.0000 (0.0036)  time: 2.3069  data: 0.0010  max mem: 999
Averaged stats: model_time: 2.2995 (2.3303)  evaluator_time: 0.0000 (0.0036)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.9

<coco_eval.CocoEvaluator at 0x1ef8f498730>

In [9]:
num_epochs = 40
writer = SummaryWriter()
try:
    for epoch in range(num_epochs):
        # Train for one epoch, printing progress every 100 iterations.
        train_one_epoch(writer, model, optimizer, train, device, epoch, print_freq=100)
        # Update the learning rate.
        lr_scheduler.step()
        # Evaluate on the validation dataset.
        evaluate(model, val, device, print_freq=100)
finally:
    # Always flush and close the TensorBoard writer, including when the run
    # is interrupted by hand, so already-logged events are written to disk.
    writer.close()

print("DONE")

Epoch: [0]  [  0/612]  eta: 1:01:44  lr: 0.000010  loss: 0.6271 (0.6271)  loss_classifier: 0.5365 (0.5365)  loss_box_reg: 0.0001 (0.0001)  loss_objectness: 0.0610 (0.0610)  loss_rpn_box_reg: 0.0294 (0.0294)  time: 6.0527  data: 0.0010  max mem: 2044
Epoch: [0]  [100/612]  eta: 0:51:42  lr: 0.000010  loss: 0.0173 (0.1096)  loss_classifier: 0.0035 (0.0429)  loss_box_reg: 0.0001 (0.0123)  loss_objectness: 0.0026 (0.0286)  loss_rpn_box_reg: 0.0111 (0.0259)  time: 6.0860  data: 0.0015  max mem: 2888
Epoch: [0]  [200/612]  eta: 0:41:35  lr: 0.000010  loss: 0.0134 (0.0842)  loss_classifier: 0.0035 (0.0300)  loss_box_reg: 0.0000 (0.0113)  loss_objectness: 0.0022 (0.0235)  loss_rpn_box_reg: 0.0037 (0.0194)  time: 6.0373  data: 0.0009  max mem: 2888
Epoch: [0]  [300/612]  eta: 0:31:31  lr: 0.000010  loss: 0.0105 (0.0655)  loss_classifier: 0.0019 (0.0226)  loss_box_reg: 0.0000 (0.0089)  loss_objectness: 0.0013 (0.0180)  loss_rpn_box_reg: 0.0040 (0.0159)  time: 6.1382  data: 0.0020  max mem: 2888


KeyboardInterrupt: 

In [7]:
# Switch the detector to evaluation mode before running predictions.
model.eval()

# Pull a single batch from the training loader and show its targets and the
# shape of the first image.
img, labs = next(iter(train))
print(labs)
print(img[0].shape)

# Move every image in the batch onto the compute device.
img = tuple(image.to(device) for image in img)

({'boxes': tensor([[ 64.,  47., 191., 142.]]), 'labels': tensor([1]), 'image_id': tensor(63), 'area': tensor(63)},)
torch.Size([3, 96, 128])


In [8]:
# Run inference on the prepared batch. Gradients are not needed for
# prediction, so autograd is disabled to avoid building a graph and holding
# on to intermediate activations.
with torch.no_grad():
    predictions = model(img)
# Leave the result as the cell's last expression so the notebook displays it.
predictions

[{'boxes': tensor([[4.2479e+00, 4.2935e+01, 4.0440e+01, 5.2908e+01],
          [9.1212e+01, 2.3942e+01, 1.1063e+02, 4.2728e+01],
          [6.8612e+01, 3.3541e+01, 9.9707e+01, 4.6959e+01],
          [6.6769e+00, 4.4452e+01, 1.2449e+01, 5.3533e+01],
          [1.4369e+01, 4.4341e+01, 5.2674e+01, 5.4182e+01],
          [6.6431e+01, 3.7794e+01, 7.4878e+01, 4.8484e+01],
          [7.1389e+01, 1.0034e+01, 1.2695e+02, 4.9351e+01],
          [6.5990e+01, 3.2809e+01, 7.2087e+01, 4.9844e+01],
          [7.7713e+00, 4.7267e+01, 3.9410e+01, 5.5803e+01],
          [5.7797e+01, 2.5230e+01, 7.2375e+01, 4.3206e+01],
          [6.5850e+01, 3.7428e+01, 9.4938e+01, 5.0607e+01],
          [9.3713e+01, 2.8376e+01, 1.0661e+02, 4.5517e+01],
          [7.0734e+01, 2.9658e+01, 9.0392e+01, 4.9930e+01],
          [1.0239e+02, 2.5095e+01, 1.1286e+02, 4.5843e+01],
          [3.9647e+00, 4.5867e+01, 1.2929e+01, 5.4962e+01],
          [8.6810e+01, 2.5777e+01, 9.9273e+01, 4.5077e+01],
          [1.0409e+02, 1.3592e+