In [1]:
import numpy as np
import cv2
import torch 
import torchvision 
from engine import train_one_epoch, evaluate
from data_reading import WindowDataset
import utils
from torch.utils.data import DataLoader
import itertools
from torch.utils.tensorboard import SummaryWriter


In [2]:
def get_model():
    # Initialize Model, from pytorch tutorial: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)
    num_classes = 2 # window + background
    in_features = model.roi_heads.box_predictor.cls_score.in_features # Number of input features for the classifier
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    return model.double()

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'running on device {device}')
model = get_model()
model.to(device)

running on device cuda


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [5]:
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.Adam(params, lr = 0.00001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)

In [6]:
dataset_train = WindowDataset()
dataset_val = WindowDataset(images_folder="data/val/images/", labels_path="data/val/anno")
dataset_test = WindowDataset(images_folder="data/test/images/", labels_path="data/test/anno")

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [7]:
train = DataLoader(dataset_train, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)
print(next(iter(train)))

val = DataLoader(dataset_val, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)
test = DataLoader(dataset_test, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)

((tensor([[[0.8431, 0.8431, 0.8431,  ..., 0.5804, 0.5725, 0.5686],
         [0.8431, 0.8431, 0.8431,  ..., 0.5843, 0.5804, 0.5765],
         [0.8431, 0.8431, 0.8431,  ..., 0.5961, 0.5882, 0.5882],
         ...,
         [0.5922, 0.5804, 0.5961,  ..., 0.8235, 0.8039, 0.7882],
         [0.5843, 0.5843, 0.5725,  ..., 0.8196, 0.8000, 0.7843],
         [0.5725, 0.5961, 0.5725,  ..., 0.8157, 0.7961, 0.7804]],

        [[0.8824, 0.8824, 0.8824,  ..., 0.9647, 0.9608, 0.9569],
         [0.8824, 0.8824, 0.8824,  ..., 0.9569, 0.9608, 0.9569],
         [0.8824, 0.8824, 0.8824,  ..., 0.9529, 0.9529, 0.9529],
         ...,
         [0.8275, 0.8157, 0.8196,  ..., 0.8078, 0.7843, 0.7686],
         [0.8235, 0.8235, 0.8078,  ..., 0.8039, 0.7804, 0.7647],
         [0.8118, 0.8353, 0.8078,  ..., 0.8000, 0.7765, 0.7608]],

        [[0.8471, 0.8471, 0.8471,  ..., 0.9765, 0.9804, 0.9765],
         [0.8471, 0.8471, 0.8471,  ..., 0.9725, 0.9843, 0.9804],
         [0.8471, 0.8471, 0.8471,  ..., 0.9647, 0.9725, 

In [8]:
print(next(iter(train))[1][0]['labels'].dtype)

torch.int64


In [9]:
print(device)
evaluate(model, train, device, print_freq=10)

cuda
Test:  [  0/612]  eta: 0:39:25  model_time: 3.8575 (3.8575)  evaluator_time: 0.0050 (0.0050)  time: 3.8655  data: 0.0020  max mem: 999
Test:  [ 10/612]  eta: 0:25:17  model_time: 2.3716 (2.5140)  evaluator_time: 0.0050 (0.0060)  time: 2.5215  data: 0.0015  max mem: 999
Test:  [ 20/612]  eta: 0:24:26  model_time: 2.3896 (2.4695)  evaluator_time: 0.0050 (0.0062)  time: 2.4080  data: 0.0015  max mem: 999
Test:  [ 30/612]  eta: 0:23:42  model_time: 2.3676 (2.4361)  evaluator_time: 0.0040 (0.0058)  time: 2.4007  data: 0.0015  max mem: 999
Test:  [ 40/612]  eta: 0:23:08  model_time: 2.3656 (2.4190)  evaluator_time: 0.0050 (0.0060)  time: 2.3736  data: 0.0015  max mem: 999
Test:  [ 50/612]  eta: 0:22:35  model_time: 2.3656 (2.4043)  evaluator_time: 0.0050 (0.0058)  time: 2.3627  data: 0.0016  max mem: 999
Averaged stats: model_time: 2.3656 (2.4043)  evaluator_time: 0.0050 (0.0058)
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.9

<coco_eval.CocoEvaluator at 0x2321c585490>

In [10]:
num_epochs = 40
writer = SummaryWriter()
for epoch in range(num_epochs):
    # Train for one epoch, while printing every 10 iterations
    train_one_epoch(writer, model, optimizer, train, device, epoch, print_freq=100)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, val, device, print_freq=100)

print("DONE")

Epoch: [0]  [  0/612]  eta: 1:01:58  lr: 0.000010  loss: 1.1342 (1.1342)  loss_classifier: 0.8940 (0.8940)  loss_box_reg: 0.0001 (0.0001)  loss_objectness: 0.1829 (0.1829)  loss_rpn_box_reg: 0.0572 (0.0572)  time: 6.0756  data: 0.0010  max mem: 2044
Epoch: [0]  [100/612]  eta: 0:51:48  lr: 0.000010  loss: 0.0307 (0.1224)  loss_classifier: 0.0080 (0.0559)  loss_box_reg: 0.0001 (0.0126)  loss_objectness: 0.0074 (0.0271)  loss_rpn_box_reg: 0.0132 (0.0267)  time: 6.0711  data: 0.0020  max mem: 2887
Epoch: [0]  [200/612]  eta: 0:41:41  lr: 0.000010  loss: 0.0165 (0.0848)  loss_classifier: 0.0029 (0.0350)  loss_box_reg: 0.0000 (0.0097)  loss_objectness: 0.0036 (0.0201)  loss_rpn_box_reg: 0.0088 (0.0200)  time: 6.0697  data: 0.0022  max mem: 2887
Epoch: [0]  [300/612]  eta: 0:31:27  lr: 0.000010  loss: 0.0121 (0.0740)  loss_classifier: 0.0025 (0.0293)  loss_box_reg: 0.0000 (0.0130)  loss_objectness: 0.0019 (0.0160)  loss_rpn_box_reg: 0.0056 (0.0158)  time: 6.0709  data: 0.0020  max mem: 2887


Epoch: [2]  [611/612]  eta: 0:00:06  lr: 0.000010  loss: 0.0027 (0.0250)  loss_classifier: 0.0002 (0.0082)  loss_box_reg: 0.0000 (0.0107)  loss_objectness: 0.0004 (0.0022)  loss_rpn_box_reg: 0.0008 (0.0039)  time: 6.0885  data: 0.0014  max mem: 2887
Epoch: [2] Total time: 1:02:01 (6.0803 s / it)
Test:  [  0/188]  eta: 0:07:17  model_time: 2.3262 (2.3262)  evaluator_time: 0.0000 (0.0000)  time: 2.3282  data: 0.0020  max mem: 2887
Averaged stats: model_time: 2.3252 (2.3257)  evaluator_time: 0.0010 (0.0005)
Accumulating evaluation results...
DONE (t=0.00s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  

Epoch: [5]  [100/612]  eta: 0:51:28  lr: 0.000001  loss: 0.0040 (0.0163)  loss_classifier: 0.0007 (0.0042)  loss_box_reg: 0.0000 (0.0086)  loss_objectness: 0.0002 (0.0010)  loss_rpn_box_reg: 0.0024 (0.0025)  time: 5.7861  data: 0.0050  max mem: 2887
Epoch: [5]  [200/612]  eta: 0:41:37  lr: 0.000001  loss: 0.0020 (0.0217)  loss_classifier: 0.0002 (0.0061)  loss_box_reg: 0.0000 (0.0117)  loss_objectness: 0.0001 (0.0010)  loss_rpn_box_reg: 0.0007 (0.0029)  time: 6.0898  data: 0.0058  max mem: 2887
Epoch: [5]  [300/612]  eta: 0:31:34  lr: 0.000001  loss: 0.0034 (0.0236)  loss_classifier: 0.0005 (0.0068)  loss_box_reg: 0.0000 (0.0127)  loss_objectness: 0.0001 (0.0012)  loss_rpn_box_reg: 0.0008 (0.0029)  time: 6.0898  data: 0.0060  max mem: 2887
Epoch: [5]  [400/612]  eta: 0:21:28  lr: 0.000001  loss: 0.0020 (0.0208)  loss_classifier: 0.0001 (0.0060)  loss_box_reg: 0.0000 (0.0110)  loss_objectness: 0.0001 (0.0010)  loss_rpn_box_reg: 0.0007 (0.0027)  time: 6.0885  data: 0.0050  max mem: 2887


Test:  [  0/188]  eta: 0:07:17  model_time: 2.3218 (2.3218)  evaluator_time: 0.0010 (0.0010)  time: 2.3248  data: 0.0020  max mem: 2887
Averaged stats: model_time: 2.3248 (2.3255)  evaluator_time: 0.0010 (0.0005)
Accumulating evaluation results...
DONE (t=0.00s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.0

Epoch: [10]  [200/612]  eta: 0:41:48  lr: 0.000000  loss: 0.0009 (0.0150)  loss_classifier: 0.0002 (0.0045)  loss_box_reg: 0.0000 (0.0078)  loss_objectness: 0.0001 (0.0005)  loss_rpn_box_reg: 0.0006 (0.0022)  time: 6.0888  data: 0.0012  max mem: 2887
Epoch: [10]  [300/612]  eta: 0:31:39  lr: 0.000000  loss: 0.0021 (0.0186)  loss_classifier: 0.0001 (0.0055)  loss_box_reg: 0.0000 (0.0102)  loss_objectness: 0.0001 (0.0005)  loss_rpn_box_reg: 0.0018 (0.0023)  time: 6.0879  data: 0.0011  max mem: 2887
Epoch: [10]  [400/612]  eta: 0:21:30  lr: 0.000000  loss: 0.0027 (0.0178)  loss_classifier: 0.0002 (0.0053)  loss_box_reg: 0.0000 (0.0095)  loss_objectness: 0.0001 (0.0006)  loss_rpn_box_reg: 0.0019 (0.0023)  time: 6.0883  data: 0.0010  max mem: 2887
Epoch: [10]  [500/612]  eta: 0:11:21  lr: 0.000000  loss: 0.0008 (0.0175)  loss_classifier: 0.0000 (0.0052)  loss_box_reg: 0.0000 (0.0093)  loss_objectness: 0.0001 (0.0007)  loss_rpn_box_reg: 0.0006 (0.0023)  time: 6.0879  data: 0.0014  max mem: 2

Averaged stats: model_time: 2.3249 (2.3250)  evaluator_time: 0.0010 (0.0006)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medi

KeyboardInterrupt: 

In [None]:
model.eval()
img, labs = next(iter(train))
print(labs)
print(img[0].shape)
img = tuple([i.to(device) for i in img])

In [None]:
model(img)