In [1]:
from torch.utils.data import DataLoader
from src.dataset import FrameDataset, custom_collate
from sklearn.preprocessing import LabelEncoder
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import torch
from src.averager import Averager
import os


from src import config
# Shared hyper-parameter object for the whole notebook. The cells below read
# train_path, batch_size, num_classes, device, learning_rate, max_epochs,
# experiment_name and output_dir from it.
hparams = config.hparams

In [2]:
# Encoder that maps class names to integer ids. LabelEncoder numbers classes in
# sorted order, so 'apadding' receives id 0.
# NOTE(review): presumably 'apadding' exists to reserve id 0 for the detector's
# background class -- confirm against FrameDataset.
le = LabelEncoder().fit(['apadding', 'bicycle', 'human', 'motorcycle', 'vehicle'])
le  # keep the fitted encoder as the cell's displayed value


In [3]:
# Training set rooted at hparams.train_path, with labels encoded through `le`.
training_data = FrameDataset(root=hparams.train_path, label_encoder=le)

# Shuffled mini-batches; batch assembly is delegated to the project's
# custom_collate function.
train_dataloader = DataLoader(
    training_data,
    batch_size=hparams.batch_size,
    shuffle=True,
    collate_fn=custom_collate,
)

In [4]:
# Build a Faster R-CNN (ResNet-50 + FPN) detector with hparams.num_classes
# output classes. No COCO detection weights are requested here: only
# num_classes is passed, so the detection heads start untrained.
# NOTE(review): depending on the torchvision version the backbone may still
# default to ImageNet-pretrained weights -- confirm for the installed version.
model = fasterrcnn_resnet50_fpn(num_classes=hparams.num_classes)
# Device used for training, taken from the shared configuration.
device = torch.device(hparams.device)

In [5]:
# Put the model in training mode and move it to the configured device.
model.train()
model.to(device)

# Optimise only the parameters that actually require gradients. The original
# cell built this list but then handed model.parameters() to the optimizer,
# leaving `params` unused.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=hparams.learning_rate, momentum=0.9, weight_decay=0.00001)

# Running average of the loss, reset at the start of every epoch.
loss_hist = Averager()
# Global iteration counter (1-based), used only for periodic logging.
itr = 1

for epoch in range(hparams.max_epochs):
    loss_hist.reset()

    for frames, targets in train_dataloader:
        # Move every image and every tensor inside each target dict to the device.
        frames = [frame.to(device) for frame in frames]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Called with targets in training mode, the model returns its loss terms.
        loss_dict = model(frames, targets)

        # Total loss is the plain sum of all returned loss terms.
        losses = sum(loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)  # feed the running average

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Log the current batch loss every 50 iterations.
        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    print(f"Epoch #{epoch} loss: {loss_hist.value}")

Iteration #50 loss: 0.36814434403107865
Iteration #100 loss: 0.3358881357702124
Iteration #150 loss: 0.343179546844417
Iteration #200 loss: 0.5043411239549764
Iteration #250 loss: 0.4155584117938979
Iteration #300 loss: 0.27577452137573527
Iteration #350 loss: 0.24899384515620787
Iteration #400 loss: 0.5403421564894159
Iteration #450 loss: 0.5971994017380827
Iteration #500 loss: 0.30830810682326004
Iteration #550 loss: 0.3175986090868016


In [7]:
# Make sure the output directory exists. exist_ok=True makes this idempotent
# and avoids the check-then-create race of exists() followed by makedirs().
log_name = f"{hparams.experiment_name}-{hparams.num_classes}"
output_dir = os.path.join(hparams.output_dir, log_name)
os.makedirs(output_dir, exist_ok=True)

# `epoch` is the loop variable left over from the training cell, so this cell
# only works after that cell has run; the file name carries the last epoch index.
path = os.path.join(output_dir, log_name + str(epoch) + '.pth')
# NOTE(review): this pickles the whole module object, so loading it later needs
# the same class definitions importable. Saving model.state_dict() is the more
# portable format, but switching would change what downstream loaders expect.
torch.save(model, path)

In [9]:
# Switch to inference mode and move the model to the CPU. Both calls return the
# module itself, so the chained expression is still the cell's displayed value.
model.eval().to(torch.device('cpu'))


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [20]:
from src.makeframes import make_frames
# Test clip to be processed by the project's make_frames helper.
# NOTE(review): presumably this writes frame_XXXX.jpg files into a directory
# named after the clip -- a later cell reads
# data/test/clips/20200515/clip_28_1250/frame_0006.jpg. Confirm against
# src.makeframes.
test_path = 'data/test/clips/20200515/clip_28_1250.mp4'
make_frames(test_path)


In [22]:
import cv2 as cv

# Load one extracted frame and run the detector on it.
random_frame_path = 'data/test/clips/20200515/clip_28_1250/frame_0006.jpg'
random_frame = cv.imread(random_frame_path)
# cv.imread signals failure by returning None instead of raising, so fail here
# with a clear message rather than with a TypeError on the division below.
if random_frame is None:
    raise FileNotFoundError(f"Could not read image: {random_frame_path}")

# NOTE(review): OpenCV decodes images in BGR channel order -- confirm this
# matches the channel order FrameDataset used during training.
random_frame = random_frame / 255                              # scale pixel values to [0, 1]
random_frame = random_frame.transpose((2, 0, 1))               # HWC -> CHW
random_frame = torch.tensor(random_frame).type(torch.float)    # float64 array -> float32 tensor
random_frame = random_frame.unsqueeze(0)                       # add a leading batch dimension

# Inference only: skip autograd bookkeeping so no graph is built or retained.
with torch.no_grad():
    test = model(random_frame)

In [28]:
# Show the raw model output for the single test frame.
# NOTE(review): the saved output below this cell ("100") does not look like it
# was produced by this statement, and the execution counts are non-sequential;
# re-run the notebook from a fresh kernel to refresh it.
print(test)

100
