In [1]:
# torch
import torch
import torchvision
import torch.nn.functional as F
import torch.nn as nn
from torchvision import transforms as T
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.loggers import CSVLogger
from tqdm.notebook import tnrange, tqdm_notebook

# standard library and third-party utilities
import matplotlib.pyplot as plt
import math
import random
import os
import json
import cv2
import pandas as pd
import numpy as np
import sys
import json

sys.path.append("../")

from dataset import TreeDataset
from transforms import MaskResize
from visualize import BatchVisualizer as BV
import utils

In [2]:
# Central settings for the dataset splits and data loaders built in the cells below.
data_configs = dict(
    # Dataset root folder (the path name suggests a COCO-segmentation export).
    root='../data/Some Trees Dataset.v12i.coco-segmentation',
    # Value handed to utils.set_seed before datasets and loaders are created.
    seed=42,
    transforms=dict(
        # Images: convert to a tensor, then resize to 512x512.
        image_transforms=T.Compose([
            T.ToTensor(),
            T.Resize(size=(512, 512)),
        ]),
        # Masks: resized to the same 512x512 target via the project's MaskResize.
        mask_transforms=MaskResize(size=(512, 512)),
    ),
    # Number of samples per batch for every DataLoader.
    batch_size=4,
)

In [3]:
# Apply the configured seed, then build the three dataset splits.
utils.set_seed(data_configs['seed'])

# All splits share the same root folder and the same image/mask transforms;
# only the split name passed to TreeDataset differs. The generator is consumed
# left to right, so the datasets are created in train -> valid -> test order.
train_set, val_set, test_set = (
    TreeDataset(
        data_configs['root'],
        split_name,
        data_configs['transforms']['image_transforms'],
        data_configs['transforms']['mask_transforms'],
    )
    for split_name in ('train', 'valid', 'test')
)

In [4]:
# Re-apply the configured seed before the data loaders are created.
# NOTE(review): presumably this makes the shuffled batch order reproducible —
# confirm what utils.set_seed actually seeds.
utils.set_seed(data_configs['seed'])

def collate_fn(batch):
    """Regroup a batch of (image, target) pairs into two parallel lists.

    The samples are transposed instead of stacked, so each image and target
    stays a separate element of its list.

    Args:
        batch: iterable of 2-element (image, target) samples.

    Returns:
        A tuple ``(images, targets)`` where both members are lists.

    Raises:
        ValueError: if the batch is empty or its samples are not pairs.
    """
    images, targets = map(list, zip(*batch))
    return images, targets
    
# Build one DataLoader per split, all with the configured batch size.
# collate_fn keeps every batch as (list of images, list of targets) instead of
# stacking the samples into a single tensor.
# Only the training loader shuffles (reshuffled at every epoch). Validation and
# test loaders keep a fixed order so evaluation runs and per-batch
# visualisations are repeatable; PyTorch Lightning also warns when a val/test
# dataloader is created with shuffle=True.
train_loader = DataLoader(train_set, batch_size=data_configs['batch_size'], shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_set, batch_size=data_configs['batch_size'], shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_set, batch_size=data_configs['batch_size'], shuffle=False, collate_fn=collate_fn)

In [5]:
from models.resnets import BottleneckFPNBackbone, TreeMaskRCNN

In [6]:
# Draw one batch from the training loader; with the loader's collate_fn,
# x is a list of images and y is the matching list of targets.
x, y = next(iter(train_loader))

# Instantiate the project's Mask R-CNN wrapper with the 'resnet152' backbone variant.
# NOTE(review): pretrained=False presumably means randomly initialised weights —
# confirm in models.resnets.
model = TreeMaskRCNN(backbone_variant='resnet152', pretrained=False)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda'if torch.cuda.is_available() else 'cpu')
# Module.to() returns the module, so as the cell's last expression this also
# displays the model structure in the cell output.
model.to(device)



TreeMaskRCNN(
  (model): MaskRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): BottleneckFPNBackbone(
      (body): ModuleDict(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1

In [7]:
# Smoke test: push a single training batch through the model and keep the
# result in loss_dict. The loop stops after the first batch.
for batch in tqdm_notebook(train_loader):
    # batch is (images, targets) as produced by the loader's collate function;
    # move every image and every value of each target dict to the model's device.
    images = [image.to(device) for image in batch[0]]
    targets = [
        {name: value.to(device) for name, value in target.items()}
        for target in batch[1]
    ]

    # Forward pass with targets supplied.
    loss_dict = model(images, targets)
    break

  0%|          | 0/722 [00:00<?, ?it/s]

In [9]:
# Display the result of the single-batch forward pass run in the previous cell.
loss_dict

{'loss_classifier': tensor(0.8839, device='cuda:0', grad_fn=<NllLossBackward0>),
 'loss_box_reg': tensor(0.0675, device='cuda:0', grad_fn=<DivBackward0>),
 'loss_mask': tensor(10.3487, device='cuda:0',
        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_objectness': tensor(0.6868, device='cuda:0',
        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_rpn_box_reg': tensor(0.4808, device='cuda:0', grad_fn=<DivBackward0>)}