In [None]:
import os
import csv
import cv2
import json
import torch
import pathlib
import numpy as np
from glob import glob
from skimage import transform as sktsf
from torchvision import transforms as tvtsf
from typing import List, Dict

import torchvision
from torch import nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision.models import vgg16
from pytorch_lightning import Trainer
from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection import FasterRCNN
from pytorch_lightning.loggers.neptune import NeptuneLogger
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.generalized_rcnn import GeneralizedRCNN
from torchvision.models.detection.rpn import RPNHead, RegionProposalNetwork
from torchvision.models.detection.transform import GeneralizedRCNNTransform

In [None]:


################################################################################################################################
##################################################### utils.py ###########################################################
################################################################################################################################


# Resize bounds (in pixels) handed to the Transform/TransformTest objects that
# the dataset classes below create.
min_size = 600
max_size = 1000
# Class table read as strings from CSV; the header row is skipped.
classes = np.loadtxt('/kaggle/input/test-dataset/trainval/classes.csv', skiprows=1, dtype=str, delimiter=',')
# Third CSV column cast to uint8. PrepareDataset.get_bb indexes this array with
# the class id stored in each bounding-box record to obtain the training label.
labels = classes[:, 2].astype(np.uint8)

def get_filenames_of_path(path: pathlib.Path, ext: str = "*"):
    """
    Return the regular files inside ``path`` that match the glob ``ext``.

    Args:
        path: Directory to search. A ``pathlib.Path`` or a plain string.
        ext: Glob pattern applied inside ``path`` (default ``"*"``).

    Returns:
        A list of ``pathlib.Path`` objects, sorted so the order does not
        depend on the filesystem's enumeration order.

    Raises:
        AssertionError: If no file matches.
    """
    # Coerce so callers holding a string path (e.g. from glob) also work.
    path = pathlib.Path(path)
    filenames = sorted(file for file in path.glob(ext) if file.is_file())
    assert len(filenames) > 0, f"No files found in path: {path}"
    return filenames

def xywh_to_xyxy(boxes):
    """Turn rows of [x, y, w, h] into rows of [x1, y1, x2, y2].

    The far corner is computed as ``origin + extent - 1``.
    """
    origin = boxes[:, :2]
    extent = boxes[:, 2:4]
    far_corner = origin + extent - 1
    return np.concatenate((origin, far_corner), axis=1)

def rot(n):
    """Build a 3x3 rotation matrix from a rotation vector.

    The direction of ``n`` is the rotation axis and its Euclidean norm is the
    rotation angle (radians, as consumed by ``np.sin``/``np.cos``). The matrix
    is ``I + sin(theta) * K + (1 - cos(theta)) * K @ K`` where ``K`` is the
    skew-symmetric matrix of the unit axis.

    Args:
        n: Any array-like holding exactly three numbers.

    Returns:
        ``numpy.ndarray`` of shape (3, 3). The identity when ``n`` has zero
        norm.

    Raises:
        AssertionError: If ``n`` does not flatten to exactly three elements.
    """
    # flatten() always copies, so the caller's array is never modified.
    n = np.asarray(n).flatten()
    assert n.shape == (3,)
    # Integer (or bool) input cannot hold the normalised axis; promote it.
    # Floating inputs keep their own precision.
    if not np.issubdtype(n.dtype, np.inexact):
        n = n.astype(np.float64)
    theta = np.linalg.norm(n)
    if theta:
        axis = n / theta
        K = np.array([[0, -axis[2], axis[1]],
                      [axis[2], 0, -axis[0]],
                      [-axis[1], axis[0], 0]])

        return np.identity(3) + np.sin(theta) * K + (1 - np.cos(theta)) * K @ K
    return np.identity(3)


def get_bbox(p0, p1):
    """
    Enumerate the vertices and edges of an axis-aligned box.

    Input:
    *   p0, p1
        (3)
        Two opposite corners of the box, expressed in the body frame.

    Output:
    *   v
        (3, 8)
        Box vertices in the body frame, one per column. Column k picks each
        coordinate from p0 or p1 following the binary digits of k
        (x is the most significant bit, z the least).
    *   e
        (2, 14)
        Vertex-index pairs, one edge per column. The first 2 edges indicate
        the `front` side of the box.
    """
    corners = np.array([p0, p1])
    # For every vertex: 0 selects the coordinate from p0, 1 from p1.
    pick_x = [0, 0, 0, 0, 1, 1, 1, 1]
    pick_y = [0, 0, 1, 1, 0, 0, 1, 1]
    pick_z = [0, 1, 0, 1, 0, 1, 0, 1]
    v = np.stack([corners[pick_x, 0], corners[pick_y, 1], corners[pick_z, 2]])

    edge_start = [2, 3, 0, 0, 3, 3, 0, 1, 2, 3, 4, 4, 7, 7]
    edge_end = [7, 6, 1, 2, 1, 2, 4, 5, 6, 7, 5, 6, 5, 6]
    e = np.array([edge_start, edge_end], dtype=np.uint8)

    return v, e

def collate_double(batch):
    """
    Collate function handed to the DataLoader for the detection dataset.

    Takes a sequence of sample dicts and regroups them field by field,
    returning four parallel lists: ``(x, y, x_name, y_name)``. Nothing is
    stacked, so samples may differ in size.
    """
    def _gather(key):
        return [sample[key] for sample in batch]

    return _gather('x'), _gather('y'), _gather('x_name'), _gather('y_name')

def pytorch_normalze(img):
    """
    Standardise an image with fixed per-channel mean and std.

    Background on the constants: https://github.com/pytorch/vision/issues/223

    Takes a numpy array, runs it through torchvision's ``Normalize`` as a
    tensor and returns a numpy array again (values end up roughly in -1~1).
    """
    as_tensor = torch.from_numpy(img)
    standardise = tvtsf.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
    return standardise(as_tensor).numpy()

def resize_bbox(bbox, in_size, out_size):
    """Rescale bounding boxes to follow an image resize.

    Coordinates are laid out as ``(x_min, y_min, x_max, y_max)`` along the
    last axis, which is the order the dataset in this file produces. Both a
    single box of shape ``(4,)`` and a stack of shape ``(R, 4)`` are accepted.

    Args:
        bbox (array-like): Box or boxes; the last axis has length 4.
        in_size (tuple): ``(height, width)`` of the image before resizing.
        out_size (tuple): ``(height, width)`` of the image after resizing.

    Returns:
        ~numpy.ndarray: A new floating-point array with the same shape as
        ``bbox``; the input is not modified. Floating inputs keep their
        dtype, other dtypes are promoted to float64.
    """
    bbox = np.array(bbox)  # np.array copies, so the caller's data is untouched
    # Writing scaled values back into an integer array would silently truncate
    # them, so promote non-float input first.
    if not np.issubdtype(bbox.dtype, np.floating):
        bbox = bbox.astype(np.float64)
    y_scale = float(out_size[0]) / in_size[0]
    x_scale = float(out_size[1]) / in_size[1]
    # Ellipsis indexing addresses the coordinate axis for both (4,) and (R, 4).
    bbox[..., 0::2] *= x_scale  # x_min, x_max
    bbox[..., 1::2] *= y_scale  # y_min, y_max
    return bbox

def preprocess(img, min_size=600, max_size=1000):
    """Scale, resize and normalise an image for feature extraction.

    A single scale factor is chosen as the smaller of
    ``min_size / shorter_edge`` and ``max_size / longer_edge``, so after
    resizing the shorter edge is at most ``min_size`` and the longer edge at
    most ``max_size``. Pixel values are divided by 255 before resizing and the
    result is passed through :func:`pytorch_normalze`.

    Args:
        img (~numpy.ndarray): Image whose shape unpacks as (C, H, W).
            Values are presumably in [0, 255], given the division by 255.
        min_size (int): Target upper bound for the shorter edge.
        max_size (int): Target upper bound for the longer edge.

    Returns:
        ~numpy.ndarray: The preprocessed image.
    """
    channels, height, width = img.shape
    scale = min(min_size / min(height, width), max_size / max(height, width))
    target_shape = (channels, height * scale, width * scale)
    resized = sktsf.resize(img / 255., target_shape, mode='reflect', anti_aliasing=False)
    return pytorch_normalze(resized)

class Transform(object):
    """Callable applied to training samples: preprocess the image and rescale
    its bounding box to the new image size."""

    def __init__(self, min_size=600, max_size=1000):
        self.min_size, self.max_size = min_size, max_size

    def __call__(self, in_data):
        """Take an ``(img, bbox)`` pair and return the transformed pair.

        ``img`` must have a three-element shape whose last two entries are
        height and width.
        """
        img, bbox = in_data
        _, src_h, src_w = img.shape
        resized = preprocess(img, self.min_size, self.max_size)
        _, dst_h, dst_w = resized.shape
        return resized, resize_bbox(bbox, (src_h, src_w), (dst_h, dst_w))
    
class TransformTest(object):
    """Callable applied to test images: image-only counterpart of the training
    transform (there is no bounding box to rescale)."""

    def __init__(self, min_size=600, max_size=1000):
        self.min_size, self.max_size = min_size, max_size

    def __call__(self, img):
        """Return ``img`` after ``preprocess`` with the configured size bounds.

        ``preprocess`` itself unpacks ``img.shape`` into three values, so a
        non-3-D input is rejected there.
        """
        return preprocess(img, self.min_size, self.max_size)
    
class PrepareTestDataset(torch.utils.data.Dataset):
    """Dataset of unlabeled test images.

    Each item is a dict ``{'x': tensor}`` holding the preprocessed image as a
    float32 tensor. Images are read with ``cv2.imread``, transposed from
    (H, W, C) to (C, H, W) and passed through ``TransformTest``.

    Args:
        inputs: Image paths (``pathlib.Path`` or ``str``).
        use_cache: When true, all raw images are read into memory up front
            with a multiprocessing pool. The transform is still applied on
            access.
    """

    def __init__(self,
                 inputs: List[pathlib.Path],
                 use_cache: bool = False):
        self.inputs = inputs
        self.use_cache = use_cache
        self.transform = TransformTest(min_size, max_size)

        if self.use_cache:
            # Use multiprocessing to load the images into RAM
            from multiprocessing import Pool
            with Pool() as pool:
                self.cached_data = pool.map(self.read_images, inputs)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Cached and uncached access share the same post-processing, so both
        # return a transformed tensor in the same dict format.
        if self.use_cache:
            x = self.cached_data[idx]
        else:
            x = self.read_images(self.inputs[idx])

        x_trans = np.transpose(x, (2, 0, 1))
        x = self.transform(x_trans)

        # Convert to tensor
        x = torch.from_numpy(x).type(torch.float32)

        return {'x': x}

    @staticmethod
    def read_images(inp):
        """Read one image from disk; raise if OpenCV cannot decode it."""
        # str() so pathlib.Path inputs work with any OpenCV version.
        img = cv2.imread(str(inp))
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {inp}")
        return img


class PrepareDataset(torch.utils.data.Dataset):
    """Dataset pairing images with one 2-D bounding box and label each.

    Each item is a dict with keys ``'x'`` (float32 image tensor), ``'y'``
    (``{'boxes': float32 (1, 4), 'labels': int64 (1,)}``), ``'x_name'`` and
    ``'y_name'`` (the source paths).

    Args:
        inputs: Image paths (``pathlib.Path`` or ``str``).
        targets: Paths to the ``*_bbox.bin`` files, index-aligned with
            ``inputs``. A sibling ``*_proj.bin`` is expected next to each.
        use_cache: When true, raw images and boxes are loaded into memory up
            front with a multiprocessing pool. The transform is still applied
            on access.
        convert_to_format: Stored on the instance; not used by this class.
        mapping: Stored on the instance; not used by this class.
    """

    def __init__(self,
                 inputs: List[pathlib.Path],
                 targets: List[pathlib.Path],
                 use_cache: bool = False,
                 convert_to_format: str = None,
                 mapping: Dict = None
                 ):
        self.inputs = inputs
        self.targets = targets
        self.use_cache = use_cache
        self.convert_to_format = convert_to_format
        self.mapping = mapping
        self.transform = Transform(min_size, max_size)

        if self.use_cache:
            # Use multiprocessing to load images and targets into RAM
            from multiprocessing import Pool
            with Pool() as pool:
                self.cached_data = pool.starmap(self.read_images, zip(inputs, targets))

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Cached and uncached access share the same post-processing below.
        if self.use_cache:
            x, box = self.cached_data[idx]
        else:
            x, box = self.read_images(self.inputs[idx], self.targets[idx])
        xmin, xmax, ymin, ymax, label = box

        bboxes = np.array([xmin, ymin, xmax, ymax])
        y = np.array([label])

        # cv2 images are (H, W, C); the transform expects (C, H, W).
        x_trans = np.transpose(x, (2, 0, 1))
        x, bboxes = self.transform((x_trans, bboxes))

        # Convert to tensor
        x = torch.from_numpy(x).type(torch.float32)
        target = {
            'boxes': torch.from_numpy(bboxes).to(torch.float32).reshape(-1, 4),
            'labels': torch.from_numpy(y).type(torch.int64),
        }

        return {'x': x, 'y': target, 'x_name': self.inputs[idx], 'y_name': self.targets[idx]}

    def read_images(self, inp, tar):
        """Load one raw sample: the image and its ``get_bb`` result."""
        # str() so pathlib.Path inputs work with any OpenCV version.
        img = cv2.imread(str(inp))
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {inp}")
        return img, self.get_bb(tar, img)

    def get_bb(self, path, x):
        """Project the first 3-D box of a ``*_bbox.bin`` file into the image.

        The file is read as float32 records of 11 values. From the first
        record, ``[0:3]`` is used as a rotation vector, ``[3:6]`` as a
        translation, ``[6:9]`` as the box size and ``[9]`` as the class id.
        The 3x4 projection matrix comes from the sibling ``*_proj.bin`` file.

        Args:
            path: Path to the ``*_bbox.bin`` file (``str`` or ``Path``).
            x: The image as an (H, W, ...) array; only its shape is used, to
                clip the box.

        Returns:
            ``(xmin, xmax, ymin, ymax, label)`` - note the x-pair comes before
            the y-pair. Coordinates are ints clipped to the image extent.
        """
        path = str(path)  # the annotation allows Path, but .replace needs str
        bbox = np.fromfile(path, dtype=np.float32)
        proj = np.fromfile(path.replace('_bbox.bin', '_proj.bin'), dtype=np.float32)
        proj.resize([3, 4])

        b = bbox.reshape([-1, 11])[0]
        R = rot(b[0:3])
        t = b[3:6]

        sz = b[6:9]
        vert_3D, _ = get_bbox(-sz / 2, sz / 2)
        vert_3D = R @ vert_3D + t[:, np.newaxis]

        # Homogeneous projection followed by the perspective divide.
        vert_2D = proj @ np.vstack([vert_3D, np.ones(vert_3D.shape[1])])
        vert_2D = vert_2D / vert_2D[2, :]

        # The edge list returned by get_bbox touches all 8 vertices, so the
        # extent over edge endpoints equals the extent over the vertices.
        xmin, xmax = int(vert_2D[0].min()), int(vert_2D[0].max())
        ymin, ymax = int(vert_2D[1].min()), int(vert_2D[1].max())

        # Clip to the image.
        ymin = max(ymin, 0)
        ymax = min(ymax, x.shape[0])
        xmin = max(xmin, 0)
        xmax = min(xmax, x.shape[1])

        class_id = b[9].astype(np.uint8)
        label = labels[class_id]

        return xmin, xmax, ymin, ymax, label

In [None]:
# Run on CUDA when a GPU is available, otherwise fall back to the CPU.
# The model and the inference inputs are moved to this device later on.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
######################################################################
########################### model.py #################################
######################################################################

class TwoMLPHead(nn.Module):
    """Two fully connected layers with ReLU, applied to flattened features.

    Args:
        in_channels: Size of each flattened input feature vector.
        representation_size: Width of both hidden layers and of the output.
    """

    def __init__(self, in_channels, representation_size):
        super().__init__()
        self.fc6 = nn.Linear(in_channels, representation_size)
        self.fc7 = nn.Linear(representation_size, representation_size)

    def forward(self, x):
        # Collapse everything after the batch dimension into one vector.
        flat = torch.flatten(x, start_dim=1)
        hidden = torch.relu(self.fc6(flat))
        return torch.relu(self.fc7(hidden))

class FastRCNNPredictor(nn.Module):
    """Classification and box-regression output layers.

    Args:
        in_channels: Size of each input feature vector.
        num_classes: Number of output classes; the regression layer emits
            four values per class.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        """Return ``(scores, bbox_deltas)`` for a batch of feature vectors.

        A 4-D input is accepted only when its two trailing dimensions are 1x1.
        """
        if x.dim() == 4:
            assert tuple(x.shape[2:]) == (1, 1)
        features = torch.flatten(x, start_dim=1)
        return self.cls_score(features), self.bbox_pred(features)

#------------------VGG16-----------------------
# Number of classes handed to the box predictor and the FasterRCNN wrapper.
num_classes = 3
# Convolutional part of VGG16 with pretrained weights (downloads on first use).
backbone = vgg16(pretrained = True).features

# Freeze all the layers except the last 3 (Conv+ReLU) pairs.
# In torchvision's plain VGG16 `features`, children 24-29 are those three pairs
# and child 30 is a parameter-free max-pool, so the first 24 children are frozen.
# The previous condition (`c > 24` with a 1-based counter) froze exactly the
# opposite set: the last conv layers were fixed and the early ones trained.
NUM_FROZEN_CHILDREN = 24
for child_idx, child in enumerate(backbone.children()):
    if child_idx < NUM_FROZEN_CHILDREN:
        for param in child.parameters():
            param.requires_grad = False
# The RPN head and box head below read this attribute to size their inputs.
backbone.out_channels = 512

#------------------ResNet-----------------------
# backbone = resnet50(pretrained = True)
# modules = list(backbone.children())[:-1]
# backbone = nn.Sequential(*modules)

# Freeze all the layers except last 3 (Conv+Relu)
# c=0
# for child in backbone.children():
#     c = c+1
#     if(c>5):
#         for param in child.parameters():
#             param.requires_grad = False
# backbone.out_channels = 2048

# Shared detection components. They are built once at module level and used
# as the default arguments of the FasterRCNN class defined below.

# One feature level with five anchor sizes and three aspect ratios.
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), aspect_ratios=((0.5, 1.0, 2.0),))
# RPN head sized from the backbone channels and the anchors per location.
rpn_head = RPNHead(backbone.out_channels, anchor_generator.num_anchors_per_location()[0])
# RoI pooling over the feature map named '0', producing 7x7 outputs.
box_roi_pool = MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)
# Box head input size = channels * (pooled side)^2; hidden width is 1024.
box_head = TwoMLPHead(backbone.out_channels * box_roi_pool.output_size[0] ** 2, 1024)
# Final classifier / box regressor over the 1024-d representation.
box_predictor = FastRCNNPredictor(1024, num_classes)


class FasterRCNN(GeneralizedRCNN):
    """Faster R-CNN assembled from the module-level components of this file.

    Wires a backbone, a ``RegionProposalNetwork``, ``RoIHeads`` and a
    ``GeneralizedRCNNTransform`` into a ``GeneralizedRCNN``. This class shares
    its name with, and therefore shadows, torchvision's ``FasterRCNN``
    imported at the top of the file.

    The RPN head, RoI pooler, box head and box predictor defaults are the
    shared module-level instances, so every model built with the defaults
    uses the same sub-modules.

    After construction the instance also exposes ``min_size``, ``max_size``,
    ``image_mean``, ``image_std`` and ``num_classes`` as plain attributes.
    """

    def __init__(self, backbone, num_classes=num_classes,
                 # transform parameters
                 min_size=600, max_size=1000,
                 image_mean=[0.485, 0.456, 0.406], image_std=[0.229, 0.224, 0.225],
                 # RPN parameters
                 rpn_anchor_generator=anchor_generator, rpn_head=rpn_head,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                 # Box parameters
                 box_roi_pool=box_roi_pool, box_head=box_head, box_predictor=box_predictor,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                 box_batch_size_per_image=512, box_positive_fraction=0.25,
                 bbox_reg_weights=None):
        # Not used further; the lookup fails early if the backbone does not
        # declare its channel count.
        out_channels = backbone.out_channels

        # Proposal budgets before and after NMS, per mode.
        pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        region_proposals = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            pre_nms_top_n, post_nms_top_n, rpn_nms_thresh)
        detection_heads = RoIHeads(
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,
            box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)
        image_transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        super().__init__(backbone, region_proposals, detection_heads, image_transform)

        # Plain attributes read by the Lightning wrapper.
        self.min_size = min_size
        self.max_size = max_size
        self.image_mean = image_mean
        self.image_std = image_std
        self.num_classes = num_classes

In [None]:
######################################################################
########################### train.py #################################
######################################################################


from itertools import chain

import pytorch_lightning as pl


class FasterRCNN_lightning(pl.LightningModule):
    """Lightning wrapper that trains and evaluates a detection model.

    Args:
        model: Detection model. It must expose ``num_classes``,
            ``image_mean``, ``image_std``, ``min_size`` and ``max_size``, and
            return a dict of losses when called with ``(images, targets)``.
        lr: Learning rate for SGD.
        iou_threshold: IoU threshold passed to the Pascal VOC metric at test
            time.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 lr: float = 0.0001,
                 iou_threshold: float = 0.5
                 ):
        super().__init__()

        # Model
        self.model = model

        # Classes (background inclusive)
        self.num_classes = self.model.num_classes

        # Learning rate
        self.lr = lr

        # IoU threshold
        self.iou_threshold = iou_threshold

        # Transformation parameters
        self.mean = model.image_mean
        self.std = model.image_std
        self.min_size = model.min_size
        self.max_size = model.max_size

        # Save hyperparameters
        self.save_hyperparameters()

    def forward(self, x):
        """Run inference. Side effect: switches the wrapped model to eval mode."""
        self.model.eval()
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Return the sum of all losses reported by the model for this batch."""
        # Batch layout is the one produced by collate_double.
        x, y, x_name, y_name = batch

        loss_dict = self.model(x, y)
        loss = sum(loss_dict.values())

#         self.log_dict(loss_dict)
        return loss

    def validation_step(self, batch, batch_idx):
        """Validation is currently disabled; nothing is computed."""
        return

    def validation_epoch_end(self, outs):
        """Validation is currently disabled; nothing is aggregated or logged."""
        return

    def test_step(self, batch, batch_idx):
        """Collect ground-truth and predicted boxes for one batch."""
        # Batch
        x, y, x_name, y_name = batch

        # Inference
        preds = self.model(x)

        # NOTE(review): from_dict_to_BoundingBox is neither defined nor
        # imported in this notebook; it must be brought into scope before
        # trainer.test() can run.
        gt_boxes = [from_dict_to_BoundingBox(target, name=name, groundtruth=True) for target, name in zip(y, x_name)]
        gt_boxes = list(chain(*gt_boxes))

        pred_boxes = [from_dict_to_BoundingBox(pred, name=name, groundtruth=False) for pred, name in zip(preds, x_name)]
        pred_boxes = list(chain(*pred_boxes))

        return {'pred_boxes': pred_boxes, 'gt_boxes': gt_boxes}

    def test_epoch_end(self, outs):
        """Compute Pascal VOC metrics over every box collected by test_step."""
        gt_boxes = [out['gt_boxes'] for out in outs]
        gt_boxes = list(chain(*gt_boxes))
        pred_boxes = [out['pred_boxes'] for out in outs]
        pred_boxes = list(chain(*pred_boxes))

        # Project-local metric package, imported lazily so the rest of the
        # module works without it.
        from metrics.pascal_voc_evaluator import get_pascalvoc_metrics
        from metrics.enumerators import MethodAveragePrecision
        metric = get_pascalvoc_metrics(gt_boxes=gt_boxes,
                                       det_boxes=pred_boxes,
                                       iou_threshold=self.iou_threshold,
                                       method=MethodAveragePrecision.EVERY_POINT_INTERPOLATION,
                                       generate_table=True)

        per_class, mAP = metric['per_class'], metric['mAP']
#         self.log('Test_mAP', mAP)

#         for key, value in per_class.items():
#             self.log(f'Test_AP_{key}', value['AP'])

    def configure_optimizers(self):
        """Return SGD with momentum and weight decay; no scheduler is active."""
        optimizer = torch.optim.SGD(self.model.parameters(),
                                    lr=self.lr,
                                    momentum=0.9,
                                    weight_decay=0.005)
        # No metric is logged during validation, so no LR scheduler is
        # attached. To re-enable one, build
        #   torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
        #       factor=0.75, patience=30, min_lr=0)
        # and return it under 'lr_scheduler' together with
        # 'monitor': 'Validation_mAP'.
        return {'optimizer': optimizer}

In [None]:
#Call Dataloader

# Images and their bounding-box files, paired by position after sorting.
inputs = sorted(glob('/kaggle/input/fulldataset-gta/trainval/*/*_image.jpg'))
targets = sorted(glob('/kaggle/input/fulldataset-gta/trainval/*/*_bbox.bin'))

# The two lists come from independent globs, so a missing file or an unlucky
# sort order would silently pair images with the wrong boxes. Fail loudly.
assert len(inputs) == len(targets), (
    f"Found {len(inputs)} images but {len(targets)} bbox files")
assert all(image_path.replace('_image.jpg', '_bbox.bin') == target_path
           for image_path, target_path in zip(inputs, targets)), (
    "Image and bbox file lists are not aligned")

# The first N_TRAIN samples are used for training, the rest for validation.
N_TRAIN = 7000
inputs_train, inputs_valid = inputs[:N_TRAIN], inputs[N_TRAIN:]
targets_train, targets_valid = targets[:N_TRAIN], targets[N_TRAIN:]
# inputs_train, inputs_valid = inputs[:2], inputs[1:4]
# targets_train, targets_valid = targets[:2], targets[1:4]

dataset_train = PrepareDataset(inputs=inputs_train,
                        targets=targets_train)

dataset_valid = PrepareDataset(inputs=inputs_valid,
                        targets=targets_valid)
# collate_double keeps samples as lists, so images of different sizes can
# share a batch.
dataloader_train = DataLoader(dataset=dataset_train,
                        batch_size=4,
                        shuffle=True,
                        num_workers=0,
                        collate_fn=collate_double)
dataloader_valid = DataLoader(dataset=dataset_valid,
                        batch_size=1,
                        shuffle=False,
                        num_workers=0,
                        collate_fn=collate_double)

In [None]:
# Build the detector from the module-level components and place it on the
# selected device. FasterRCNN here is the class defined in this notebook.
model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=box_roi_pool,
).to(device)

In [None]:
# Wrap the detector in the Lightning module, using its default learning rate
# and IoU threshold.
task = FasterRCNN_lightning(model=model)
# task.load_from_checkpoint('/kaggle/input/epoch19/epoch19-step34999.ckpt')

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, EarlyStopping

# checkpoint_callback = ModelCheckpoint(monitor='Validation_mAP', mode='max')
# NOTE(review): this callback is created but never passed to the Trainer below
# (there is no `callbacks=` argument), so it currently has no effect.
learningrate_callback = LearningRateMonitor(logging_interval='step', log_momentum=False)
# early_stopping_callback = EarlyStopping(monitor='Validation_mAP', patience=50, mode='max')

# trainer init
from pytorch_lightning import Trainer

# Request a GPU only when one exists, matching the CPU fallback used for
# `device`; a hard-coded gpus=1 fails on CPU-only machines.
trainer = Trainer(gpus=1 if torch.cuda.is_available() else None,
                  default_root_dir="/kaggle/working/",  # where checkpoints are saved to
                  log_every_n_steps=1,
                  num_sanity_val_steps=0,
                  max_epochs = 29,
                  logger=False,
                  # Training continues from this checkpoint rather than from scratch.
                  resume_from_checkpoint = '/kaggle/input/epoch19/epoch19-step34999.ckpt'
                 )

In [None]:
# Train the task. The validation loader is passed in, but the module's
# validation hooks return immediately, so no validation metric is produced.
trainer.fit(task,
            train_dataloaders = dataloader_train,
            val_dataloaders = dataloader_valid)

In [None]:
# Test images in sorted order; the inference loop indexes this list and the
# dataset in lockstep.
test_inputs = sorted(glob('/kaggle/input/fulltestdataset/test/*/*_image.jpg'))

dataset_test = PrepareTestDataset(inputs=test_inputs)

In [None]:
### Validation loop #####

# task.eval()
# for i in range(len(test_inputs)):
#     d = dataset_valid[i]
#     x = d['x']
#     x = torch.unsqueeze(x, 0).to(device)
#     y = task(x)[0]
#     pred = y['labels'][0].item()
#     gt = d['y']['labels'][0].item()
    
#     if(pred==gt):
#         count += 1

In [None]:
#### Testing Loop #####

task.eval()
file_name = '/kaggle/working/submission.csv'
# Label written when the detector returns no box for an image. Indexing the
# empty label tensor would otherwise abort the whole submission run.
NO_DETECTION_LABEL = 0
# newline='' is the csv module's documented way to open output files.
# no_grad keeps inference from building autograd graphs for every image.
with open(file_name, 'w', newline='') as f, torch.no_grad():
    writer = csv.writer(f, delimiter=',', lineterminator='\n')
    writer.writerow(['guid/image', 'label'])
    for i, file in enumerate(test_inputs):
        if i % 100 == 0:
            print(i)  # coarse progress indicator
        x = dataset_test[i]['x']
        x = torch.unsqueeze(x, 0).to(device)
        y = task(x)[0]
        detected_labels = y['labels']
        # Take the first detection's label; presumably detections come back
        # ordered by score - TODO confirm for the installed torchvision.
        if len(detected_labels) > 0:
            pred = detected_labels[0].item()
        else:
            pred = NO_DETECTION_LABEL
        # Paths look like .../<guid>/<idx>_image.jpg
        guid, image_name = file.split('/')[-2:]
        idx = image_name.replace('_image.jpg', '')
        writer.writerow(['{}/{}'.format(guid, idx), pred])
    
    
