In [115]:
ROOT_PATH = '../../data/'
PREDICTION_THRES = 0.8
EPOCHS = 10
MIN_SIZE = 800
BATCH_SIZE = 4
DEBUG = False # to visualize the images before training

In [116]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import  FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

import time
import os
import numpy as np
import cv2
import glob
import albumentations as A
import pandas as pd
from torch.utils.data import Dataset
from albumentations.pytorch.transforms import ToTensorV2
from torch.utils.data import DataLoader

In [117]:
def model():
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True,min_size=MIN_SIZE)
    # one class is for pot holes, and the other is background
    num_classes = 2
    # get the input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace pre-trained head with our features head
    # the head layer will classify the images based on our data input features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [118]:
class PotHoleDataset(Dataset):
    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['path'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        
    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        records = self.df[self.df['path'] == image_id]
        image = cv2.imread(self.image_dir+image_id, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        
        # convert the boxes into x_min, y_min, x_max, y_max format
        boxes = records[['x', 'y', 'w', 'h']].values
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        # get the area of the bounding boxes
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        # we have only one class
        labels = torch.ones((records.shape[0],), dtype=torch.int64)
        
        # supposing that all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        # apply the image transforms
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            
            # convert the bounding boxes to PyTorch `FloatTensor`
            target['boxes'] = torch.stack(tuple(map(torch.FloatTensor, 
                                                    zip(*sample['bboxes'])))).permute(1, 0)
        return image, target, image_id
    def __len__(self):
        return self.image_ids.shape[0]

In [119]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [120]:
# function for the image transforms
def train_transform():
    return A.Compose([
        A.Flip(0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [121]:
# read the annotation CSV file
train_df = pd.read_csv("../../data/df.csv")
len(train_df['path'].values)

4220

In [122]:
train_dataset = PotHoleDataset(train_df, ROOT_PATH, train_transform())
train_data_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn
)

In [123]:
# the computation device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)

In [124]:
def train(train_dataloader):
    model.train()
    running_loss = 0
    for i, data in enumerate(train_dataloader):
        
        optimizer.zero_grad()
        images, targets, images_ids = data[0], data[1], data[2]
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        loss = sum(loss for loss in loss_dict.values())
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        if i % 25 == 0:
            print(f"Iteration #{i} loss: {loss}")
    train_loss = running_loss/len(train_dataloader.dataset)
    return train_loss

In [125]:
def save_model():
    torch.save(model.state_dict(), './fasterrcnn_resnet50_fpn.pth')

In [126]:
def visualize():
    """
    This function will only execute if `DEBUG` is `True` in 
    `config.py`.
    """
    images, targets, image_ids = next(iter(train_data_loader))
    images = list(image for image in images)
    targets = [{k: v for k, v in t.items()} for t in targets]
    for i in range(1):
        boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)
        sample = images[i].permute(1,2,0).cpu().numpy()
        fig, ax = plt.subplots(1, 1, figsize=(15, 12))
        for box in boxes:
            cv2.rectangle(sample,
                        (box[0], box[1]),
                        (box[2], box[3]),
                        (220, 0, 0), 3)
        ax.set_axis_off()
        plt.imshow(sample)
        plt.show()

In [127]:
if DEBUG:
    visualize()

In [128]:
num_epochs = EPOCHS
for epoch in range(num_epochs):
    start = time.time()
    train_loss = train(train_data_loader)
    print(f"Epoch #{epoch} loss: {train_loss}")   
    end = time.time()
    print(f"Took {(end - start) / 60} minutes for epoch {epoch}")

Iteration #0 loss: 0.9971181750297546
Iteration #25 loss: 0.2045029252767563
Iteration #50 loss: 0.18742145597934723
Iteration #75 loss: 0.5487701892852783
Iteration #100 loss: 0.35096919536590576
Iteration #125 loss: 0.25723928213119507
Iteration #150 loss: 0.1721670925617218
Iteration #175 loss: 0.2226804494857788
Iteration #200 loss: 0.1561579704284668
Iteration #225 loss: 0.4392942488193512
Iteration #250 loss: 0.3433094918727875
Iteration #275 loss: 0.3212968707084656
Iteration #300 loss: 0.35688844323158264
Iteration #325 loss: 0.29904186725616455
Epoch #0 loss: 0.0819005481206945
Took 5.881178259849548 minutes for epoch 0
Iteration #0 loss: 0.15747389197349548
Iteration #25 loss: 0.0897674411535263
Iteration #50 loss: 0.12598007917404175
Iteration #75 loss: 0.40307003259658813
Iteration #100 loss: 0.1770906299352646
Iteration #125 loss: 0.1774076372385025
Iteration #150 loss: 0.1497049778699875
Iteration #175 loss: 0.1410556435585022
Iteration #200 loss: 0.12318956851959229
Iter

In [129]:
save_model()