In [25]:
# --- data locations ---------------------------------------------------------
# folder holding the training images (also used as DIR_INPUT further down)
ROOT_PATH = './Positive_data'
# folder holding the images that inference is run on
TEST_PATH = './test'
# NOTE: despite the name this is the path of a checkpoint *file*, not a directory
ckpt_dir = './pretrained/efficientdet_d5-ef44aea8.pth'

# --- training ---------------------------------------------------------------
EPOCHS = 5
BATCH_SIZE = 2
# not referenced by any cell visible in this notebook
MIN_SIZE = 800

# --- inference / debugging --------------------------------------------------
# minimum score a detection needs to be drawn
PREDICTION_THRES = 0.8
# set to True to visualize the images before training
DEBUG = False

In [26]:
import os
import sys
import random
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

# pytorch
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

# augmentation
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# sci-kit learn
from sklearn.model_selection import StratifiedKFold

# etc
import time
import gc

In [27]:
# from effdet.config.model_config import efficientdet_model_param_dict
# from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
# from effdet.efficientdet import HeadNet
# from effdet.efficientdet import HeadNet
# from effdet.config.model_config import efficientdet_model_param_dict


from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet

In [28]:
# print(f'number of configs: {len(efficientdet_model_param_dict)}')

# list(efficientdet_model_param_dict.keys())[::]

In [29]:
##
def create_model(num_classes=1, image_size=512, architecture="tf_efficientdet_d5"):
    """Build an EfficientDet wrapped in the effdet training bench.

    Args:
        num_classes: number of object classes the classification head predicts.
        image_size: side length of the (square) network input, in pixels.
        architecture: name passed to `get_efficientdet_config`.

    Returns:
        A `DetBenchTrain` wrapping the configured `EfficientDet`.
    """
    config = get_efficientdet_config(architecture)
    # override the stock config with the task-specific class count and input size
    config.update({
        'num_classes': num_classes,
        'image_size': (image_size, image_size),
    })

    detector = EfficientDet(config, pretrained_backbone=True)
    # swap in a fresh classification head sized for `config.num_classes`
    detector.class_net = HeadNet(config, num_outputs=config.num_classes)

    return DetBenchTrain(detector, config)

In [30]:
def model(ckpt_path):
    """Create the training network and initialise it from a checkpoint file.

    Only checkpoint tensors whose name and shape match a tensor of the network
    are loaded. Names are tried as stored and with a ``model.`` prefix, because
    `create_model` returns the detector wrapped in a training bench and a
    checkpoint of the bare detector would otherwise match nothing and be
    ignored silently by ``strict=False``. Tensors with a different shape (for
    example a classification head trained for another number of classes) are
    skipped instead of aborting the load.

    Args:
        ckpt_path: path of the checkpoint file to read.

    Returns:
        The network returned by `create_model`, with the compatible weights loaded.
    """
    net = create_model(num_classes=1, image_size=512, architecture='tf_efficientdet_d5')

    # map_location='cpu' lets a checkpoint written on a GPU load on a CPU-only
    # machine; the caller moves the finished network to its device afterwards
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    # some checkpoints nest the weights under a 'state_dict' entry
    if isinstance(checkpoint, dict) and isinstance(checkpoint.get('state_dict'), dict):
        checkpoint = checkpoint['state_dict']

    own_state = net.state_dict()
    compatible = {}
    for key, value in checkpoint.items():
        for candidate in (key, f'model.{key}'):
            if candidate in own_state and own_state[candidate].shape == getattr(value, 'shape', None):
                compatible[candidate] = value
                break

    net.load_state_dict(compatible, strict=False)
    # make a (partially) failed initialisation visible instead of silent
    print(f"Loaded {len(compatible)} of {len(checkpoint)} checkpoint tensors from {ckpt_path}")

    del checkpoint, compatible
    gc.collect()

    return net

In [31]:
class PotHoleDataset(Dataset):
    """Dataset that yields one image together with all of its annotated boxes.

    `dataframe` must provide the columns `image_id`, `x`, `y`, `w` and `h`
    (one row per box). Images are read from `{image_dir}/{image_id}.JPG`.
    """

    # Upper bound on how often the transforms are re-applied when they return
    # no boxes; prevents an endless loop for samples that can never keep a box.
    MAX_TRANSFORM_ATTEMPTS = 10

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return `(image, target, image_id)` for the `index`-th unique image id.

        With transforms, `image` is whatever the transform pipeline returns,
        `target['boxes']` is a float32 tensor in (y_min, x_min, y_max, x_max)
        order and `target['labels']` an int64 tensor. Without transforms the
        normalised RGB float32 array and the (x_min, y_min, x_max, y_max)
        numpy boxes are returned unchanged.

        Raises:
            FileNotFoundError: if the image file cannot be read.
            RuntimeError: if the transforms never return a bounding box.
        """
        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]

        image_path = f"{self.image_dir}/{image_id}.JPG"
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread does not raise on a missing/corrupt file, it returns None
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        # convert the boxes into x_min, y_min, x_max, y_max format;
        # copy=True guarantees a writable array that is independent of the dataframe
        boxes = records[['x', 'y', 'w', 'h']].to_numpy(copy=True)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        # we have only one class
        labels = torch.ones((records.shape[0],), dtype=torch.int64)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['img_scale'] = torch.tensor([1.])
        target['image_id'] = torch.tensor([index])
        target['img_size'] = torch.tensor([(512, 512)])

        # apply the image transforms, retrying while they drop every box
        if self.transforms:
            for _ in range(self.MAX_TRANSFORM_ATTEMPTS):
                sample = self.transforms(image=image, bboxes=boxes, labels=labels)
                if len(sample['bboxes']) > 0:
                    break
            else:
                raise RuntimeError(
                    f"Transforms returned no bounding boxes for image '{image_id}' "
                    f"after {self.MAX_TRANSFORM_ATTEMPTS} attempts"
                )

            image = sample['image']
            boxes_xyxy = torch.as_tensor(np.asarray(sample['bboxes']), dtype=torch.float32)
            # the effdet library takes bboxes in yxyx format
            target['boxes'] = boxes_xyxy[:, [1, 0, 3, 2]]
            target['labels'] = torch.as_tensor(
                [int(label) for label in sample['labels']], dtype=torch.int64
            )

        return image, target, image_id

    def __len__(self):
        """Number of unique image ids in the dataframe."""
        return self.image_ids.shape[0]

In [32]:
def collate_fn(batch):
    """
    Transpose a batch of per-sample tuples into per-field tuples.

    Samples are kept as tuples rather than stacked, so each sample may carry
    a different number of object instances.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [33]:
# function for the image transforms
def train_transform(image_size=512):
    """Build the training augmentation pipeline.

    Args:
        image_size: side length (pixels) of the square output image.

    Returns:
        An albumentations `Compose` that randomly flips the image, resizes it
        to `image_size` x `image_size` and converts it to a tensor. Bounding
        boxes are expected in `pascal_voc` format with their classes passed in
        the `labels` field.
    """
    return A.Compose([
        # pass the probability by keyword: given positionally it can bind to
        # `always_apply` instead of `p`, which makes the flip run on every sample
        A.Flip(p=0.5),
        A.Resize(height=image_size, width=image_size, p=1),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [34]:
# path to the input root directory
DIR_INPUT = ROOT_PATH
# read the annotation CSV file (one row per annotated box)
train_df = pd.read_csv("./_df.csv")
print(train_df.head())
print(f"Total number of image IDs (objects) in dataframe: {len(train_df)}")
# get all the image paths as list
image_paths = glob.glob(f"{DIR_INPUT}/*.JPG")
# an image ID is the file name without directory and extension; basename/splitext
# handle any path separator and keep file names that contain extra dots intact
image_names = [
    os.path.splitext(os.path.basename(image_path))[0]
    for image_path in image_paths
]
print(f"Total number of training images in folder: {len(image_names)}")
image_ids = train_df['image_id'].unique()
print(f"Total number of unique train images IDs in dataframe: {len(image_ids)}")
# number of images that we want to train out of all the unique images
train_ids = image_names[:] # use all the images for training
# keep only annotations whose image actually exists in the folder
train_df = train_df[train_df['image_id'].isin(train_ids)]
print(f"Number of image IDs (objects) training on: {len(train_df)}")

   image_id  num_potholes     x     y    w   h
0  G0010033             6  1990  1406   66  14
1  G0010033             6  1464  1442   92  16
2  G0010033             6  1108  1450   54  16
3  G0010033             6   558  1434  102  16
4  G0010033             6   338  1450   72  18
Total number of image IDs (objects) in dataframe: 4592
Total number of training images in folder: 1119
Total number of unique train images IDs in dataframe: 1337
Number of image IDs (objects) training on: 3896


In [35]:
# dataset over the filtered annotations, with the training augmentations applied
train_dataset = PotHoleDataset(train_df, DIR_INPUT, train_transform())
train_data_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    # reshuffle every epoch so SGD does not see the images in the same
    # (file/annotation) order each time
    shuffle=True,
    # samples carry a variable number of boxes, so keep them as tuples
    collate_fn=collate_fn
)

In [36]:
# the computation device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# `model` is the loader function until this cell rebinds the name to the network.
# Keep a handle on the loader so the cell can be re-run without first re-running
# the cell that defines the function.
if not isinstance(model, torch.nn.Module):
    _load_model = model
# NOTE(review): the recorded output of this cell is
# "TypeError: 'FeatureInfo' object is not callable", raised while the network is
# built; this looks like an effdet/timm version mismatch — confirm the installed versions.
model = _load_model(ckpt_dir).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)

TypeError: 'FeatureInfo' object is not callable

In [None]:
def train(train_dataloader):
    """Run one training epoch over `train_dataloader`.

    Uses the module-level `model`, `optimizer` and `device`.

    Args:
        train_dataloader: yields `(images, targets, image_ids)` tuples as
            produced by `collate_fn`; every target provides `boxes`, `labels`
            and `img_scale`.

    Returns:
        The mean per-batch loss of the epoch (0.0 if the loader is empty).
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for batch, (images, targets, image_ids) in enumerate(train_dataloader, 1):
        optimizer.zero_grad()

        images = torch.stack(images).to(device).float()

        target_res = {
            'bbox': [target['boxes'].to(device).float() for target in targets],
            'cls': [target['labels'].to(device).float() for target in targets],
            # one scale per image, built on the same device as the other inputs
            'img_scale': torch.stack(
                [target['img_scale'] for target in targets]
            ).reshape(-1).to(device).float(),
            'img_size': torch.tensor([(512, 512) for _ in targets]).to(device).float(),
        }

        loss_dict = model(images, target_res)

        # The effdet training bench reports the combined loss under 'loss' next
        # to its class/box components; summing every entry would count the
        # components twice. Fall back to the sum only if there is no 'loss' entry.
        if 'loss' in loss_dict:
            loss = loss_dict['loss']
        else:
            loss = sum(loss_dict.values())

        running_loss += loss.item()
        num_batches += 1
        loss.backward()
        optimizer.step()
        if batch % 25 == 0:
            print(f"Iteration #{batch} loss: {loss}")

    # every accumulated value is a per-batch loss, so average over batches
    # (not over samples); the guard avoids dividing by zero on an empty loader
    train_loss = running_loss / max(num_batches, 1)
    return train_loss

In [None]:
def save_model():
    """Write the state dict of the module-level `model` to 'checkpoint/effdet_test.pth'."""
    checkpoint_path = 'checkpoint/effdet_test.pth'
    # torch.save does not create missing directories
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    torch.save(model.state_dict(), checkpoint_path)

In [None]:
def visualize():
    """
    Show the first image of the first training batch with its boxes drawn.

    Reads from the module-level `train_data_loader`. Intended as a sanity
    check before training; the notebook calls it only when `DEBUG` is `True`.
    """
    images, targets, image_ids = next(iter(train_data_loader))

    boxes = targets[0]['boxes'].cpu().numpy().astype(np.int32)
    # permute() returns a non-contiguous view; OpenCV drawing needs a
    # contiguous array to write into
    sample = np.ascontiguousarray(images[0].permute(1, 2, 0).cpu().numpy())

    fig, ax = plt.subplots(1, 1, figsize=(15, 12))
    # PotHoleDataset stores transformed boxes as (y_min, x_min, y_max, x_max),
    # while OpenCV expects (x, y) corner points
    for y_min, x_min, y_max, x_max in boxes:
        cv2.rectangle(sample,
                      (int(x_min), int(y_min)),
                      (int(x_max), int(y_max)),
                      # the image is float in [0, 1], so red is 1.0 rather than 220
                      (1.0, 0.0, 0.0), 3)
    ax.set_axis_off()
    ax.imshow(sample)
    plt.show()

In [None]:
# Optional sanity check: preview one training sample with its boxes,
# but only when the DEBUG flag is set.
if DEBUG:
    visualize()

In [None]:
# Train for EPOCHS epochs, reporting the loss returned by train() and the
# wall-clock time of every epoch (epochs are numbered from 0).
num_epochs = EPOCHS
for epoch in range(num_epochs):
    start = time.time()
    train_loss = train(train_data_loader)
    print(f"Epoch #{epoch} loss: {train_loss}")   
    end = time.time()
    # the elapsed time includes the loss print above
    print(f"Took {(end - start) / 60} minutes for epoch {epoch}")

In [None]:
# Persist the current weights of the module-level `model` (see save_model).
save_model()

In [None]:
import numpy as np
import cv2
import os
import torch
from tqdm import tqdm


In [None]:
# set the computation device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# load the model and the trained weights
model = create_model(num_classes=1, image_size=512)
# map_location lets weights saved on a GPU load on a CPU-only machine
state_dict = torch.load('checkpoint/effdet_test.pth', map_location=device)
# The checkpoint was written by save_model() from a network built with the same
# create_model() arguments, so load strictly: any missing or unexpected key is a
# real problem and should not be ignored silently.
model.load_state_dict(state_dict)
# the inputs are moved to `device` during inference, so the network has to be there too
model = model.to(device)

In [None]:
DIR_TEST = TEST_PATH
# keep only image files: os.listdir also returns hidden files and
# sub-directories, which cv2.imread cannot read
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
test_images = sorted(
    name for name in os.listdir(DIR_TEST)
    if name.lower().endswith(IMAGE_EXTENSIONS)
)
print(f"Validation instances: {len(test_images)}")

In [None]:
detection_threshold = PREDICTION_THRES
# the network is built for 512x512 inputs and training resizes every image to
# that size, so inference has to feed the same size
input_size = 512
# cv2.imwrite does not create missing directories
os.makedirs("test_predictions", exist_ok=True)

model.to(device)
model.eval()
with torch.no_grad():
    for image_name in tqdm(test_images, total=len(test_images)):
        orig_image = cv2.imread(f"{DIR_TEST}/{image_name}", cv2.IMREAD_COLOR)
        if orig_image is None:
            # cv2.imread returns None for files it cannot decode
            print(f"Skipping unreadable file: {image_name}")
            continue
        orig_height, orig_width = orig_image.shape[:2]

        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (input_size, input_size)).astype(np.float32)
        # make the pixel range between 0 and 1
        image /= 255.0
        # HWC -> CHW, then add the batch dimension
        image = np.ascontiguousarray(np.transpose(image, (2, 0, 1)))
        image = torch.from_numpy(image).unsqueeze(0).to(device)

        # `model` is the training bench, which computes a loss even in eval
        # mode and therefore needs targets; there is no ground truth at
        # inference time, so pass one placeholder box and ignore the loss.
        target_res = {
            'bbox': [torch.tensor([[0.0, 0.0, 1.0, 1.0]], device=device)],
            'cls': [torch.ones(1, device=device)],
            'img_scale': torch.ones(1, device=device),
            'img_size': torch.tensor([[input_size, input_size]],
                                     dtype=torch.float, device=device),
        }

        # NOTE(review): assumes the effdet convention that every detection row is
        # [x_min, y_min, x_max, y_max, score, class] in input-image pixels —
        # confirm against the installed effdet version.
        detections = model(image, target_res)["detections"][0].cpu().numpy()
        scores = detections[:, 4]
        boxes = detections[scores >= detection_threshold, :4]

        # map the boxes from the resized input back onto the original image
        boxes[:, [0, 2]] *= orig_width / input_size
        boxes[:, [1, 3]] *= orig_height / input_size
        draw_boxes = boxes.astype(np.int32)

        for box in draw_boxes:
            cv2.rectangle(orig_image,
                          (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (0, 0, 255), 3)
            cv2.putText(orig_image, 'PotHole',
                        (int(box[0]), int(box[1] - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                        2, lineType=cv2.LINE_AA)
        cv2.imwrite(f"test_predictions/{image_name}", orig_image)
print('TEST PREDICTIONS COMPLETE')

In [None]:
# Quick check of whether PyTorch can see a CUDA device in this environment.
torch.cuda.is_available()