In [91]:
import numpy as np
import pandas as pd
import os
import torch
import torchvision
from torchvision import datasets, models
from torchvision.transforms import functional as FT
from torchvision import transforms as T
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
import torchmetrics
from torchmetrics import Metric
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split, Dataset
from torch.nn.utils.rnn import pad_sequence
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision.datasets import CocoDetection
import copy
import math
from PIL import Image
import cv2
import albumentations as A 
from collections import defaultdict, deque
import datetime
import time
from tqdm import tqdm # progress bar
from torchvision.utils import draw_bounding_boxes
import matplotlib.pyplot as plt
import sys
%matplotlib inline
print(torch.__version__)
print(torchvision.__version__)
from pycocotools.coco import COCO
from albumentations.pytorch import ToTensorV2
#import wandb
import matplotlib.patches as patches



2.2.1+cu121
0.17.1+cu121


# Hyper paramaters

In [92]:
# Hyperparameters
batch_size = 16
num_epochs= 1
lr = 0.001
image_size = [600, 600]
is_Test = False
wandb_on = False
device_cuda = False
num_workers = 0
if wandb_on:
    wandb.login()

    wandb.init(
        # set the wandb project where this run will be logged
        project="Bachelor0386",
        
        # track hyperparameters and run metadata
        config={
        "architecture": "Faster RCNN",
        "dataset": "CustomDataset",
        "epochs": 1,
        }
    )

if device_cuda == True:
    print("Using GPU")
    device = torch.device("cuda") # use GPU to train
else:
    print("Using CPU")
    device = "cpu"


Using CPU


# Transforms

In [93]:

def get_transforms(train=False):
    if train:
        transform = A.Compose([
            A.Resize(image_size[0], image_size[1]), 
            A.HorizontalFlip(p=0.3),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.1),
            A.ColorJitter(p=0.1),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    else:
         transform = A.Compose([
             A.Resize(image_size[0], image_size[1]), 
             ToTensorV2()
         ], bbox_params=A.BboxParams(format='coco'))
    return transform

# Data Import

In [94]:
class PotholeDetectionClass(datasets.VisionDataset):
    def __init__(self, root, stage='/train', transform=None, target_transform=None, transforms=None, batch_size = batch_size):
        super().__init__(root, transforms, transform, target_transform)
        self.stage = stage #train, valid, test
        self.coco = COCO(root + stage + "/_annotations.coco.json") # annotations stored here
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)]
        self.batch_size = batch_size

    def _load_image(self, id: int):
        path = self.coco.loadImgs(id)[0]['file_name']
        path = "/" + path
        image = cv2.imread(self.root + self.stage + path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
    def _load_target(self, id):
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index):
        id = self.ids[index]
        image = self._load_image(id)
        target = self._load_target(id)
        target = copy.deepcopy(self._load_target(id))

        boxes = [t['bbox'] + [t['category_id']] for t in target]

        transformed = self.transforms(image=image, bboxes=boxes)
        image = transformed['image']
        boxes = transformed['bboxes']

        new_boxes = []
        for box in boxes:
            xmin = box[0]
            xmax = xmin + box[2]
            ymin = box[1]
            ymax = ymin + box[3]
            new_boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.tensor(new_boxes, dtype=torch.float32)

        targ = {}
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor([t['category_id'] for t in target], dtype=torch.int64)
        targ['image_id'] = torch.tensor([t['image_id'] for t in target])
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # we have a different area
        targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in target], dtype=torch.int64)
        image = torch.tensor(image, dtype=torch.float32).div(255)

           # Convert target dictionary into tensors
        targ = {key: torch.tensor(val) for key, val in targ.items()}
        return image, targ # scale images
    
    def __len__(self):
        return len(self.ids)
    
dataset_path = "/Pothole_coco"
dataset_path = os.getcwd() + dataset_path

coco = COCO(dataset_path + "/train" + "/_annotations.coco.json")
categories = coco.cats
n_classes = len(categories.keys())

train_dataset = PotholeDetectionClass(root=dataset_path, transforms=get_transforms(True))
test_dataset = PotholeDetectionClass(root=dataset_path, stage='/test', transforms=get_transforms(True))
valid_dataset = PotholeDetectionClass(root=dataset_path, stage= "/valid", transform=get_transforms(True))

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


# Model 

In [95]:
model = models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, n_classes)

# Cuda
print(device)
model = model.to(device)

Testing DataLoader 0:   0%|          | 0/5 [12:47<?, ?it/s]
Testing DataLoader 0:   0%|          | 0/5 [08:32<?, ?it/s]
Testing DataLoader 0:   0%|          | 0/5 [08:01<?, ?it/s]
Testing DataLoader 0:   0%|          | 0/5 [07:04<?, ?it/s]
cpu


# DataLoader

In [96]:
def custom_collate(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=custom_collate)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=custom_collate)

# Optimizer

In [97]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)
#optimizer = torch.optim.Adam(params, lr=0.001)
#optimizer = torch.optim.PSO(params, lr=0.001) #particle swarm optimization

# Additional

In [98]:
def plot_image(img_tensor, annotation,phase='train'):

    # fig,ax = plt.subplots(1)
    # img = img_tensor.cpu()

    # ax.imshow(img.permute(1, 2, 0))
    # for idx,box in enumerate(annotation["boxes"]):
      
    #     xmin, ymin, xmax, ymax = box
    #     color=['r','g','b','r']
    #     classes=['no mask','Masked','Improper masking','No-mask']
    #     # Create a Rectangle patch
    #     rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=3,edgecolor=color[annotation['labels'][idx]],facecolor='none')
    #     ax.text(xmin, ymin, classes[annotation['labels'][idx]],color='black',bbox=dict(facecolor=color[annotation['labels'][idx]], alpha=0.1))
    #     # Add the patch to the Axes
    #     ax.add_patch(rect)
    # plt.show()
    classes = [i[1]['name'] for i in categories.items()]
    device = "cpu"
    for i in range(8,10): #test_dataset.__len__()-30
        img, _ = test_dataset[i]
        img_int = torch.tensor(img*255, dtype=torch.uint8)
        with torch.no_grad():
            prediction = model([img.to(device)])
            pred = prediction[0]
            fig = plt.figure(figsize=(14, 10))
        plt.imshow(draw_bounding_boxes(img_int,
            pred['boxes'][pred['scores'] > 0.8],
            [classes[i] for i in pred['labels'][pred['scores'] > 0.8].tolist()], width=4
        ).permute(1, 2, 0))

def accuracyMetric(preds,annotations):
    non_accurate=0
    accurate=0
    def csm(A,B,corr):
        if corr:
            B=B-B.mean(axis=1)[:,np.newaxis]
            A=A-A.mean(axis=1)[:,np.newaxis]
        num=np.dot(A,B.T)
        p1=np.sqrt(np.sum(A**2,axis=1))[:,np.newaxis]
        p2=np.sqrt(np.sum(B**2,axis=1))[np.newaxis,:]
        return 1-(num/(p1*p2))
    inds=torch.where((preds['scores'])>0.91)
    distMatrix=csm(np.array(preds['boxes'][inds].cpu()),np.array(annotations['boxes'].cpu()),True)

    for i in range (distMatrix.shape[0]):
        cla=np.argmin(distMatrix[i,:])

        if preds['labels'][i]%3==annotations['labels'][cla]:
            accurate+=1
        else:
            non_accurate+=1
    allSamp=np.max(((accurate+non_accurate),len(annotations['labels'])))
    return (accurate/allSamp)

# Neural Network

In [99]:
class NN(pl.LightningModule):
    def __init__(self, model, optimizer, train_loader, test_loader):
        super().__init__()
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.losses_dict = []
        self.loss = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy Loss
        

    # def forward(self, images, target):
    #     return self.model(images, target)

    def forward(self, x, annotations=None, phase='train'):
        if phase=='train':
            out=self.model(x,annotations)
        else:
            self.model.eval()
            out=model(x[0])
        # no activation and no softmax at the end
        return out

   

    def training_step(self, batch, batch_idx):
        imgs, annotations = batch
        imgs = list(img for img in imgs)
        annotations = [{k: v for k, v in t.items()} for t in annotations]
        

        # Forward pass
        loss_dict = self(imgs,annotations)
        losses = sum(loss for loss in loss_dict.values())
        if wandb_on:
            wandb.log({"train/loss": losses})
        occurrences = np.count_nonzero(annotations[0]['labels'].cpu() == 2)
        occurrences2 = np.count_nonzero(annotations[0]['labels'].cpu() == 1)
        occurrences = occurrences/(occurrences2+1 )

        if occurrences>=1:

            occurrences=np.clip(occurrences,1,4)
            loss_dict['loss_classifier']=occurrences*4*loss_dict['loss_classifier']
            print(f'Weighted {occurrences}')

        elif losses<0.2:
            for k,v in zip(loss_dict,loss_dict.values()):
                loss_dict[k]=v*0
        loss = sum(loss for loss in loss_dict.values())
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        tensorboard_logs = {'train_loss': loss , 'classifier_loss': loss_dict['loss_classifier'],
                            'box_reg_loss':loss_dict['loss_box_reg']}
        # use key 'log'

        return {"loss": loss, 'log': tensorboard_logs}

    def test_step(self,batch,batch_idx):
        imgs, annotations = batch
        #imgs = list(img for img in imgs)
        annotations = [{k: v for k, v in t.items()} for t in annotations]
        preds = self(list([imgs]),phase='test')
        #print(preds['labels'])
        
    

        
        plot_image(imgs[0], preds[0])
        plot_image(imgs[0], annotations[0],phase='test')
        self.log('accuracy',(accuracyMetric(preds[0], annotations[0])))
        print((accuracyMetric(preds[0], annotations[0])))

    # def test_step(self, batch, batch_idx):
    #     imgs, annotations = batch
    #     imgs = list(img for img in imgs)
    #     annotations = [{k: v for k, v in t.items()} for t in annotations]

    #     # Forward pass
    #     loss_dict = self(imgs[0], annotations)
    #     losses = sum(loss for loss in loss_dict.values())

    #     # Log the test loss
    #     self.log('test_loss', losses, on_step=True, on_epoch=True, prog_bar=True)

    #     return {"test_loss": losses}
    
    # def test_step(self,batch,batch_idx):
    #     imgs, annotations = batch
    #     #imgs = list(img for img in imgs)
    #     annotations = [{k: v for k, v in annotations.items()}]
    #     preds = self(list([imgs]),phase='test')
    #     #print(preds['labels'])
        
    #     plot_image(imgs, preds[0])
    #     plot_image(imgs, annotations[0],phase='test')
    #     self.log('accuracy',(accuracyMetric(preds[0], annotations[0])))
    #     print((accuracyMetric(preds[0], annotations[0])))


    
    
    def configure_optimizers(self):
        return self.optimizer

    def train_dataloader(self):
        return self.train_loader

    def test_dataloader(self):
        return self.test_loader

    def load_from_checkpoint(cls, checkpoint_path, model, optimizer, train_loader, test_loader):
        model = model.load_from_checkpoint(checkpoint_path)
        return cls(model, optimizer, train_loader, test_loader)

    

# Training

In [100]:
lightning_module = NN(model, optimizer, train_loader, test_loader)
# Initialize a Lightning Trainer
if wandb_on:
    wandb_logger = WandbLogger(project='Bachelor', job_type='train')
    wandb_logger.watch(model, log="all")
    trainer = pl.Trainer(max_epochs=num_epochs,logger=wandb_logger)  # You can adjust the Trainer options
else:
    trainer = pl.Trainer(max_epochs=num_epochs)
# Start training


trainer.fit(lightning_module, train_loader)

if wandb_on:
    wandb.finish()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name  | Type              | Params
--------------------------------------------
0 | model | FasterRCNN        | 18.9 M
1 | loss  | BCEWithLogitsLoss | 0     
--------------------------------------------
18.9 M    Trainable params
58.9 K    Non-trainable params
18.9 M    Total params
75.721    Total estimated model params size (MB)


Epoch 0:   0%|          | 0/30 [00:00<?, ?it/s] 

  image = torch.tensor(image, dtype=torch.float32).div(255)
  targ = {key: torch.tensor(val) for key, val in targ.items()}


Epoch 0: 100%|██████████| 30/30 [00:25<00:00,  1.19it/s, v_num=83, train_loss_step=0.244, train_loss_epoch=0.554]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 30/30 [00:26<00:00,  1.15it/s, v_num=83, train_loss_step=0.244, train_loss_epoch=0.554]


# Testing

In [101]:
trainer.test(lightning_module, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

  image = torch.tensor(image, dtype=torch.float32).div(255)
  targ = {key: torch.tensor(val) for key, val in targ.items()}


Testing DataLoader 0:   0%|          | 0/5 [00:00<?, ?it/s]

  img_int = torch.tensor(img*255, dtype=torch.uint8)


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor