In [1]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torchvision
from torchvision import datasets, models
from torchvision.transforms import functional as FT
from torchvision import transforms as T
import pytorch_lightning as pl
import torchmetrics
from torchmetrics import Metric
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split, Dataset
from torch.nn.utils.rnn import pad_sequence
from pytorch_lightning.callbacks import ModelCheckpoint
import copy
import math
from PIL import Image
import cv2
import albumentations as A  # our data augmentation library
from collections import defaultdict, deque
import datetime
import time
from tqdm import tqdm # progress bar
from torchvision.utils import draw_bounding_boxes
import matplotlib.pyplot as plt
import sys
%matplotlib inline
print(torch.__version__)
print(torchvision.__version__)
from pycocotools.coco import COCO
from albumentations.pytorch import ToTensorV2

# pip install pycocotools numpy pandas matplotlib albumentations torch torchvision pytorch-lightning opencv-python matplotlib

#from google.colab import drive
#drive.mount('/content/drive')

2.2.1+cu121
0.17.1+cu121


ModuleNotFoundError: No module named 'pycocotools'

# Hyperparamaters

In [None]:
# Hyperparameters
batch_size = 16
num_epochs= 1
lr = 0.001
image_size = [600, 600]
is_Test = False

In [None]:
def get_transforms(train=False):
    if train:
        transform = A.Compose([
            A.Resize(image_size[0], image_size[1]), # our input size can be 600px
            A.HorizontalFlip(p=0.3),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.1),
            A.ColorJitter(p=0.1),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    else:
        transform = A.Compose([
            A.Resize(image_size[0], image_size[1]), # our input size can be 600px
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    return transform


# Dataset class

In [None]:
class PotholeDetectionClass(datasets.VisionDataset):
    def __init__(self, root, stage='/train', transform=None, target_transform=None, transforms=None, batch_size = batch_size):
        super().__init__(root, transforms, transform, target_transform)
        self.stage = stage #train, valid, test
        self.coco = COCO(root + stage + "/_annotations.coco.json") # annotations stored here
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)]
        self.batch_size = batch_size

    def _load_image(self, id: int):
        path = self.coco.loadImgs(id)[0]['file_name']
        path = "/" + path
        image = cv2.imread(self.root + self.stage + path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
    def _load_target(self, id):
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index):
        id = self.ids[index]
        image = self._load_image(id)
        target = self._load_target(id)
        target = copy.deepcopy(self._load_target(id))

        boxes = [t['bbox'] + [t['category_id']] for t in target]
        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)

        image = transformed['image']
        boxes = transformed['bboxes']

        new_boxes = []
        for box in boxes:
            xmin = box[0]
            xmax = xmin + box[2]
            ymin = box[1]
            ymax = ymin + box[3]
            new_boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.tensor(new_boxes, dtype=torch.float32)

        targ = {}
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor([t['category_id'] for t in target], dtype=torch.int64)
        targ['image_id'] = torch.tensor([t['image_id'] for t in target])
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # we have a different area
        targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in target], dtype=torch.int64)
        return image.div(255), targ # scale images
    def __len__(self):
        return len(self.ids)

    def train_dataloader(self):
        train_dataset = self.root + "/train"
        return DataLoader(train_dataset, batch_size=self.batch_size, num_workers=0, shuffle=True)

    def val_dataloader(self):
        valid_dataset = self.root + "/valid"
        return DataLoader(valid_dataset, batch_size=self.batch_size, num_workers=0, shuffle=False)

    def test_dataloader(self):
        test_dataset = self.root + "/test"
        return DataLoader(test_dataset, batch_size=self.batch_size, num_workers=0, shuffle=False)

In [None]:
#dataset_path = "/content/drive/MyDrive/Uni/Github/Bachelor/Pytorch_lightning/Pothole_coco"
dataset_path = "/Pothole_coco"
dataset_path = os.getcwd() + dataset_path

In [None]:
coco = COCO(dataset_path + "/train" + "/_annotations.coco.json")
categories = coco.cats
n_classes = len(categories.keys())
categories

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


{0: {'id': 0, 'name': 'potholes', 'supercategory': 'none'},
 1: {'id': 1, 'name': 'pothole', 'supercategory': 'potholes'}}

In [None]:
classes = [i[1]['name'] for i in categories.items()]

In [None]:
train_dataset = PotholeDetectionClass(root=dataset_path, transforms=get_transforms(True))
test_dataset = PotholeDetectionClass(root=dataset_path, stage='/test', transforms=get_transforms(False))
valid_dataset = PotholeDetectionClass(root=dataset_path, stage= "/valid", transform=get_transforms(False))

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


# Plot image with predetermined bbox

In [None]:
#sample = train_dataset[6]
#img_int = torch.tensor(sample[0] * 255, dtype=torch.uint8)
#plt.imshow(draw_bounding_boxes(
#    img_int, sample[1]['boxes'], [classes[i] for i in sample[1]['labels']], width=4
#).permute(1, 2, 0))

In [None]:
model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, n_classes)

In [None]:
def custom_collate(batch):
    return tuple(zip(*batch))


In [None]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0, collate_fn=custom_collate)
test_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=0, collate_fn=custom_collate)


Run to test if training works

In [None]:
#images,targets = next(iter(train_loader))
#images = list(image for image in images)
#targets = [{k:v for k, v in t.items()} for t in targets]
#output = model(images, targets)

Set device to Cuda

In [None]:
#device = torch.device("cuda") # use GPU to train
device = "cpu"
model = model.to(device)

In [None]:
# Now, and optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)

# Neural Network Class

In [None]:
class NN(pl.LightningModule):
    def __init__(self, model, optimizer, train_loader, test_loader):
        super().__init__()
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.losses_dict = []
        self.loss = nn.HingeEmbeddingLoss()

    def forward(self, images, targets):
        return self.model(images, targets)

    def training_step(self, batch, batch_idx):
        images, targets = batch
        images = torch.stack([image.to(self.device) for image in images])
        targets = [{k: torch.tensor(v).to(self.device) for k, v in t.items()} for t in targets]

        loss_dict = self.model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        self.losses_dict.append({k: v.item() for k, v in loss_dict.items()})

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict)
            sys.exit(1)

        return losses

    def test_step(self, batch, batch_idx):
        images, targets = batch
        images = list(image.to(self.device) for image in images)
        targets = [{k: torch.tensor(v).to(self.device) for k, v in t.items()} for t in targets]

        loss_dict = self.model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        self.losses_dict.append({k: v.item() for k, v in loss_dict.items()})

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict)
            sys.exit(1)

        return losses

    def configure_optimizers(self):
        return self.optimizer

    def train_dataloader(self):
        return self.train_loader

    def test_dataloader(self):
        return self.test_loader

    def load_from_checkpoint(cls, checkpoint_path, model, optimizer, train_loader, test_loader):
        model = model.load_from_checkpoint(checkpoint_path)
        return cls(model, optimizer, train_loader, test_loader)

    

In [None]:
#for epoch in range(num_epochs):
#    train_one_epoch(model, optimizer, train_loader, device, epoch)


lightning_module = NN(model, optimizer, train_loader, test_loader)
# Initialize a Lightning Trainer
trainer = pl.Trainer(max_epochs=num_epochs)  # You can adjust the Trainer options

# Start training
trainer.fit(lightning_module, train_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type               | Params
---------------------------------------------
0 | model | FasterRCNN         | 18.9 M
1 | loss  | HingeEmbeddingLoss | 0     
---------------------------------------------
18.9 M    Trainable params
58.9 K    Non-trainable params
18.9 M    Total params
75.721    Total estimated model params size (MB)


Epoch 0:   0%|          | 0/30 [00:00<?, ?it/s] 

  targets = [{k: torch.tensor(v).to(self.device) for k, v in t.items()} for t in targets]


Epoch 0: 100%|██████████| 30/30 [00:54<00:00,  0.55it/s, v_num=10]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 30/30 [00:54<00:00,  0.55it/s, v_num=10]


In [None]:
ckpt_path = "lightning_logs/version_0/checkpoints/epoch=0-step=30.ckpt"
#model = pl.LightningModule.load_from_checkpoint(ckpt_path)
#print losses_dict
print ("-----------------------------")
print(lightning_module.losses_dict)
print ("-----------------------------")
#trainer = pl.Trainer(max_epochs=num_epochs+2)
#model.eval()
#trainer = trainer.fit(lightning_module,ckpt_path=ckpt_path)

#model = Net()
#optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

#checkpoint = torch.load(ckpt_path)
#model.load_state_dict(checkpoint['model_state_dict'])
#epoch = checkpoint['epoch']
#loss = checkpoint['loss']
#lightning_module = NN(model, optimizer, train_loader, test_loader, is_Test=True)
#load_checkpoint = NN.load_from_checkpoint(ckpt_path, model = model, optimizer = optimizer, train_loader = train_loader, test_loader = test_loader)
load_checkpoint = NN.load_from_checkpoint(ckpt_path, model, optimizer, train_loader, test_loader)
trainer = pl.Trainer()
trainer.test(load_checkpoint)
#is_Test = True
#trainer.test(ckpt_path=ckpt_path,dataloaders=train_loader)


##    img-245_jpg.rf.1c9b49a366bda1cf64dbfac2a946cd38.jpg

#torch.cuda.empty_cache()

-----------------------------
[{'loss_classifier': 0.6224026679992676, 'loss_box_reg': 0.2880636155605316, 'loss_objectness': 0.32039645314216614, 'loss_rpn_box_reg': 0.02279277890920639}, {'loss_classifier': 0.24826236069202423, 'loss_box_reg': 0.28460079431533813, 'loss_objectness': 0.1149812638759613, 'loss_rpn_box_reg': 0.014959920197725296}, {'loss_classifier': 0.28416433930397034, 'loss_box_reg': 0.31022411584854126, 'loss_objectness': 0.1291336566209793, 'loss_rpn_box_reg': 0.02203456684947014}, {'loss_classifier': 0.27589404582977295, 'loss_box_reg': 0.3082258701324463, 'loss_objectness': 0.11295930296182632, 'loss_rpn_box_reg': 0.013785094022750854}, {'loss_classifier': 0.2545846998691559, 'loss_box_reg': 0.30065301060676575, 'loss_objectness': 0.11376892030239105, 'loss_rpn_box_reg': 0.02285582758486271}, {'loss_classifier': 0.19994382560253143, 'loss_box_reg': 0.23665636777877808, 'loss_objectness': 0.11744051426649094, 'loss_rpn_box_reg': 0.01841791719198227}, {'loss_classi

TypeError: NN.load_from_checkpoint() missing 1 required positional argument: 'test_loader'

In [None]:
test_dataset[9]

ValueError: x_max is less than or equal to x_min for bbox (0.68, 0.5433333333333333, 0.68, 0.5466666666666666, 1).

In [None]:

print(test_dataset[9])
for i in range(9,10): #test_dataset.__len__()-30
    print(i)
    print(test_dataset[i])
    img, _ = test_dataset[i]
    img_int = torch.tensor(img*255, dtype=torch.uint8)
    with torch.no_grad():
        prediction = model([img.to(device)])
        pred = prediction[0]
        fig = plt.figure(figsize=(14, 10))
    plt.imshow(draw_bounding_boxes(img_int,
        pred['boxes'][pred['scores'] > 0.8],
        [classes[i] for i in pred['labels'][pred['scores'] > 0.8].tolist()], width=4
    ).permute(1, 2, 0))


ValueError: x_max is less than or equal to x_min for bbox (0.68, 0.5433333333333333, 0.68, 0.5466666666666666, 1).