In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pytorch_lightning as pl
from torchinfo import summary
from torch.utils.data import DataLoader, Dataset, random_split
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.utils
import time
import torchvision
import torchmetrics as metrics
import torchvision.models as models
from pytorch_lightning.loggers import TensorBoardLogger
from PIL import Image
import json
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torchmetrics.detection as detectionMetrics
import ast
trainDir = '/datasets/coco/train2014'
valDir = '/datasets/coco/val2014'

# Data preps (Run this when initializing the notebook for the first time)

In [5]:
with open('/datasets/coco/annotations/person_keypoints_train2014.json', 'r') as file:
    trainData = json.load(file)
    # data = pd.read_json(data)
# data
with open('/datasets/coco/annotations/person_keypoints_val2014.json', 'r') as file:
    valData = json.load(file)

In [6]:
trainImages = pd.DataFrame(trainData['images'])
trainAnnot = pd.DataFrame(trainData['annotations'])
valImages = pd.DataFrame(valData['images'])
valAnnot = pd.DataFrame(valData['annotations'])

In [7]:
trainImages

Unnamed: 0,license,file_name,coco_url,height,width,date_captured,flickr_url,id
0,5,COCO_train2014_000000057870.jpg,http://images.cocodataset.org/train2014/COCO_t...,480,640,2013-11-14 16:28:13,http://farm4.staticflickr.com/3153/2970773875_...,57870
1,5,COCO_train2014_000000384029.jpg,http://images.cocodataset.org/train2014/COCO_t...,429,640,2013-11-14 16:29:45,http://farm3.staticflickr.com/2422/3577229611_...,384029
2,1,COCO_train2014_000000222016.jpg,http://images.cocodataset.org/train2014/COCO_t...,640,480,2013-11-14 16:37:59,http://farm2.staticflickr.com/1431/1118526611_...,222016
3,3,COCO_train2014_000000520950.jpg,http://images.cocodataset.org/train2014/COCO_t...,427,640,2013-11-14 16:44:40,http://farm8.staticflickr.com/7007/6413705793_...,520950
4,4,COCO_train2014_000000069675.jpg,http://images.cocodataset.org/train2014/COCO_t...,480,640,2013-11-14 16:46:33,http://farm8.staticflickr.com/7156/6415223357_...,69675
...,...,...,...,...,...,...,...,...
82778,1,COCO_train2014_000000444010.jpg,http://images.cocodataset.org/train2014/COCO_t...,480,640,2013-11-25 14:46:11,http://farm4.staticflickr.com/3697/9303670993_...,444010
82779,3,COCO_train2014_000000565004.jpg,http://images.cocodataset.org/train2014/COCO_t...,427,640,2013-11-25 19:59:30,http://farm2.staticflickr.com/1278/4677568591_...,565004
82780,3,COCO_train2014_000000516168.jpg,http://images.cocodataset.org/train2014/COCO_t...,480,640,2013-11-25 21:03:34,http://farm3.staticflickr.com/2379/2293730995_...,516168
82781,4,COCO_train2014_000000547503.jpg,http://images.cocodataset.org/train2014/COCO_t...,375,500,2013-11-25 21:20:21,http://farm1.staticflickr.com/178/423174638_1c...,547503


In [8]:
trainAnnot

Unnamed: 0,segmentation,num_keypoints,area,iscrowd,keypoints,image_id,bbox,category_id,id
0,"[[329.88, 211.23, 337.63, 211.93, 341.51, 210....",16,3817.67415,0,"[311, 101, 2, 315, 98, 2, 308, 97, 2, 324, 97,...",196842,"[273.14, 82.25, 72.6, 129.68]",1,183022
1,"[[41.06, 411.11, 50.61, 388.19, 39.15, 360.5, ...",17,52541.14915,0,"[145, 113, 2, 158, 96, 2, 124, 97, 2, 184, 85,...",44474,"[33.42, 11.94, 303.68, 407.76]",1,183024
2,"[[237.59, 196.4, 261.87, 197.27, 286.15, 186, ...",15,10161.01650,0,"[306, 129, 2, 309, 124, 2, 301, 125, 2, 320, 1...",382669,"[237.59, 107.09, 110.99, 241.92]",1,183026
3,"[[248.55, 319.17, 255.13, 317.12, 252.25, 267....",12,1527.58730,0,"[238, 240, 2, 239, 237, 2, 235, 239, 2, 242, 2...",188163,"[229.23, 222.96, 29.6, 97.83]",1,183028
4,"[[293.27, 191.38, 298.11, 167.71, 296.32, 160....",11,2189.62475,0,"[290, 152, 2, 293, 151, 2, 289, 149, 2, 0, 0, ...",188440,"[244.13, 130.03, 57.03, 81.46]",1,183032
...,...,...,...,...,...,...,...,...,...
185311,"{'counts': [4138, 7, 418, 12, 413, 14, 1, 2, 1...",0,23580.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",128732,"[9, 284, 560, 141]",1,900100128732
185312,"{'counts': [111472, 7, 365, 11, 362, 13, 360, ...",0,25439.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",489186,"[298, 0, 341, 127]",1,900100489186
185313,"{'counts': [13254, 1, 316, 4, 6, 1, 315, 7, 31...",0,4227.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",390883,"[40, 104, 394, 43]",1,900100390883
185314,"{'counts': [99015, 6, 352, 8, 350, 9, 322, 11,...",0,6478.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",554743,"[275, 207, 153, 148]",1,900100554743


In [9]:
valImages

Unnamed: 0,license,file_name,coco_url,height,width,date_captured,flickr_url,id
0,3,COCO_val2014_000000391895.jpg,http://images.cocodataset.org/val2014/COCO_val...,360,640,2013-11-14 11:18:45,http://farm9.staticflickr.com/8186/8119368305_...,391895
1,4,COCO_val2014_000000522418.jpg,http://images.cocodataset.org/val2014/COCO_val...,480,640,2013-11-14 11:38:44,http://farm1.staticflickr.com/1/127244861_ab0c...,522418
2,3,COCO_val2014_000000184613.jpg,http://images.cocodataset.org/val2014/COCO_val...,336,500,2013-11-14 12:36:29,http://farm3.staticflickr.com/2169/2118578392_...,184613
3,3,COCO_val2014_000000318219.jpg,http://images.cocodataset.org/val2014/COCO_val...,640,556,2013-11-14 13:02:53,http://farm5.staticflickr.com/4125/5094763076_...,318219
4,3,COCO_val2014_000000554625.jpg,http://images.cocodataset.org/val2014/COCO_val...,640,426,2013-11-14 16:03:19,http://farm5.staticflickr.com/4086/5094162993_...,554625
...,...,...,...,...,...,...,...,...
40499,5,COCO_val2014_000000134574.jpg,http://images.cocodataset.org/val2014/COCO_val...,480,640,2013-11-25 14:27:46,http://farm8.staticflickr.com/7316/9531042830_...,134574
40500,1,COCO_val2014_000000572233.jpg,http://images.cocodataset.org/val2014/COCO_val...,427,640,2013-11-25 14:48:33,http://farm4.staticflickr.com/3751/9300793319_...,572233
40501,1,COCO_val2014_000000418825.jpg,http://images.cocodataset.org/val2014/COCO_val...,640,480,2013-11-25 15:04:26,http://farm8.staticflickr.com/7373/9111627012_...,418825
40502,1,COCO_val2014_000000560744.jpg,http://images.cocodataset.org/val2014/COCO_val...,480,640,2013-11-25 15:04:29,http://farm4.staticflickr.com/3791/9109408773_...,560744


In [10]:
valAnnot

Unnamed: 0,segmentation,num_keypoints,area,iscrowd,keypoints,image_id,bbox,category_id,id
0,"[[267.03, 243.78, 314.59, 154.05, 357.84, 136....",8,28292.08625,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",537548,"[267.03, 104.32, 229.19, 320]",1,183020
1,"[[640, 408.25, 639.54, 4.33, 546.25, 5.29, 426...",2,39122.63310,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",117891,"[206.77, 1.44, 433.23, 408.73]",1,183030
2,"[[332.57, 119.02, 327.1, 82.24, 330.75, 41.08,...",4,5634.56940,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",120021,"[276.12, 0.29, 61.18, 118.73]",1,183050
3,"[[364.84, 129.8, 363.74, 127.18, 363.52, 124.7...",0,672.72905,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",403255,"[355.87, 118.43, 30.84, 42.87]",1,183057
4,"[[187.03, 472.97, 192.43, 350.81, 181.62, 335....",13,26215.66095,0,"[0, 0, 0, 0, 0, 0, 252, 156, 2, 0, 0, 0, 248, ...",209468,"[178.38, 120.54, 114.59, 354.6]",1,183062
...,...,...,...,...,...,...,...,...,...
88148,"{'counts': [70219, 12, 627, 15, 623, 19, 619, ...",0,917.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",227049,"[109, 451, 30, 44]",1,900100227049
88149,"{'counts': [174, 16, 450, 55, 15, 20, 61, 78, ...",0,94736.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",431854,"[0, 0, 426, 252]",1,900100431854
88150,"{'counts': [52677, 20, 619, 46, 593, 48, 591, ...",0,1520.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",382715,"[82, 184, 312, 61]",1,900100382715
88151,"{'counts': [179, 27, 392, 41, 380, 51, 371, 59...",0,220834.00000,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",250282,"[0, 34, 639, 388]",1,900100250282


In [11]:
coba = trainAnnot.loc[trainAnnot['image_id'] == 196842, 'bbox'].values.
# for i in coba:
print(coba)

SyntaxError: invalid syntax (3932305250.py, line 1)

In [13]:
def getData(imagesDf, annotDf):
    dataDict = []
    imageIdArr = annotDf['image_id'].values.tolist()
    for imgId in imageIdArr:
        tempDict = {}
        bboxes = annotDf.loc[annotDf['image_id'] == imgId, 'bbox'].values.tolist()
        categories = annotDf.loc[annotDf['image_id'] == imgId, 'category_id'].values.tolist()
        if len(bboxes) > 1:
            for ctgry, bbox in zip(categories, bboxes):
                tempDict2 = {}
                tempDict2['imageFileName'] = imagesDf.loc[imagesDf['id'] == imgId, 'file_name'].values[0]
                tempDict2['bbox'] = bbox
                tempDict2['label'] = ctgry
                dataDict.append(tempDict2)
        else:
            tempDict['imageFileName'] = imagesDf.loc[imagesDf['id'] == imgId, 'file_name'].values[0]
            tempDict['bbox'] = bboxes[0]
            tempDict['label'] = categories[0]
            dataDict.append(tempDict)
    return dataDict

In [14]:
trainDataArr = getData(trainImages, trainAnnot)
valDataArr = getData(valImages, valAnnot)

In [None]:
print(len(trainDataArr))

In [78]:
readyTrain = pd.DataFrame(trainDataArr)

In [79]:
readyTrain

Unnamed: 0,imageFileName,bbox,label
0,COCO_train2014_000000196842.jpg,"[tensor(273.1400), tensor(82.2500), tensor(72....",tensor(1)
1,COCO_train2014_000000196842.jpg,"[tensor(205.0400), tensor(98.0300), tensor(76....",tensor(1)
2,COCO_train2014_000000044474.jpg,"[tensor(33.4200), tensor(11.9400), tensor(303....",tensor(1)
3,COCO_train2014_000000382669.jpg,"[tensor(237.5900), tensor(107.0900), tensor(11...",tensor(1)
4,COCO_train2014_000000188163.jpg,"[tensor(229.2300), tensor(222.9600), tensor(29...",tensor(1)
...,...,...,...
1537985,COCO_train2014_000000095999.jpg,"[tensor(186.6100), tensor(266.6500), tensor(47...",tensor(1)
1537986,COCO_train2014_000000095999.jpg,"[tensor(621.7100), tensor(274.7500), tensor(8....",tensor(1)
1537987,COCO_train2014_000000095999.jpg,"[tensor(1.9200), tensor(249.7200), tensor(21.1...",tensor(1)
1537988,COCO_train2014_000000095999.jpg,"[tensor(459.5000), tensor(264.7700), tensor(16...",tensor(1)


In [80]:
readyTrain.to_json('cleanedData.csv', orient='records')

In [15]:
readyVal = pd.DataFrame(valDataArr)
readyVal.to_json('cleanedDataVal.csv', orient='records')

# Start here

In [24]:
trainDf = pd.read_csv('cleanedData.csv')
valDf = pd.read_csv('cleanedDataVal.csv')

In [77]:
trainDf.iloc[:, 1] = trainDf.iloc[:, 1].apply(lambda x : torch.tensor(ast.literal_eval(x)))
trainDf.iloc[:, 2] = trainDf.iloc[:, 2].apply(lambda x : torch.tensor(ast.literal_eval(x)))

valDf.iloc[:, 1] = valDf.iloc[:, 1].apply(lambda x : torch.tensor(ast.literal_eval(x)))
valDf.iloc[:, 2] = valDf.iloc[:, 2].apply(lambda x : torch.tensor(ast.literal_eval(x)))

ValueError: malformed node or string: <ast.Call object at 0x7f37e83adf10>

In [17]:
trainingReady

Unnamed: 0,imageFileName,bbox,label
0,COCO_train2014_000000196842.jpg,"tensor([273.1400, 82.2500, 72.6000, 129.6800])",tensor(1)
1,COCO_train2014_000000196842.jpg,"tensor([205.0400, 98.0300, 76.3300, 113.7400])",tensor(1)
2,COCO_train2014_000000044474.jpg,"tensor([ 33.4200, 11.9400, 303.6800, 407.7600])",tensor(1)
3,COCO_train2014_000000382669.jpg,"tensor([237.5900, 107.0900, 110.9900, 241.9200])",tensor(1)
4,COCO_train2014_000000188163.jpg,"tensor([229.2300, 222.9600, 29.6000, 97.8300])",tensor(1)
...,...,...,...
1537985,COCO_train2014_000000095999.jpg,"tensor([186.6100, 266.6500, 47.1900, 126.9900])",tensor(1)
1537986,COCO_train2014_000000095999.jpg,"tensor([621.7100, 274.7500, 8.6300, 30.4700])",tensor(1)
1537987,COCO_train2014_000000095999.jpg,"tensor([ 1.9200, 249.7200, 21.1100, 30.7100])",tensor(1)
1537988,COCO_train2014_000000095999.jpg,"tensor([459.5000, 264.7700, 16.9600, 75.0500])",tensor(1)


In [18]:
validationReady

Unnamed: 0,imageFileName,bbox,label
0,COCO_val2014_000000537548.jpg,"tensor([267.0300, 104.3200, 229.1900, 320.0000])",tensor(1)
1,COCO_val2014_000000117891.jpg,"tensor([206.7700, 1.4400, 433.2300, 408.7300])",tensor(1)
2,COCO_val2014_000000120021.jpg,"tensor([276.1200, 0.2900, 61.1800, 118.7300])",tensor(1)
3,COCO_val2014_000000120021.jpg,"tensor([167.9000, 0.6200, 66.2400, 98.7700])",tensor(1)
4,COCO_val2014_000000120021.jpg,"tensor([ 65.7000, 0.0000, 89.8200, 124.3400])",tensor(1)
...,...,...,...
735208,COCO_val2014_000000046847.jpg,"tensor([573.5400, 83.6500, 7.8400, 19.4500])",tensor(1)
735209,COCO_val2014_000000046847.jpg,"tensor([545.6000, 101.4700, 7.3600, 12.2500])",tensor(1)
735210,COCO_val2014_000000046847.jpg,"tensor([622.5400, 72.9400, 8.8800, 27.2300])",tensor(1)
735211,COCO_val2014_000000046847.jpg,"tensor([486.3300, 104.3100, 3.9100, 13.7100])",tensor(1)


In [71]:
class customDataset(Dataset):
    def __init__(self, imageDir, dataDf, transform=None):
        self.transform = transform
        self.imageDir = imageDir
        self.dataDf = dataDf
        
    def __len__(self):
        return len(self.dataDf)
    
    def _convertXyxy(self, box):
        print(box[0])
        x, y, w, h = box
        xMin = x
        yMin = y
        xMax = x + w
        yMax = y + h
        return torch.tensor([xMin, yMin, xMax, yMax])
        
    def __getitem__(self, idx):
        fileName = self.dataDf.iloc[idx, 0]
        imgPath = os.path.join(self.imageDir, fileName)
        img = Image.open(imgPath).convert('RGB')
        label = self.dataDf.iloc[idx, 2]
        boxes = self._convertXyxy(self.dataDf.iloc[idx, 1])
        
        if self.transform:
            img = self.transform(img)
        
        return img, {'boxes' : boxes, 'label' : label}

# trainSet = customDataset(trainDir, trainDf)
# valSet = customDataset(valDir, valDf)

In [72]:
class dataModule(pl.LightningDataModule):
    def __init__(self, trainingDir = trainDir, validationDir = valDir, trainingDf = trainDf, validationDf = valDf, batchSize = 4):
        super().__init__()
        self.trainingDir = trainingDir
        self.validationDir = validationDir
        self.trainingDf = trainingDf
        self.validationDf = valDf
        self.batchSize = batchSize
        self.classes = ['Background', 'Person']
    
    def prepare_data(self):
        pass
    
    def setup(self, stage):
        mean = [.485, .456, .406]
        std = [.229, .224, .225]
        transforms = A.Compose([
            A.Normalize(mean=mean, std=std),
            ToTensorV2()
        ])
        # transform = transforms.Compose([
        #     transforms.ToTensor()
        #     transforms.Normalize(mean=mean, std=std)
        # ])
        self.trainSet = customDataset(self.trainingDir, self.trainingDf, transform=transforms)
        self.valSet = customDataset(self.validationDir, self.validationDf, transform=transforms)
    
    def train_dataloader(self):
        return DataLoader(self.trainSet, batch_size=self.batchSize, shuffle=True, num_workers=8, drop_last=True)
    
    def val_dataloader(self):
        return DataLoader(self.valSet, batch_size=self.batchSize, shuffle=False, num_workers=8, drop_last=True)

In [73]:
class inferenceTime(pl.Callback):
    def on_predict_start(self, trainer, pl_module):
        self.startTime = time.time()
    def on_predict_end(self, trainer, pl_module):
        endTime = time.time() - self.startTime
        print(f'Inference Time per image : {endTime%60:.4f}')

class ResNet50_FRCNN(pl.LightningModule):
    def __init__(self, lr, decay=0):
        super().__init__()
        backbone = models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT', weights_backbone='DEFAULT')
        in_features = backbone.roi_heads.box_predictor.cls_score.in_features
        self.model = backbone
        self.model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes=2)
        # self.mAP = detectionMetrics.mean_ap.MeanAveragePrecision(box_format='xyxy', iou_type='bbox')
        self.IoU = detectionMetrics.IntersectionOverUnion()
        self.lr = lr
        self.decay = decay
        self.mean = [.485, .456, .406]
        self.std = [.229, .224, .225]
        self.unnormalize = transforms.Normalize(mean=[-m/s for m, s in zip(self.mean, self.std)], std=[1/s for s in self.std])
    def forward(self, x):
        x = self.model(x)
        return x
    def training_step(self, batch, batch_idx):
        images, targets = batch
        imageList = [image for image in images]
        targetList = [target for target in targets]
        outputs = self.model(imageList, targetList)
        losses = sum(loss for loss in outputs.values())
        iou = self.IoU(outputs, targetList)
        self.log_dict({'IoU' : iou, 'training_loss' : losses}, on_step=False, on_epoch=True, prog_bar=True)
        return losses
    def validation_step(self, batch, batch_idx):
        images, targets = batch
        imageList = [image for image in images]
        targetList = [target for target in targetList]
        outputs = self.model(imageList)
        # MAP = self.mAP(outputs, targetList)
        losses = sum(loss for loss in outputs.values())
        self.log_dict({'validation_loss' : losses}, on_step=False, on_epoch=True, prog_bar=True)
        return losses
    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.decay)

In [74]:
dataMod = dataModule()
model = ResNet50_FRCNN(0.0003)
trainer = pl.Trainer(accelerator='gpu', devices=[0], max_epochs=2, precision=32)
trainer.fit(model=model, datamodule=dataMod)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type                  | Params
------------------------------------------------------
0 | model       | FasterRCNN            | 41.3 M
1 | IoU         | IntersectionOverUnion | 0     
2 | unnormalize | Normalize             | 0     
------------------------------------------------------
41.1 M    Trainable params
222 K     Non-trainable params
41.3 M    Total params
165.197   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

t
ttt

t
t

tt

tt

t
tt
tt


t
ttt


t
t
t
t


ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.9/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_71/3828179676.py", line 24, in __getitem__
    boxes = self._convertXyxy(self.dataDf.iloc[idx, 1])
  File "/tmp/ipykernel_71/3828179676.py", line 12, in _convertXyxy
    x, y, w, h = box
ValueError: too many values to unpack (expected 4)
