In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
cd /content/gdrive/MyDrive/data_visualization/datavis_data/

/content/gdrive/MyDrive/data_visualization/datavis_data


In [3]:
import os
import collections
import pandas as pd
import numpy as np
import functools
import matplotlib.pyplot as plt
import cv2

from sklearn import preprocessing 


import xml.etree.ElementTree as ET

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import SequentialSampler

In [4]:
XML_PATH = "annotation"
IMG_PATH = "train"
XML_FILES = [os.path.join(XML_PATH, f) for f in os.listdir(XML_PATH)]
XML_FILES = XML_FILES[:1000] #first 1000

len(XML_FILES)

1000

In [5]:
class XmlParser(object):

    def __init__(self,xml_file):

        self.xml_file = self.filter(xml_file)
        
        # path to the image file = name of annotation file
        self.img_name = xml_file.split('/')[1].split('.')[0]+".jpg";
        self.img_path = os.path.join(IMG_PATH, self.img_name)

        # image id 
        self.image_id = self.img_name.split('.')[0];

        # names of the classes contained in the xml file
        self.names = self._get_names()
        # coordinates of the bounding boxes
        self.boxes = self._get_bndbox()

    def filter(self,xml_file):

        filtered_data = []

        f = open(xml_file, 'r')

        for line in f:
            data = line.split(',')
            if data[5]!='0' and (data[5]=='1' or data[5]=='2'):
                data = [int(x.strip()) for x in data]
                filtered_data.append(data)

                #augmentation for people
                if data[5]=='1' or data[5]=='2':
                  filtered_data.append(data)

        return filtered_data

    def _get_names(self):

        label_dict = {
                      0 : "Ignore",
                      1 : "Pedestrian",
                      2 : "People",
                      3 : "Bicycle",
                      4 : "Car",
                      5 : "Van",
                      6 : "Truck",
                      7 : "Tricycle",
                      8 : "Awning-tricycle",
                      9 : "Bus",
                      10 : "Motor",
                      11 : "Others"
                    }

        names = []

        for data in self.xml_file:
              class_id = data[5]
              names.append(label_dict[class_id])

        return np.array(names)

    def _get_bndbox(self):

        boxes = []

        for data in self.xml_file:
            
            coordinates = []
            coordinates.append(np.int32(data[0])) #xmin
            coordinates.append(np.int32(np.float32(data[1]))) #ymin
            coordinates.append(np.int32(data[2]+data[0])) #xmax
            coordinates.append(np.int32(data[3]+data[1])) #ymax
            boxes.append(coordinates)

        return np.array(boxes)

# xml = XmlParser('Annotations/0000007_05999_d_0000038.txt')

In [6]:
def xml_files_to_df(xml_files):
    
    """"Return pandas dataframe from list of XML files."""
    
    names = []
    boxes = []
    image_id = []
    xml_path = []
    img_path = []
    for file in xml_files:
        xml = XmlParser(file)
        names.extend(xml.names)
        boxes.extend(xml.boxes)
        image_id.extend([xml.image_id] * len(xml.names))
        xml_path.extend([xml.xml_file] * len(xml.names))
        img_path.extend([xml.img_path] * len(xml.names))
    a = {"img_id": image_id,
         "names": names,
         "boxes": boxes,
         "xml_path":xml_path,
         "img_path":img_path}
    
    df = pd.DataFrame.from_dict(a, orient='index')
    df = df.transpose()
    
    return df

df = xml_files_to_df(XML_FILES)
df.head()
df.shape[0]

18082

In [7]:
# check values for per class
df['names'].value_counts()

Pedestrian    13445
People         4637
Name: names, dtype: int64

In [8]:
df['boxes']

0          [873, 451, 890, 473]
1          [754, 376, 761, 384]
2          [796, 305, 801, 310]
3          [836, 283, 840, 288]
4        [1291, 160, 1311, 190]
                  ...          
18077     [1499, 94, 1504, 108]
18078     [1489, 95, 1495, 108]
18079      [1688, 62, 1693, 76]
18080      [1678, 59, 1684, 73]
18081      [1655, 31, 1660, 42]
Name: boxes, Length: 18082, dtype: object

In [9]:
# classes need to be in int form so we use LabelEncoder for this task
enc = preprocessing.LabelEncoder()
df['labels'] = enc.fit_transform(df['names'])
df['labels'] = np.stack(df['labels'][i]+1 for i in range(len(df['labels']))) 

  if (await self.run_code(code, result,  async_=asy)):


In [10]:
classes = df[['names','labels']].value_counts()
classes

names       labels
Pedestrian  1         13445
People      2          4637
dtype: int64

In [11]:
df.head()

Unnamed: 0,img_id,names,boxes,xml_path,img_path,labels
0,9999994_00000_d_0000055,Pedestrian,"[873, 451, 890, 473]","[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,1
1,9999994_00000_d_0000055,Pedestrian,"[754, 376, 761, 384]","[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,1
2,9999994_00000_d_0000055,Pedestrian,"[796, 305, 801, 310]","[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,1
3,9999994_00000_d_0000055,People,"[836, 283, 840, 288]","[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,2
4,9999994_00000_d_0000055,People,"[1291, 160, 1311, 190]","[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,2


In [12]:
classes = {
                      1 : "Pedestrian",
                      2 : "People",
                    }

In [13]:
#bounding box coordinates point need to be in separate columns

df['xmin'] = -1
df['ymin'] = -1
df['xmax'] = -1
df['ymax'] = -1

df[['xmin','ymin','xmax','ymax']]=np.stack(df['boxes'][i] for i in range(len(df['boxes'])))

df.drop(columns=['boxes'], inplace=True)
df['xmin'] = df['xmin'].astype(float)
df['ymin'] = df['ymin'].astype(float)
df['xmax'] = df['xmax'].astype(float)
df['ymax'] = df['ymax'].astype(float)

  if (await self.run_code(code, result,  async_=asy)):


In [14]:
# drop names column since we dont need it anymore
df.drop(columns=['names'], inplace=True)
df.head()

Unnamed: 0,img_id,xml_path,img_path,labels,xmin,ymin,xmax,ymax
0,9999994_00000_d_0000055,"[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,1,873.0,451.0,890.0,473.0
1,9999994_00000_d_0000055,"[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,1,754.0,376.0,761.0,384.0
2,9999994_00000_d_0000055,"[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,1,796.0,305.0,801.0,310.0
3,9999994_00000_d_0000055,"[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,2,836.0,283.0,840.0,288.0
4,9999994_00000_d_0000055,"[[873, 451, 17, 22, 1, 1, 0, 2], [754, 376, 7,...",train/9999994_00000_d_0000055.jpg,2,1291.0,160.0,1311.0,190.0


In [15]:
len(df['img_id'].unique())

940

In [16]:
image_ids = df['img_id'].unique()
valid_ids = image_ids[-4:]
train_ids = image_ids[:-4]
len(train_ids)

936

In [17]:
valid_df = df[df['img_id'].isin(valid_ids)]
train_df = df[df['img_id'].isin(train_ids)]
valid_df.shape, train_df.shape

((121, 8), (17961, 8))

In [18]:
!pip install -q albumentations
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from albumentations import RandomRotate90
from tensorflow.keras import mixed_precision
import gc

In [19]:
def func(image):
    Trgb2lms =np.array( [
          np.array([17.8824, 43.5161, 4.1194]),
          np.array([3.4557,27.1154, 3.8671]),
          np.array([0.0300, 0.1843, 1.4671]) 
      ])
    
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    x,y,z = image.shape
#     print(image.shape)
    cvd_due = np.array([
                     np.array([1 ,0, 0]),   
                     np.array([0.494207, 0, 1.24827]),   
                     np.array([0, 0, 1]),   
    ])
    INV_Trgb2lms = np.linalg.inv(Trgb2lms) 

#     print(image.transpose(2, 0, 1).shape)
    out = np.dot(INV_Trgb2lms, cvd_due)
    out = np.dot(out, Trgb2lms)
    out = np.dot(out, image.transpose(2, 0, 1).reshape(3,-1)) 
    out = out.reshape(3,x,y).transpose(1, 2, 0)
    out = cv2.cvtColor(np.float32(out), cv2.COLOR_RGB2BGR)

    return out
  

In [20]:
class VOCDataset(Dataset):
    
    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        
        self.image_ids = dataframe['img_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
    
    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        records = self.df[self.df['img_id'] == image_id]
        
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = func(image)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        rows, cols = image.shape[:2]
        
        boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values
        
       
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        
        label = records['labels'].values
        labels = torch.as_tensor(label, dtype=torch.int64)
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        # target['masks'] = None
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1,0)
            
            return image, target
        
    def __len__(self) -> int:
        return self.image_ids.shape[0]

In [21]:
def get_transform_train():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        ToTensorV2(p=1.0)
    ], bbox_params={'format':'pascal_voc', 'label_fields': ['labels']})

def get_transform_valid():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields':['labels']})

In [22]:
def collate_fn(batch):
    return tuple(zip(*batch))

train_dataset = VOCDataset(train_df, IMG_PATH , get_transform_train())
valid_dataset = VOCDataset(valid_df, IMG_PATH, get_transform_valid())


# split the dataset in train and test set
indices = torch.randperm(len(train_dataset)).tolist()


train_data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)



In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [24]:
images, targets= next(iter(train_data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

plt.figure(figsize=(20,20))
for i, (image, target) in enumerate(zip(images, targets)):
    plt.subplot(2,2, i+1)
    boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)
    sample = images[i].permute(1,2,0).cpu().numpy()
    names = targets[i]['labels'].cpu().numpy().astype(np.int64)
    for i,box in enumerate(boxes):
        cv2.rectangle(sample,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (0, 0, 220), 2)
        cv2.putText(sample, classes[names[i]], (box[0],box[1]+15),cv2.FONT_HERSHEY_COMPLEX ,0.5,(0,220,0),1,cv2.LINE_AA)  

    plt.axis('off')
    plt.imshow(sample)
    

Output hidden; open in https://colab.research.google.com to view.

In [25]:
# load a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)



In [26]:
num_classes = 12

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [27]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [28]:
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-31s6g0lp
  Running command git clone --filter=blob:none --quiet https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-31s6g0lp
  Resolved https://github.com/cocodataset/cocoapi.git to commit 8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [29]:
!git clone https://github.com/pytorch/vision.git
!cd vision;cp references/detection/utils.py ../;cp references/detection/transforms.py ../;cp references/detection/coco_eval.py ../;cp references/detection/engine.py ../;cp references/detection/coco_utils.py ../

fatal: destination path 'vision' already exists and is not an empty directory.


In [30]:
from engine import train_one_epoch, evaluate
import utils

In [31]:
%%time
# let's train it for 1 epoch
num_epochs = 1

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_data_loader, device=device)

Epoch: [0]  [  0/234]  eta: 0:50:19  lr: 0.000026  loss: 5.1418 (5.1418)  loss_classifier: 2.7267 (2.7267)  loss_box_reg: 0.2778 (0.2778)  loss_objectness: 1.5747 (1.5747)  loss_rpn_box_reg: 0.5627 (0.5627)  time: 12.9032  data: 3.4647  max mem: 4239
Epoch: [0]  [ 10/234]  eta: 0:09:01  lr: 0.000241  loss: 3.7628 (4.0254)  loss_classifier: 2.6887 (2.6490)  loss_box_reg: 0.2690 (0.2704)  loss_objectness: 0.4815 (0.6354)  loss_rpn_box_reg: 0.5627 (0.4707)  time: 2.4179  data: 0.4363  max mem: 4599
Epoch: [0]  [ 20/234]  eta: 0:07:00  lr: 0.000455  loss: 3.6159 (3.6848)  loss_classifier: 2.4364 (2.4020)  loss_box_reg: 0.2643 (0.2762)  loss_objectness: 0.4049 (0.5558)  loss_rpn_box_reg: 0.3991 (0.4507)  time: 1.4196  data: 0.1384  max mem: 4743
Epoch: [0]  [ 30/234]  eta: 0:05:58  lr: 0.000670  loss: 2.6337 (3.2240)  loss_classifier: 1.7599 (2.0651)  loss_box_reg: 0.2719 (0.2891)  loss_objectness: 0.2928 (0.4570)  loss_rpn_box_reg: 0.3981 (0.4128)  time: 1.3923  data: 0.1275  max mem: 4743



Test:  [0/1]  eta: 0:00:02  model_time: 0.4669 (0.4669)  evaluator_time: 0.1738 (0.1738)  time: 2.1662  data: 1.4865  max mem: 4945
Test: Total time: 0:00:02 (2.3015 s / it)
Averaged stats: model_time: 0.4669 (0.4669)  evaluator_time: 0.1738 (0.1738)
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.042
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.116
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.023
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.043
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.066
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.006
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.063
 Average Recall     (AR) @[ IoU=0.50:0.9

In [32]:
torch.save(model.state_dict(), 'faster_rcnn_state2.pth')

In [33]:
# load  a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

WEIGHTS_FILE = "./faster_rcnn_state2.pth"

num_classes = 12

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the traines weights
model.load_state_dict(torch.load(WEIGHTS_FILE))

model = model.to(device)




In [34]:
def obj_detector(img):
    img = cv2.imread(img, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)


    img /= 255.0
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)
    img = img.permute(0,3,1,2)
    
    model.eval()

    detection_threshold = 0.70
    
    img = list(im.to(device) for im in img)
    output = model(img)

    for i , im in enumerate(img):
        boxes = output[i]['boxes'].data.cpu().numpy()
        scores = output[i]['scores'].data.cpu().numpy()
        labels = output[i]['labels'].data.cpu().numpy()

        labels = labels[scores >= detection_threshold]
        boxes = boxes[scores >= detection_threshold].astype(np.int32)
        scores = scores[scores >= detection_threshold]

        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    
    sample = img[0].permute(1,2,0).cpu().numpy()
    sample = np.array(sample)
    boxes = output[0]['boxes'].data.cpu().numpy()
    name = output[0]['labels'].data.cpu().numpy()
    scores = output[0]['scores'].data.cpu().numpy()
    boxes = boxes[scores >= detection_threshold].astype(np.int32)
    names = name.tolist()
    
    return names, boxes, sample

In [35]:
pred_path = "./test1"
pred_files = [os.path.join(pred_path,f) for f in os.listdir(pred_path)]

for i, images in enumerate(pred_files):
    print(i,images)
    names,boxes,sample = obj_detector(images)

    img = cv2.imread(images)
    
    for i,box in enumerate(boxes):
        cv2.rectangle(img,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (0, 220, 0), 2)  

    cv2.imwrite(f'./test2/{images[7:]}',img)

0 ./test1/0000007_05999_d_0000038.jpg
1 ./test1/0000002_00005_d_0000014.jpg
2 ./test1/0000072_00000_d_0000001.jpg
3 ./test1/0000107_02196_d_0000055.jpg
4 ./test1/0000072_07660_d_0000012.jpg
5 ./test1/0000008_00889_d_0000039.jpg
6 ./test1/0000008_03499_d_0000043.jpg
7 ./test1/0000008_03999_d_0000044.jpg
8 ./test1/0000008_04499_d_0000045.jpg
9 ./test1/0000008_02999_d_0000042.jpg
10 ./test1/0000008_02499_d_0000041.jpg
11 ./test1/0000008_01999_d_0000040.jpg
12 ./test1/0000036_00500_d_0000046.jpg
13 ./test1/0000031_02000_d_0000041.jpg
14 ./test1/0000031_03527_d_0000043.jpg
15 ./test1/0000031_00000_d_0000037.jpg
16 ./test1/9999999_00301_d_0000133.jpg
17 ./test1/9999999_00299_d_0000132.jpg
18 ./test1/0000040_04284_d_0000071.jpg
19 ./test1/0000040_02454_d_0000068.jpg
20 ./test1/0000040_03288_d_0000069.jpg
21 ./test1/0000040_03752_d_0000070.jpg
22 ./test1/0000040_01500_d_0000067.jpg
23 ./test1/0000040_01000_d_0000066.jpg
24 ./test1/0000039_05300_d_0000061.jpg
25 ./test1/0000039_00000_d_0000055.