In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
cd /content/gdrive/MyDrive/data_visualization/datavis_data/

/content/gdrive/MyDrive/data_visualization/datavis_data


In [3]:
import os
import collections
import pandas as pd
import numpy as np
import functools
import matplotlib.pyplot as plt
import cv2

from sklearn import preprocessing 


import xml.etree.ElementTree as ET

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import SequentialSampler

In [4]:
XML_PATH = "annotation"
IMG_PATH = "train"
XML_FILES = [os.path.join(XML_PATH, f) for f in os.listdir(XML_PATH)]
XML_FILES = XML_FILES[:1000] #first 1000

len(XML_FILES)

1000

In [5]:
class XmlParser(object):

    def __init__(self,xml_file):

        self.xml_file = self.filter(xml_file)
        
        # path to the image file = name of annotation file
        self.img_name = xml_file.split('/')[1].split('.')[0]+".jpg";
        self.img_path = os.path.join(IMG_PATH, self.img_name)

        # image id 
        self.image_id = self.img_name.split('.')[0];

        # names of the classes contained in the xml file
        self.names = self._get_names()
        # coordinates of the bounding boxes
        self.boxes = self._get_bndbox()

    def filter(self,xml_file):

        filtered_data = []

        f = open(xml_file, 'r')

        for line in f:
            data = line.split(',')
            if data[5]!='0' and (data[5]=='9' or data[5]=='10'):
                data = [int(x.strip()) for x in data]
                filtered_data.append(data)

                #augmentation for people
                if data[5]=='1' or data[5]=='2':
                  filtered_data.append(data)

        return filtered_data

    def _get_names(self):

        label_dict = {
                      0 : "Ignore",
                      1 : "Pedestrian",
                      2 : "People",
                      3 : "Bicycle",
                      4 : "Car",
                      5 : "Van",
                      6 : "Truck",
                      7 : "Tricycle",
                      8 : "Awning-tricycle",
                      9 : "Bus",
                      10 : "Motor",
                      11 : "Others"
                    }

        names = []

        for data in self.xml_file:
              class_id = data[5]
              names.append(label_dict[class_id])

        return np.array(names)

    def _get_bndbox(self):

        boxes = []

        for data in self.xml_file:
            
            coordinates = []
            coordinates.append(np.int32(data[0])) #xmin
            coordinates.append(np.int32(np.float32(data[1]))) #ymin
            coordinates.append(np.int32(data[2]+data[0])) #xmax
            coordinates.append(np.int32(data[3]+data[1])) #ymax
            boxes.append(coordinates)

        return np.array(boxes)

# xml = XmlParser('Annotations/0000007_05999_d_0000038.txt')

In [6]:
def xml_files_to_df(xml_files):
    
    """"Return pandas dataframe from list of XML files."""
    
    names = []
    boxes = []
    image_id = []
    xml_path = []
    img_path = []
    for file in xml_files:
        xml = XmlParser(file)
        names.extend(xml.names)
        boxes.extend(xml.boxes)
        image_id.extend([xml.image_id] * len(xml.names))
        xml_path.extend([xml.xml_file] * len(xml.names))
        img_path.extend([xml.img_path] * len(xml.names))
    a = {"img_id": image_id,
         "names": names,
         "boxes": boxes,
         "xml_path":xml_path,
         "img_path":img_path}
    
    df = pd.DataFrame.from_dict(a, orient='index')
    df = df.transpose()
    
    return df

df = xml_files_to_df(XML_FILES)
df.head()
df.shape[0]

5511

In [7]:
# check values for per class
df['names'].value_counts()

Motor    5097
Bus       414
Name: names, dtype: int64

In [8]:
df['boxes']

0         [439, 977, 483, 994]
1         [449, 949, 493, 969]
2         [481, 771, 521, 801]
3         [504, 687, 547, 709]
4         [512, 648, 546, 677]
                 ...          
5506     [990, 540, 1020, 576]
5507    [526, 1212, 578, 1238]
5508      [401, 196, 414, 207]
5509    [1956, 346, 1982, 361]
5510      [915, 377, 935, 391]
Name: boxes, Length: 5511, dtype: object

In [9]:
# classes need to be in int form so we use LabelEncoder for this task
enc = preprocessing.LabelEncoder()
df['labels'] = enc.fit_transform(df['names'])
df['labels'] = np.stack(df['labels'][i]+1 for i in range(len(df['labels']))) 

  if (await self.run_code(code, result,  async_=asy)):


In [10]:
classes = df[['names','labels']].value_counts()
classes

names  labels
Motor  2         5097
Bus    1          414
dtype: int64

In [11]:
df.head()

Unnamed: 0,img_id,names,boxes,xml_path,img_path,labels
0,9999990_00000_d_0000013,Motor,"[439, 977, 483, 994]","[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2
1,9999990_00000_d_0000013,Motor,"[449, 949, 493, 969]","[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2
2,9999990_00000_d_0000013,Motor,"[481, 771, 521, 801]","[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2
3,9999990_00000_d_0000013,Motor,"[504, 687, 547, 709]","[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2
4,9999990_00000_d_0000013,Motor,"[512, 648, 546, 677]","[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2


In [12]:
classes = {
                      1 : "bus",
                      2 : "motor",
                    }

In [13]:
#bounding box coordinates point need to be in separate columns

df['xmin'] = -1
df['ymin'] = -1
df['xmax'] = -1
df['ymax'] = -1

df[['xmin','ymin','xmax','ymax']]=np.stack(df['boxes'][i] for i in range(len(df['boxes'])))

df.drop(columns=['boxes'], inplace=True)
df['xmin'] = df['xmin'].astype(float)
df['ymin'] = df['ymin'].astype(float)
df['xmax'] = df['xmax'].astype(float)
df['ymax'] = df['ymax'].astype(float)

  if (await self.run_code(code, result,  async_=asy)):


In [14]:
# drop names column since we dont need it anymore
df.drop(columns=['names'], inplace=True)
df.head()

Unnamed: 0,img_id,xml_path,img_path,labels,xmin,ymin,xmax,ymax
0,9999990_00000_d_0000013,"[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2,439.0,977.0,483.0,994.0
1,9999990_00000_d_0000013,"[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2,449.0,949.0,493.0,969.0
2,9999990_00000_d_0000013,"[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2,481.0,771.0,521.0,801.0
3,9999990_00000_d_0000013,"[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2,504.0,687.0,547.0,709.0
4,9999990_00000_d_0000013,"[[439, 977, 44, 17, 1, 10, 0, 1], [449, 949, 4...",train/9999990_00000_d_0000013.jpg,2,512.0,648.0,546.0,677.0


In [15]:
len(df['img_id'].unique())

778

In [16]:
image_ids = df['img_id'].unique()
valid_ids = image_ids[-4:]
train_ids = image_ids[:-4]
len(train_ids)

774

In [17]:
valid_df = df[df['img_id'].isin(valid_ids)]
train_df = df[df['img_id'].isin(train_ids)]
valid_df.shape, train_df.shape

((30, 8), (5481, 8))

In [18]:
!pip install -q albumentations
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from albumentations import RandomRotate90
from tensorflow.keras import mixed_precision
import gc

In [19]:
def func(image):
    Trgb2lms =np.array( [
          np.array([17.8824, 43.5161, 4.1194]),
          np.array([3.4557,27.1154, 3.8671]),
          np.array([0.0300, 0.1843, 1.4671]) 
      ])
    
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    x,y,z = image.shape
#     print(image.shape)
    cvd_due = np.array([
                     np.array([1 ,0, 0]),   
                     np.array([0.494207, 0, 1.24827]),   
                     np.array([0, 0, 1]),   
    ])
    INV_Trgb2lms = np.linalg.inv(Trgb2lms) 

#     print(image.transpose(2, 0, 1).shape)
    out = np.dot(INV_Trgb2lms, cvd_due)
    out = np.dot(out, Trgb2lms)
    out = np.dot(out, image.transpose(2, 0, 1).reshape(3,-1)) 
    out = out.reshape(3,x,y).transpose(1, 2, 0)
    out = cv2.cvtColor(np.float32(out), cv2.COLOR_RGB2BGR)

    return out
  

In [20]:
class VOCDataset(Dataset):
    
    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        
        self.image_ids = dataframe['img_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
    
    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        records = self.df[self.df['img_id'] == image_id]
        
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = func(image)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        rows, cols = image.shape[:2]
        
        boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values
        
       
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        
        label = records['labels'].values
        labels = torch.as_tensor(label, dtype=torch.int64)
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        # target['masks'] = None
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1,0)
            
            return image, target
        
    def __len__(self) -> int:
        return self.image_ids.shape[0]

In [21]:
def get_transform_train():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        ToTensorV2(p=1.0)
    ], bbox_params={'format':'pascal_voc', 'label_fields': ['labels']})

def get_transform_valid():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields':['labels']})

In [22]:
def collate_fn(batch):
    return tuple(zip(*batch))

train_dataset = VOCDataset(train_df, IMG_PATH , get_transform_train())
valid_dataset = VOCDataset(valid_df, IMG_PATH, get_transform_valid())


# split the dataset in train and test set
indices = torch.randperm(len(train_dataset)).tolist()


train_data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)



In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
images, targets= next(iter(train_data_loader))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

plt.figure(figsize=(20,20))
for i, (image, target) in enumerate(zip(images, targets)):
    plt.subplot(2,2, i+1)
    boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)
    sample = images[i].permute(1,2,0).cpu().numpy()
    names = targets[i]['labels'].cpu().numpy().astype(np.int64)
    for i,box in enumerate(boxes):
        cv2.rectangle(sample,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (0, 0, 220), 2)
        cv2.putText(sample, classes[names[i]], (box[0],box[1]+15),cv2.FONT_HERSHEY_COMPLEX ,0.5,(0,220,0),1,cv2.LINE_AA)  

    plt.axis('off')
    plt.imshow(sample)
    

In [None]:
# load a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:02<00:00, 79.6MB/s]


In [None]:
num_classes = 12

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [None]:
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
!git clone https://github.com/pytorch/vision.git
!cd vision;cp references/detection/utils.py ../;cp references/detection/transforms.py ../;cp references/detection/coco_eval.py ../;cp references/detection/engine.py ../;cp references/detection/coco_utils.py ../

In [None]:
from engine import train_one_epoch, evaluate
import utils

In [None]:
%%time
# let's train it for 1 epoch
num_epochs = 5

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_data_loader, device=device)

In [None]:
torch.save(model.state_dict(), 'faster_rcnn_state5.pth')

In [None]:
# load  a model; pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

WEIGHTS_FILE = "./faster_rcnn_state5.pth"

num_classes = 12

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the traines weights
model.load_state_dict(torch.load(WEIGHTS_FILE))

model = model.to(device)


In [None]:
def obj_detector(img):
    img = cv2.imread(img, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)


    img /= 255.0
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)
    img = img.permute(0,3,1,2)
    
    model.eval()

    detection_threshold = 0.70
    
    img = list(im.to(device) for im in img)
    output = model(img)

    for i , im in enumerate(img):
        boxes = output[i]['boxes'].data.cpu().numpy()
        scores = output[i]['scores'].data.cpu().numpy()
        labels = output[i]['labels'].data.cpu().numpy()

        labels = labels[scores >= detection_threshold]
        boxes = boxes[scores >= detection_threshold].astype(np.int32)
        scores = scores[scores >= detection_threshold]

        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
    
    sample = img[0].permute(1,2,0).cpu().numpy()
    sample = np.array(sample)
    boxes = output[0]['boxes'].data.cpu().numpy()
    name = output[0]['labels'].data.cpu().numpy()
    scores = output[0]['scores'].data.cpu().numpy()
    boxes = boxes[scores >= detection_threshold].astype(np.int32)
    names = name.tolist()
    
    return names, boxes, sample

In [None]:
pred_path = "./test4"
pred_files = [os.path.join(pred_path,f) for f in os.listdir(pred_path)]

for i, images in enumerate(pred_files):
    print(i,images)
    names,boxes,sample = obj_detector(images)

    img = cv2.imread(images)
    
    for i,box in enumerate(boxes):
        cv2.rectangle(img,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (0, 220, 0), 2)  

    cv2.imwrite(f'./test5/{images[7:]}',img)

0 ./test3/0000007_05999_d_0000038.jpg
1 ./test3/0000002_00005_d_0000014.jpg
2 ./test3/0000072_00000_d_0000001.jpg
3 ./test3/0000107_02196_d_0000055.jpg
4 ./test3/0000072_07660_d_0000012.jpg
5 ./test3/0000008_00889_d_0000039.jpg
6 ./test3/0000008_03499_d_0000043.jpg
7 ./test3/0000008_03999_d_0000044.jpg
8 ./test3/0000008_04499_d_0000045.jpg
9 ./test3/0000008_02999_d_0000042.jpg
10 ./test3/0000008_02499_d_0000041.jpg
11 ./test3/0000008_01999_d_0000040.jpg
12 ./test3/0000036_00500_d_0000046.jpg
13 ./test3/0000031_02000_d_0000041.jpg
14 ./test3/0000031_03527_d_0000043.jpg
15 ./test3/0000031_00000_d_0000037.jpg
16 ./test3/9999999_00301_d_0000133.jpg
17 ./test3/9999999_00299_d_0000132.jpg
18 ./test3/0000040_04284_d_0000071.jpg
19 ./test3/0000040_02454_d_0000068.jpg
20 ./test3/0000040_03288_d_0000069.jpg
21 ./test3/0000040_03752_d_0000070.jpg
22 ./test3/0000040_01500_d_0000067.jpg
23 ./test3/0000040_01000_d_0000066.jpg
24 ./test3/0000039_05300_d_0000061.jpg
25 ./test3/0000039_00000_d_0000055.