In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import cv2 as cv
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader

In [2]:
# Root folder of the project's operational data.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this folder exists.
base_path = 'D:/AI Courses/Semester 3/Hardware Technologies for AI/Project/Operational Data/'
# Sub-folders that get appended to base_path by plain string concatenation.
# base_path ends with '/' and these start with '/', so the combined paths contain '//'.
data_path = '/Small Data Set/Small set data/'
imgs_path = '/Small Data Set/Small set images/'
# CSV file loaded into `headers`; relative to the notebook's working directory.
headers_path = './Data Column Headings.csv'

In [3]:
# Column headings file; at the moment it is only loaded (see the commented-out line at the end).
headers = pd.read_csv(headers_path)

# os.listdir returns entries in arbitrary order. Later cells pick rows by position,
# so the listing is sorted to make row numbers reproducible across machines and runs.
img_names = sorted(os.listdir(base_path + imgs_path))

# Every image has a CSV with the same stem. The CSV paths are built under data_path
# ('Small set data'); the previous version built them under imgs_path and left data_path
# unused. NOTE(review): confirm the CSV files really live in data_path.
# splitext (instead of slicing off 4 characters) also copes with extensions such as '.jpeg'.
csv_dirs = [base_path + data_path + os.path.splitext(each)[0] + '.csv' for each in img_names]
imgs = [base_path + imgs_path + each for each in img_names]

# One row per image: column 0 = image path, column 1 = path of its data CSV.
df_dict = {'Images': imgs, 'Data': csv_dirs}
df = pd.DataFrame(df_dict)
# df.columns = headers.columns

In [4]:
df

Unnamed: 0,Images,Data
0,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
1,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
2,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
3,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
4,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
...,...,...
95,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
96,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
97,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...
98,D:/AI Courses/Semester 3/Hardware Technologies...,D:/AI Courses/Semester 3/Hardware Technologies...


In [5]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# RESIZE: side length (pixels) of the square the transforms resize images to.
# BATCH_SIZE: presumably the DataLoader batch size - it is not used in the cells shown here.
RESIZE, BATCH_SIZE = 448, 16

In [37]:
# Channel-wise normalisation using the mean/std values commonly paired with
# ImageNet-pretrained torchvision models.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Preprocessing pipelines, looked up by name.
data_transforms = dict(
    # Training images: resize -> blur -> random horizontal flip -> tensor -> normalise.
    # Not applied: RandomAffine(0, shear=10, scale=(0.9, 1.1)), RandomRotation(10),
    # ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2).
    train=transforms.Compose([
        transforms.Resize((RESIZE, RESIZE)),
        transforms.GaussianBlur((19, 19)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
    # Everything that is not training: same as 'train' without the random flip.
    other=transforms.Compose([
        transforms.Resize((RESIZE, RESIZE)),
        transforms.GaussianBlur((19, 19)),
        transforms.ToTensor(),
        normalize,
    ]),
    # Detector input: resized tensor only, with neither blur nor normalisation.
    rcnn=transforms.Compose([
        transforms.Resize((RESIZE, RESIZE)),
        transforms.ToTensor(),
    ]),
    # Detector visualisation: resize only, so the result stays a PIL image.
    rcnn_v=transforms.Compose([
        transforms.Resize((RESIZE, RESIZE)),
    ]),
)

In [7]:
class TruckDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Column 0 of ``df`` holds the image path and column 1 holds the label
    (in this notebook: the path of the CSV file that belongs to the image).

    Args:
        df: DataFrame with the image path in column 0 and the label in column 1.
        transform: Optional callable applied to the loaded RGB PIL image.
        shuffle_data: If True, the rows are shuffled once at construction time.
    """

    def __init__(self,
                df,
                transform=None,
                shuffle_data=True):

        self.df = df
        self.transform = transform

        if shuffle_data:
            # sample(frac=1) returns a shuffled copy, so the caller's frame keeps its order.
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def __len__(self) -> int:
        """Number of rows (samples) in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx) -> tuple:
        """Load the image of row ``idx`` and return ``(image, label)``."""
        img_path = self.df.iloc[idx, 0]
        # The frame built in this notebook already stores full file names ('....jpg').
        # '.jpg' is only appended to bare stems, so both kinds of frame work and a
        # complete name is never turned into '....jpg.jpg'.
        if not os.path.splitext(img_path)[1]:
            img_path += '.jpg'

        image = Image.open(img_path).convert("RGB")
        label = self.df.iloc[idx, 1]

        # __init__ stores the callable as `self.transform`; the former lookup of
        # `self.img_transform` raised AttributeError on every access.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [8]:
# Category names indexed by the integer label id returned by the detector
# (index 0 is '__background__'; index 8 is 'truck', the id extract_trucks filters on).
# 'N/A' entries are placeholders - presumably ids that the label space does not use.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

In [66]:
def extract_trucks(model, img, truck_label=8):
    """Crop the largest detected truck out of ``img``.

    The image is resized with ``data_transforms['rcnn']`` and passed through the
    detector. Among the boxes labelled ``truck_label``, the one with the largest
    ``width + height`` is cut out of the resized image (box coordinates refer to
    the resized image) and run through ``data_transforms['rcnn']`` again.

    Args:
        model: Detection model. It is switched to eval mode, called with a batch
            containing one image tensor, and expected to return a list whose first
            element is a dict with 'boxes' and 'labels'.
        img: PIL image to search.
        truck_label: Label id treated as a truck. The default 8 is the index of
            'truck' in COCO_INSTANCE_CATEGORY_NAMES.

    Returns:
        The tensor produced by ``data_transforms['rcnn']`` for the cropped truck.

    Raises:
        IndexError: If the detector reports no truck box with a positive width and
            height. The type matches what the previous implementation raised for an
            image without trucks, now with an explicit message.
    """
    model.eval()
    img_rcnn = data_transforms['rcnn'](img)
    img_v = np.array(data_transforms['rcnn_v'](img))

    # Pure inference: no_grad keeps autograd from recording the forward pass,
    # which saves memory and time. unsqueeze(0) adds the batch dimension.
    with torch.no_grad():
        res = model(img_rcnn.unsqueeze(0))

    # Truncate box coordinates to integers so they can be used as array indices.
    bboxes = res[0]['boxes'].detach().cpu().numpy().astype(int)
    lbls = res[0]['labels'].detach().cpu().numpy()

    best_box = None
    best_wh = 0
    for (x, y, a, b), each_lbl in zip(bboxes, lbls):
        if each_lbl != truck_label:
            continue

        box_w = a - x
        box_h = b - y
        # Boxes that collapse to zero size after truncation cannot be cropped.
        if box_w <= 0 or box_h <= 0:
            continue

        # Strict '>' keeps the first box on ties, as before.
        if box_w + box_h > best_wh:
            best_wh = box_w + box_h
            best_box = (x, y, a, b)

    if best_box is None:
        raise IndexError('no truck (label id %d) detected in the image' % truck_label)

    x, y, a, b = best_box
    # Array rows are the y axis, columns the x axis.
    img_crop = img_v[y:b, x:a]
    img_crop = data_transforms['rcnn'](Image.fromarray(img_crop))

    return img_crop

In [50]:
# Path of one sample image: row 74, column 0 ('Images').
# iloc[row, col] is used instead of `df.iloc[74][0]`: the latter does a positional lookup on a
# Series whose index holds the column names, which pandas has deprecated.
exmple_img_path = df.iloc[74, 0]

In [51]:
exmple_img_path

'D:/AI Courses/Semester 3/Hardware Technologies for AI/Project/Operational Data//Small Data Set/Small set images/00076.jpg'

In [52]:
# Faster R-CNN (ResNet-50 FPN v2) loaded with weights='DEFAULT' and switched to eval mode.
# Module.eval() returns the module itself, which is why the call can be chained.
rcnn = models.detection.fasterrcnn_resnet50_fpn_v2(weights='DEFAULT').eval()

# The example image, converted to three RGB channels.
image = Image.open(exmple_img_path).convert('RGB')

In [53]:
image.size

(1280, 720)

In [68]:

extract_trucks(rcnn, image).shape
# cv.imshow('image', extract_trucks(rcnn, image))
# cv.waitKey(0)
# cv.destroyAllWindows()

torch.Size([3, 448, 448])

In [55]:
# Run the example image through the 'rcnn' pipeline (resize to RESIZE x RESIZE, then ToTensor).
img_transformed = data_transforms['rcnn'](image)

In [56]:
img_transformed.shape

torch.Size([3, 448, 448])

In [57]:
# Unpack the three dimensions of the transformed image tensor.
# NOTE(review): torchvision documents ToTensor as returning (C, H, W), so `w` and `h` are
# presumably height and width respectively - harmless here because the image is square.
c, w, h = img_transformed.shape

# Add a leading batch dimension of size 1 and run the detector on that single-image batch.
res = rcnn(img_transformed.reshape((1, c, w, h)))

In [58]:
res[0]

{'boxes': tensor([[ 68.3726, 109.4790, 206.3091, 341.5249],
         [ 67.9908, 118.7250, 205.1931, 339.4185],
         [ 20.9869,  87.1199,  25.3355,  92.7446],
         [ 70.5592, 119.6393, 207.2134, 336.5806],
         [ 97.7444, 227.7149, 106.9533, 251.3170]], grad_fn=<StackBackward0>),
 'labels': tensor([8, 6, 3, 7, 1]),
 'scores': tensor([0.9021, 0.6898, 0.5592, 0.2229, 0.1705], grad_fn=<IndexBackward0>)}

In [59]:
# Take the predicted boxes of the first (only) image and detach them from the autograd
# graph - the raw detector output carries a grad_fn.
bboxes = res[0]['boxes'].detach()

In [60]:
# bboxes

In [61]:
# exmpl_bbox = np.array(bboxes[0], dtype=int)

In [62]:
# Truncate each box's float coordinates to integers and stack the boxes into one array
# (one row per box).
bboxes = np.array([np.array(box, dtype=int) for box in bboxes])

In [63]:
bboxes

array([[ 68, 109, 206, 341],
       [ 67, 118, 205, 339],
       [ 20,  87,  25,  92],
       [ 70, 119, 207, 336],
       [ 97, 227, 106, 251]])

In [64]:
res[0]['labels']

tensor([8, 6, 3, 7, 1])

In [65]:
# Resized example image as an array. It comes from a PIL RGB image, so the channel order is RGB.
img_raw = np.array(data_transforms['rcnn_v'](image))

# Indices (into bboxes) of the detections to draw; change the list to inspect other boxes.
num = [0, 3]

# The loop variable is deliberately not called `id`, which would shadow the builtin
# for every cell executed afterwards.
for box_idx in num:
    x1, y1, x2, y2 = bboxes[box_idx]
    cv.rectangle(img_raw, (x1, y1), (x2, y2), (0, 255, 0), 2)
    # Width and height of the box just drawn.
    print(x2 - x1, y2 - y1)
    # cv.imshow interprets 3-channel arrays as BGR; convert so red and blue are not swapped.
    cv.imshow('image', cv.cvtColor(img_raw, cv.COLOR_RGB2BGR))
    cv.waitKey(0)
    cv.destroyAllWindows()

138 232
137 217
