In [1]:
import numpy as np 
import pandas as pd
import os
import copy
import math
from PIL import Image
import cv2

from collections import defaultdict, deque
import datetime
import time
from tqdm import tqdm

In [2]:
import torch
import torchvision
from torchvision import datasets, models
from torchvision.transforms import functional as FT
from torchvision import transforms as T
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split, Dataset

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import torchvision.datasets as dset
from torchvision.utils import draw_bounding_boxes

In [4]:
from pycocotools.coco import COCO
import albumentations as A  
from albumentations.pytorch import ToTensorV2

In [5]:
def get_transforms(train=False):
    if train:
        transform = A.Compose([
            A.Resize(100, 100), # our input size can be 600px
            A.HorizontalFlip(p=0.3),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.1),
            A.ColorJitter(p=0.1),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    else:
        transform = A.Compose([
            A.Resize(100, 100), # our input size can be 600px
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    return transform

In [6]:
class Antsandbees(datasets.VisionDataset):
    def __init__(self, root, json_path, transforms=None):
        super().__init__(Antsandbees)
        
        self.root = root
        self.transforms = transforms
        self.coco = COCO(json_path) # annotatiosn stored here
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)]
        #print(self.ids,"iddddddddddddddd")
         
    def _load_image(self, id: int):
        path = self.coco.loadImgs(id)[0]['file_name']
        image = cv2.imread(os.path.join(self.root, "Sample_Data/images", path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
    
    def _load_target(self, id):
        return self.coco.loadAnns(self.coco.getAnnIds(id))
    
    def __getitem__(self, index):
        id = self.ids[index]
        image = self._load_image(id)
        target = self._load_target(id)
        target = copy.deepcopy(self._load_target(id))
        
        boxes = [t['bbox'] + [t['category_id']] for t in target]
#         print(boxes)
        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)
#             print(transformed)
        image = transformed['image']
        boxes = transformed['bboxes']
#         print(boxes)
        
        new_boxes = []
        for box in boxes:
            xmin = box[0]
            xmax = xmin + box[2]
            ymin = box[1]
            ymax = ymin + box[3]
            new_boxes.append([xmin, ymin, xmax, ymax])
        
#         print("New boxes :", new_boxes)
        boxes = torch.tensor(new_boxes, dtype=torch.float32)
        targ = {} 
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor([t['category_id'] for t in target], dtype=torch.int64)
        targ['image_id'] = torch.tensor([t['image_id'] for t in target])
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in target], dtype=torch.int64)
        return image.div(255), targ
    
    def __len__(self):
        return len(self.ids)    

In [7]:
dataset_path = '/home/roufa/Desktop/interview/pytorch/Image_detection'
Json_file = "Sample_Data/2.coco_annotations/output.json"
transforms = get_transforms()

Json_path = os.path.join(dataset_path, Json_file)

In [8]:
Json_path

'/home/roufa/Desktop/interview/pytorch/Image_detection/Sample_Data/2.coco_annotations/output.json'

In [9]:
train_dataset = Antsandbees(dataset_path, Json_path, transforms= transforms)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [10]:
train_dataset

Dataset Antsandbees
    Number of datapoints: 10
    Root location: /home/roufa/Desktop/interview/pytorch/Image_detection
    Compose([
  Resize(always_apply=False, p=1, height=100, width=100, interpolation=1),
  ToTensorV2(always_apply=True, p=1.0, transpose_mask=False),
], p=1.0, bbox_params={'format': 'coco', 'label_fields': None, 'min_area': 0.0, 'min_visibility': 0.0, 'check_each_transform': True}, keypoint_params=None, additional_targets={})

In [11]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

In [12]:
model = models.detection.fasterrcnn_resnet50_fpn()

in_features = model.roi_heads.box_predictor.cls_score.in_features
num_classes = 2

model.roi_heads.box_predictor = FastRCNNPredictor(in_features,num_classes)

Modifying the model to add a different backbone

In [13]:
# backbone =  torchvision.models.mobilenet_v2().features
# backbone.out_channels = 1280


# anchor_generator = AnchorGenerator(sizes=((32,64,128,256,512),),aspect_ratios=((0.5,1.0,2.0),))
# roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
#                                                 output_size=7,
#                                                 sampling_ratio=2)


# model = FasterRCNN(backbone,
#                    num_classes=2,
#                    rpn_anchor_generator=anchor_generator,
#                    box_roi_pool=roi_pooler)

In [14]:
import torch.optim as optim
from torch.optim import lr_scheduler

In [15]:
train_dataset

Dataset Antsandbees
    Number of datapoints: 10
    Root location: /home/roufa/Desktop/interview/pytorch/Image_detection
    Compose([
  Resize(always_apply=False, p=1, height=100, width=100, interpolation=1),
  ToTensorV2(always_apply=True, p=1.0, transpose_mask=False),
], p=1.0, bbox_params={'format': 'coco', 'label_fields': None, 'min_area': 0.0, 'min_visibility': 0.0, 'check_each_transform': True}, keypoint_params=None, additional_targets={})

In [16]:
dl_train = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=4)

In [67]:
epochs = 10
lr = 0.05

optimizer = torch.optim.SGD(model.parameters() , lr, momentum = 0.9)
criterion = nn.CrossEntropyLoss()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
losses = []
model = model.to(device)

for epoch in range(epochs):
    for i,data in enumerate(dl_train):
        print(data)
        break
    break
#         inputs = data['image']
#         print(inputs)
        #inputs,label = data[0].to(device),data[1][0].to(device)
        #print(inputs ,'hiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiihjsd')
#         optimizer.zero_grad()
#         output = model(inputs)
#         loss = criterion(output,label)
#         loss.backward()
#         optimizer.step()
#         losses.append(loss.item())
        
        


{'image': tensor([[[ 98, 105, 112,  ...,  92,  92,  92],
         [ 98, 105, 113,  ...,  90,  91,  87],
         [103, 106, 112,  ...,  95,  91,  90],
         ...,
         [163, 167, 163,  ..., 125, 126, 126],
         [161, 161, 162,  ..., 125, 124, 122],
         [153, 156, 155,  ..., 122, 121, 117]],

        [[121, 124, 129,  ..., 110, 105,  99],
         [124, 127, 131,  ..., 110, 107, 107],
         [126, 132, 134,  ..., 112, 108, 107],
         ...,
         [159, 158, 159,  ..., 126, 125, 126],
         [157, 156, 155,  ..., 124, 123, 121],
         [152, 155, 156,  ..., 121, 120, 116]],

        [[107, 108, 114,  ...,  98,  95,  91],
         [113, 114, 119,  ...,  98,  96,  98],
         [118, 119, 121,  ..., 104, 100,  99],
         ...,
         [148, 149, 150,  ..., 120, 120, 118],
         [145, 150, 149,  ..., 119, 118, 116],
         [143, 144, 148,  ..., 116, 116, 111]]], dtype=torch.uint8), 'bboxes': [(13.600000000000001, 20.266666666666666, 12.399999999999999, 31.4

         [  3,  13,   7,  ...,  82,  81,  81]]], dtype=torch.uint8), 'bboxes': [(20.599999999999998, 8.009708737864079, 55.80000000000001, 41.99029126213592, 1)]}
{'image': tensor([[[ 13,   7,   4,  ..., 254, 255, 252],
         [ 16,   6,   1,  ..., 250, 251, 250],
         [  5,   4,   1,  ..., 248, 249, 252],
         ...,
         [ 27,  54,  75,  ...,  73,  56,  55],
         [ 55,  74,  73,  ...,  71,  55,  57],
         [ 77,  78,  65,  ...,  58,  65,  60]],

        [[  7,   1,   0,  ..., 219, 225, 224],
         [ 10,   0,   0,  ..., 219, 224, 221],
         [  2,   1,   0,  ..., 218, 221, 226],
         ...,
         [ 32,  84, 125,  ..., 112, 103, 100],
         [ 85, 123, 123,  ..., 110, 103, 106],
         [123, 124, 111,  ..., 101, 110, 104]],

        [[ 17,  11,   9,  ...,  66,  79,  82],
         [ 20,  10,   8,  ...,  69,  81,  83],
         [ 11,  10,  10,  ...,  62,  77,  91],
         ...,
         [ 30,  38,  48,  ...,  17,  22,  17],
         [ 47,  58,  50,  ...

In [17]:
for k in dl_train:
    print(k)
    break

{'image': tensor([[[ 47,  49,  48,  ...,  51,  51,  65],
         [ 49,  51,  51,  ...,  42,  45,  59],
         [ 55,  55,  57,  ...,  30,  39,  52],
         ...,
         [102,  91,  78,  ...,  79, 127, 154],
         [108,  97,  83,  ..., 149, 149, 144],
         [115, 104,  88,  ..., 130, 130, 124]],

        [[141, 144, 142,  ..., 130, 131, 135],
         [143, 145, 145,  ..., 123, 124, 130],
         [146, 146, 148,  ..., 114, 117, 127],
         ...,
         [ 92,  84,  75,  ...,  68, 119, 152],
         [ 96,  88,  78,  ..., 147, 147, 147],
         [101,  95,  81,  ..., 136, 134, 133]],

        [[143, 146, 148,  ..., 130, 127, 135],
         [143, 145, 149,  ..., 130, 129, 132],
         [147, 147, 149,  ..., 125, 127, 130],
         ...,
         [ 67,  66,  60,  ...,  62,  94, 114],
         [ 72,  71,  64,  ..., 125, 128, 124],
         [ 78,  78,  67,  ..., 118, 123, 123]]], dtype=torch.uint8), 'bboxes': [(46.800000000000004, 29.83425414364641, 24.79999999999999, 27.071

         [  2,   1,   1,  ..., 110, 103, 108]]], dtype=torch.uint8), 'bboxes': [(23.599999999999998, 13.813813813813812, 45.400000000000006, 82.58258258258259, 0)]}
{'image': tensor([[[ 13,   7,   4,  ..., 254, 255, 252],
         [ 16,   6,   1,  ..., 250, 251, 250],
         [  5,   4,   1,  ..., 248, 249, 252],
         ...,
         [ 27,  54,  75,  ...,  73,  56,  55],
         [ 55,  74,  73,  ...,  71,  55,  57],
         [ 77,  78,  65,  ...,  58,  65,  60]],

        [[  7,   1,   0,  ..., 219, 225, 224],
         [ 10,   0,   0,  ..., 219, 224, 221],
         [  2,   1,   0,  ..., 218, 221, 226],
         ...,
         [ 32,  84, 125,  ..., 112, 103, 100],
         [ 85, 123, 123,  ..., 110, 103, 106],
         [123, 124, 111,  ..., 101, 110, 104]],

        [[ 17,  11,   9,  ...,  66,  79,  82],
         [ 20,  10,   8,  ...,  69,  81,  83],
         [ 11,  10,  10,  ...,  62,  77,  91],
         ...,
         [ 30,  38,  48,  ...,  17,  22,  17],
         [ 47,  58,  50,  .