In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import numpy as np
import pandas as pd


import torch
import torchvision
from torchvision import datasets, models
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.transforms import functional as FT
from torchvision import transforms as T
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split, Dataset, Subset
import copy
import math
from PIL import Image
import cv2
import albumentations as A  # our data augmentation library
from pathlib import Path
import matplotlib.pyplot as plt
%matplotlib inline

# remove warnings (optional)
import warnings
warnings.filterwarnings("ignore")
from collections import defaultdict, deque
import datetime
import time
from tqdm import tqdm # progress bar
from torchvision.utils import draw_bounding_boxes


# Now, we will define our transforms
from albumentations.pytorch import ToTensorV2


## Dataset Preprocessing:

In [4]:
# !pip install pycocotools
from pycocotools.coco import COCO


### Transforms:

In [10]:
def get_transforms(train=False):
    """Build the albumentations pipeline applied to each image and its boxes.

    Every pipeline resizes to 600x600 and finishes with ``ToTensorV2``. When
    ``train`` is truthy, random flips and colour perturbations are inserted
    between those two steps. Bounding boxes are declared in COCO format.

    Args:
        train: Whether to include the random augmentation steps.

    Returns:
        The composed ``A.Compose`` transform.
    """
    steps = [A.Resize(600, 600)]  # our input size can be 600px
    if train:
        steps += [
            A.HorizontalFlip(p=0.3),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.1),
            A.ColorJitter(p=0.1),
        ]
    steps.append(ToTensorV2())
    return A.Compose(steps, bbox_params=A.BboxParams(format='coco'))

### Custom Dataset:

In [11]:
class AquariumDetection(datasets.VisionDataset):
    """COCO-format detection dataset yielding ``(image, target)`` pairs.

    Annotations are read from ``<root>/<split>/_annotations.coco.json`` and
    images from ``<root>/<split>/``. Images that have no annotation are left
    out of the index.

    Args:
        root: Dataset root directory containing one sub-directory per split.
        split: Name of the sub-directory to load (train, valid, test).
        transform, target_transform, transforms: Forwarded to
            ``datasets.VisionDataset``. ``__getitem__`` only uses
            ``self.transforms`` and calls it albumentations-style, as
            ``self.transforms(image=..., bboxes=...)``.
    """

    def __init__(self, root, split='train', transform=None, target_transform=None, transforms=None):
        # the 3 transform parameters are required for datasets.VisionDataset
        super().__init__(root, transforms, transform, target_transform)
        self.split = split  # train, valid, test
        self.coco = COCO(os.path.join(root, split, "_annotations.coco.json"))  # annotations stored here
        # Keep only images that have at least one annotation, in ascending id order.
        self.ids = [
            img_id for img_id in sorted(self.coco.imgs.keys())
            if len(self._load_target(img_id)) > 0
        ]

    def _load_image(self, id: int):
        """Read the image with COCO id ``id`` from disk as an RGB array.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        file_name = self.coco.loadImgs(id)[0]['file_name']
        image_path = os.path.join(self.root, self.split, file_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def _load_target(self, id):
        """Return the list of COCO annotation dicts for image ``id``."""
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index):
        """Return ``(image, target)`` for the sample at ``index``.

        Returns:
            image: Tensor obtained by dividing the transformed image by 255.
            target: Dict with ``boxes`` (N x 4, xyxy, float32), ``labels``,
                ``image_id``, ``area`` and ``iscrowd``.
        """
        img_id = self.ids[index]
        image = self._load_image(img_id)
        # Deep copy so nothing below can touch the annotations cached in self.coco.
        target = copy.deepcopy(self._load_target(img_id))

        # Values placed after the four coordinates travel with the box through
        # albumentations, so the category and crowd flag stay attached to their
        # box even if the transform drops some boxes.
        boxes = [list(t['bbox']) + [t['category_id'], t['iscrowd']] for t in target]
        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)
            image = transformed['image']
            boxes = transformed['bboxes']

        if not torch.is_tensor(image):
            # No tensor conversion happened in the transform: HWC array -> CHW tensor.
            image = torch.from_numpy(np.ascontiguousarray(image)).permute(2, 0, 1)

        new_boxes, labels, iscrowd = [], [], []  # convert from xywh to xyxy
        for box in boxes:
            xmin, ymin, width, height = box[:4]
            new_boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(int(box[4]))
            iscrowd.append(int(box[5]))

        # reshape keeps the (N, 4) layout even when no box is left.
        boxes = torch.tensor(new_boxes, dtype=torch.float32).reshape(-1, 4)

        targ = {}  # here is our transformed target
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor(labels, dtype=torch.int64)
        targ['image_id'] = torch.tensor([t['image_id'] for t in target], dtype=torch.int64)
        # Recomputed from the transformed boxes, so it differs from the annotated area.
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        targ['iscrowd'] = torch.tensor(iscrowd, dtype=torch.int64)
        return image.div(255), targ  # scale images

    def __len__(self):
        """Number of images that have at least one annotation."""
        return len(self.ids)
        
        

### PATHS:

In [9]:
# Locations of the COCO 2017 training images and annotation file.
# The dataset root defaults to the original machine-specific location and can
# be redirected with the COCO_DATASET_ROOT environment variable.
ROOT_WORKDIR = Path.cwd()
ROOT_DATASET = Path(os.environ.get("COCO_DATASET_ROOT", "/media/hari/2TB_T7/Dataset/COCO_Dataset_2017"))
PATH_TRAIN_IMAGE_DIR = ROOT_DATASET / "train2017"
PATH_TRAIN_JSON_DIR = ROOT_DATASET / "annotations_2017" / "instances_train2017.json"

# Fail fast if anything is missing. The list and the loop variable have their
# own names so neither rebinds the other.
required_paths = [ROOT_DATASET, ROOT_WORKDIR, PATH_TRAIN_IMAGE_DIR, PATH_TRAIN_JSON_DIR]
for required_path in required_paths:
    assert required_path.exists(), f"{required_path} does not exist."


In [None]:
# Read the category table from the Aquarium training annotations and list the class names.
coco_classes = COCO('/home/hari/Environment/project_finetuning/Kaggle_Aquarium_Dataset/train/_annotations.coco.json')
categories = coco_classes.cats
n_classes = len(categories)
print(categories)
classes = [category['name'] for category in categories.values()]
print(classes)

# DataLoader 

In [9]:
# Define the dataset transforms
SUBSET_SIZE = 120  # number of training images used for this quick run
SUBSET_SEED = 0    # fixed so the same subset is drawn on every run


def coco_annotations_to_target(annotations):
    """Convert one image's raw COCO annotation list into a detection target.

    The training loop moves every target value to the device and feeds the
    dicts to the detection model, so each target must be a dict of tensors
    rather than the list of annotation dicts that ``CocoDetection`` yields.

    Args:
        annotations: List of COCO annotation dicts, each with ``bbox``
            (x, y, width, height) and ``category_id``.

    Returns:
        Dict with ``boxes`` (N x 4, xyxy, float32) and ``labels`` (N, int64).
    """
    boxes, labels = [], []
    for ann in annotations:
        x, y, w, h = ann['bbox']
        if w <= 0 or h <= 0:
            # torchvision detection models reject boxes without positive width and height.
            continue
        boxes.append([x, y, x + w, y + h])
        labels.append(ann['category_id'])
    return {
        # reshape keeps the (N, 4) layout for images without any usable box.
        'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
        'labels': torch.tensor(labels, dtype=torch.int64),
    }


transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),])
coco_train_dataset = torchvision.datasets.CocoDetection(
    root=PATH_TRAIN_IMAGE_DIR,
    annFile=PATH_TRAIN_JSON_DIR,
    transform=transform,
    target_transform=coco_annotations_to_target,
)

# Draw distinct indices (no image twice) and never more than the dataset holds.
subset_indices = np.random.default_rng(SUBSET_SEED).choice(
    len(coco_train_dataset),
    size=min(SUBSET_SIZE, len(coco_train_dataset)),
    replace=False,
)
coco_subset = Subset(coco_train_dataset, subset_indices.tolist())
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``,
    so samples are kept as separate items instead of being stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)
# Batches of 4 samples, shuffled, loaded by 4 workers and grouped with collate_fn.
coco_subset_loader = DataLoader(
    dataset=coco_subset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)

loading annotations into memory...
Done (t=10.28s)
creating index...
index created!


### checking cell for dataloader

In [12]:
# Anchor generator for the Faster R-CNN model: five anchor sizes combined with
# three aspect ratios, given as a single group.
# NOTE(review): no later visible cell passes `anchor_generator` to a model, and
# an FPN backbone presumably needs one size tuple per feature level — confirm
# the layout before wiring it in.
anchor_generator = AnchorGenerator(
    aspect_ratios=((0.5, 1.0, 2.0),),
    sizes=((32, 64, 128, 256, 512),),
)

# Model Fine Tuning
### Changing the model:

In [22]:
# Pick the GPU when one is available, then load the pretrained detector onto it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.to(device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [23]:
# Training length and optimizer: SGD with momentum over all model parameters.
num_epoch = 3
optimizer = optim.SGD(params=model.parameters(), lr=0.005, momentum=0.9)

In [28]:
# Pull at most the first two batches from the loader for manual inspection.
img, tar = [], []
for idx, (im, ta) in zip(range(2), coco_subset_loader):
    img.append(im)
    tar.append(ta)
        

KeyboardInterrupt: 

In [25]:
# Fine-tune the detector on the COCO subset.
for epoch in range(num_epoch):
    model.train()
    for images, targets in coco_subset_loader:
        # move images and targets to gpu if available
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass: the returned dict holds the individual loss terms
        loss_dict = model(images, targets)

        # Compute the total loss
        loss = sum(loss_dict.values())

        loss.backward()

        # Apply the gradients; without this step the weights never change.
        optimizer.step()

        print(f"Epoch: {epoch}, Loss: {loss}")
        
        
        

KeyboardInterrupt: 

In [None]:
# NOTE(review): `default_model` is not defined in any cell visible in this
# notebook (the detector above is bound to `model`) — confirm which object this
# is meant to print; on a fresh kernel this cell presumably raises NameError.
print(default_model)

In [None]:
# Fetch a single batch from the loader.
# NOTE(review): `coco_loader` is not defined in any visible cell — confirm its source.
batch = next(iter(coco_loader))

In [None]:
# Scratch experiment: builds multi-hot label vectors for the one `batch`
# fetched earlier and evaluates a classification loss on it.
# NOTE(review): `num_epochs`, `default_model` and `criterion_cls` are not defined
# in any visible cell (the optimizer cell defines `num_epoch`, singular) —
# confirm where they come from before running.
for epoch in range(num_epochs):
    # The same `batch` object is reused on every epoch; nothing here refreshes it.
    if batch is not None:
        images, category_id, bbox = batch
        # Stack the per-sample images into one tensor on the target device.
        image = torch.stack(images, dim=0).to(device)
        print(image.shape)
        
        # One vector of length 290 per sample with a 1 at each index in `lab`.
        # NOTE(review): the later training cell uses 219 for the same vector
        # length — confirm which class count is intended.
        one_hot_lables = []
        for i, lab in enumerate(category_id):
            lab = lab.long()
            one_hot = torch.zeros(290)
            one_hot[lab] = 1
            one_hot_lables.append(one_hot)
            print(i)
        one_hot_lables = torch.stack(one_hot_lables, dim=0).to(device)
        
        # `class_pred` and `bbox_pred` are not used below; the model is called a
        # second time inside the loss expression.
        class_pred, bbox_pred = default_model.forward(image)
        
        # NOTE(review): the loss receives the full model output rather than
        # `class_pred` — confirm that is intended.
        class_loss = criterion_cls(default_model(image), one_hot_lables)
        
        print(one_hot_lables.shape)

In [None]:
# Training
# NOTE(review): this loop is incomplete as written — it prepares inputs and
# clears gradients but contains no forward pass, loss, backward pass or
# optimizer step. `num_epochs` and `coco_loader` are not defined in any visible
# cell — confirm their source.
for epoch in range(num_epochs):
    # Initialised every epoch but never updated in the code below.
    epoch_loss = 0.0
    for idx, (image, category_id, bbox) in enumerate(coco_loader):
        # Stack the per-sample images into one tensor on the target device.
        image = torch.stack(image, dim=0).to(device)
        # One vector of length 219 per sample with a 1 at each index in `lab`.
        # NOTE(review): the earlier scratch cell uses 290 for the same vector
        # length — confirm which class count is intended.
        one_hot_lables = []
        for lab in category_id:
            lab = lab.long()
            one_hot = torch.zeros(219)
            one_hot[lab] = 1
            one_hot_lables.append(one_hot)
        one_hot_lables = torch.stack(one_hot_lables, dim=0).to(device)
        
        optimizer.zero_grad()
        # Stop each epoch once the batch with index 5 has been prepared.
        if idx == 5:
            break
        
        
          
            
    