<a href="https://colab.research.google.com/github/ARPM01/FasterRCNN_finetuned/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets
!pip install matplotlib
!pip install numpy
!pip install torch
!pip install tqdm
!pip install torchvision
!pip install wandb
!pip install cython
!pip install pycocotools
!pip install matplotlib

Collecting datasets
  Downloading datasets-2.1.0-py3-none-any.whl (325 kB)
[K     |████████████████████████████████| 325 kB 4.9 MB/s 
Collecting huggingface-hub<1.0.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 7.0 MB/s 
[?25hCollecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 65.7 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 62.1 MB/s 
Collecting fsspec[http]>=2021.05.0
  Downloading fsspec-2022.3.0-py3-none-any.whl (136 kB)
[K     |████████████████████████████████| 136 kB 72.5 MB/s 
Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1
  Downloading urllib3-1.25

In [None]:
# Mount Google Drive inside the Colab VM so the project folder under
# /content/drive becomes reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/Colab Notebooks/197_Assignment_2

/content/drive/MyDrive/Colab Notebooks/197_Assignment_2


In [None]:
ls

coco_eval.py   config.py  engine.py       [0m[01;34mmodel[0m/        train.ipynb    utils.py
coco_utils.py  [01;34mdrinks[0m/    label_utils.py  [01;34m__pycache__[0m/  transforms.py  [01;34mwandb[0m/


In [None]:
import torch
import numpy as np
import wandb
import torchvision
import os

import label_utils
import utils
import transforms as T

from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image

from engine import train_one_epoch, evaluate
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# When a GPU is visible, make CUDA float tensors the default type for newly
# created tensors.
# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
# releases, and a CUDA default is a known source of DataLoader shuffle/generator
# device errors — confirm this global switch is still wanted.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [None]:
# Weights & Biases tracking is currently disabled; uncomment the login/init
# lines to record this run.
#wandb.login()

# Settings consumed by the dataset and DataLoader cells below.
config = {
    # DataLoader options
    "num_workers": 0,
    "pin_memory": False,
    "batch_size": 8,
    # Dataset name and the CSV label files for each split
    "dataset": "drinks",
    "train_split": "drinks/labels_train.csv",
    "test_split": "drinks/labels_test.csv",
}
#run = wandb.init(project="197_Assignment_2", entity="arpm", config=config)

In [None]:
# Parse the label CSV of each split. Each call returns a dictionary plus a class
# list; the dictionary is later indexed by image path in ImageDataset.
# NOTE(review): the exact row layout comes from label_utils — the dataset code
# below assumes [xmin, xmax, ymin, ymax, class]; confirm against that module.
test_dict, test_classes = label_utils.build_label_dictionary(
    config['test_split'])
train_dict, train_classes = label_utils.build_label_dictionary(
    config['train_split'])

class ImageDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by an ``{image_path: annotations}`` dict.

    Every annotation row is read as ``[xmin, xmax, ymin, ymax, class_id]``
    (that is the order the indexing in ``__getitem__`` assumes — confirm it
    against the label loader) and converted into a torchvision-style target.

    Args:
        root: Directory containing the ``drinks`` folder. It is only used to
            list that folder; images are opened directly from the dictionary
            keys, which are expected to be usable paths as given.
        dictionary: Mapping from image path to a sequence of annotation rows.
        transform: Optional callable applied to the PIL image before returning.
    """

    def __init__(self, root, dictionary, transform=None):
        self.dictionary = dictionary
        self.root = root
        self.transform = transform
        # Kept for backward compatibility; not used for indexing below.
        self.imgs = list(os.listdir(os.path.join(root, "drinks")))
        # Freeze the key order once so every lookup is O(1) instead of
        # rebuilding the full key list on each __getitem__ call.
        self._keys = list(dictionary.keys())

    def __len__(self):
        """Return the number of annotated images."""
        return len(self._keys)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th image.

        ``target`` holds ``boxes`` (N x 4 float32, ``[xmin, ymin, xmax, ymax]``),
        ``labels`` (N int64), ``image_id``, ``area`` and ``iscrowd`` (all zeros).
        """
        key = self._keys[idx]
        annotations = self.dictionary[key]

        # Load the pixels inside the context manager so the file handle is
        # released, and force 3 channels so grayscale/RGBA files do not break
        # the detector's input expectations.
        with Image.open(key) as handle:
            img = handle.convert("RGB")

        # Reorder [xmin, xmax, ymin, ymax] -> [xmin, ymin, xmax, ymax]; the
        # reshape keeps a well-formed (0, 4) tensor when an image has no boxes.
        boxes = torch.as_tensor(
            [[row[0], row[2], row[1], row[3]] for row in annotations],
            dtype=torch.float32,
        ).reshape(-1, 4)
        labels = torch.tensor([row[4] for row in annotations], dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transform:
            img = self.transform(img)

        return img, target

# Wrap each label dictionary in a dataset; ToTensor converts the PIL image
# into a float tensor.
train_split = ImageDataset("", train_dict, transforms.ToTensor())
test_split = ImageDataset("", test_dict, transforms.ToTensor())

print("Train split len:", len(train_split), train_split)
print("Test split len:", len(test_split), test_split)

# Shuffling needs a random generator that lives on the same device as newly
# created tensors; torch.empty(0).device reports that default device, so this
# works both with and without a CUDA default tensor type.
shuffle_generator = torch.Generator(device=torch.empty(0).device)

# Training batches are drawn in a fresh random order every epoch.
train_loader = DataLoader(train_split,
                          batch_size=config['batch_size'],
                          shuffle=True,
                          generator=shuffle_generator,
                          num_workers=config['num_workers'],
                          pin_memory=config['pin_memory'],
                          collate_fn=utils.collate_fn)

# Evaluation keeps a fixed order so results are comparable between runs.
test_loader = DataLoader(test_split,
                         batch_size=config['batch_size'],
                         shuffle=False,
                         num_workers=config['num_workers'],
                         pin_memory=config['pin_memory'],
                         collate_fn=utils.collate_fn)

Train split len: 996 <__main__.ImageDataset object at 0x7f5c0aff6090>
Test split len: 51 <__main__.ImageDataset object at 0x7f5c0afe37d0>


In [None]:
def create_model(num_classes):
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a fresh box head.

    Args:
        num_classes: Number of outputs of the replacement box predictor.

    Returns:
        The torchvision detection model whose ``roi_heads.box_predictor`` has
        been replaced by a ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # The new head must accept the same feature width as the one it replaces.
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector

In [None]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 4 output classes — presumably background plus the drink classes; confirm
# against the label files.
model = create_model(4)
model.to(device)

# Optimise only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# The schedule multiplies the LR by 0.1 every 3 scheduler steps; with
# num_epochs = 2 below, that decay is never reached in this run.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)
num_epochs = 2

In [None]:
# Evaluate on the test loader before any training epoch has run, to get a
# baseline for the freshly replaced box head.
evaluate(model, test_loader, device=device)

creating index...
index created!
Test:  [0/7]  eta: 0:00:09  model_time: 1.2853 (1.2853)  evaluator_time: 0.0479 (0.0479)  time: 1.4210  data: 0.0805  max mem: 6753
Test:  [6/7]  eta: 0:00:00  model_time: 0.8226 (0.8127)  evaluator_time: 0.0428 (0.0416)  time: 0.9322  data: 0.0714  max mem: 6753
Test: Total time: 0:00:06 (0.9334 s / it)
Averaged stats: model_time: 0.8226 (0.8127)  evaluator_time: 0.0428 (0.0416)
Accumulating evaluation results...
DONE (t=0.05s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.5

<coco_eval.CocoEvaluator at 0x7f5c0af807d0>

In [None]:
# Fine-tune for `num_epochs`, evaluating on the test loader after every epoch.
for epoch in range(num_epochs):
    # One pass over the training loader, printing stats every 5 iterations.
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=5)
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()
    evaluate(model, test_loader, device=device)

wandb.finish()
print("Saving the trained model")
# Only the weights (state_dict) are stored, not the full model object.
torch.save(model.state_dict(), 'model_weights.pth')

Epoch: [0]  [  0/125]  eta: 0:04:34  lr: 0.000045  loss: 1.6807 (1.6807)  loss_classifier: 1.3549 (1.3549)  loss_box_reg: 0.3028 (0.3028)  loss_objectness: 0.0150 (0.0150)  loss_rpn_box_reg: 0.0079 (0.0079)  time: 2.1980  data: 0.0907  max mem: 9432
Epoch: [0]  [  5/125]  eta: 0:04:16  lr: 0.000247  loss: 1.6688 (1.6269)  loss_classifier: 1.3014 (1.2823)  loss_box_reg: 0.3043 (0.3237)  loss_objectness: 0.0148 (0.0156)  loss_rpn_box_reg: 0.0050 (0.0053)  time: 2.1385  data: 0.0878  max mem: 9750
Epoch: [0]  [ 10/125]  eta: 0:04:06  lr: 0.000448  loss: 1.4422 (1.3391)  loss_classifier: 1.1280 (0.9928)  loss_box_reg: 0.3295 (0.3258)  loss_objectness: 0.0150 (0.0151)  loss_rpn_box_reg: 0.0054 (0.0054)  time: 2.1399  data: 0.0873  max mem: 9750
Epoch: [0]  [ 15/125]  eta: 0:03:55  lr: 0.000650  loss: 0.9796 (1.1326)  loss_classifier: 0.6373 (0.7900)  loss_box_reg: 0.3286 (0.3241)  loss_objectness: 0.0125 (0.0132)  loss_rpn_box_reg: 0.0053 (0.0054)  time: 2.1443  data: 0.0863  max mem: 10180

In [None]:
# pick one image from the test set (its target dict is not needed here)
img, _ = test_split[0]
# put the model in evaluation mode
model.eval()
# no gradients are needed for inference; the model takes a list of images
with torch.no_grad():
    prediction = model([img.to(device)])
    
# scale the float image tensor to 0-255, move channels last (C,H,W -> H,W,C)
# and cast to bytes so PIL can display it as the cell output
Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())

In [None]:
# sample one mini-batch; every target is the dict built by ImageDataset
# (assumes utils.collate_fn keeps targets as a sequence of dicts — confirm)
images, targets = next(iter(train_loader))
# map of label to class name
class_labels = {i: label_utils.index2class(i) for i in train_classes}

#run.display(height=1000)
table = wandb.Table(columns=['Image'])

# we use wandb to visualize the objects and bounding boxes
for image, target in zip(images, targets):
    box_data = []
    # target["boxes"] rows are [xmin, ymin, xmax, ymax]; labels are class ids
    for (xmin, ymin, xmax, ymax), class_id in zip(target["boxes"].tolist(),
                                                  target["labels"].tolist()):
        # class 0 is skipped, as in the original cell (treated as background)
        if class_id == 0:
            continue
        class_id = int(class_id)
        box_data.append({
            "position": {
                "minX": xmin,
                "maxX": xmax,
                "minY": ymin,
                "maxY": ymax,
            },
            "domain": "pixel",
            "class_id": class_id,
            "box_caption": label_utils.index2class(class_id),
        })

    wandb_image = wandb.Image(image, boxes={
        "ground_truth": {
            "box_data": box_data,
            "class_labels": class_labels
        }
    })
    table.add_data(wandb_image)

# wandb.log raises when no run is active, so only log and close a live run.
if wandb.run is None:
    print("No active wandb run; call wandb.init() first. Skipping wandb.log.")
else:
    wandb.log({"train_loader": table})
    wandb.finish()