In [1]:
!pip install datasets
!pip install matplotlib
!pip install numpy
!pip install torch
!pip install tqdm
!pip install torchvision
!pip install wandb
!pip install cython
!pip install pycocotools
!pip install matplotlib



In [2]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
cd /content/drive/MyDrive/Colab Notebooks/197_Assignment_2

/content/drive/MyDrive/Colab Notebooks/197_Assignment_2


In [4]:
ls

coco_eval.py   [0m[01;34mdrinks[0m/         model_weights.pth  train.ipynb    [01;34mwandb[0m/
coco_utils.py  engine.py       [01;34m__pycache__[0m/       transforms.py
config.py      label_utils.py  test.ipynb         utils.py


In [5]:
import torch
import numpy as np
import wandb
import torchvision
import os

import label_utils
import utils
import transforms as T

from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image

from engine import train_one_epoch, evaluate
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# When a CUDA device is available, make CUDA float tensors the process-wide
# default tensor type. This is a global switch: it affects every cell below.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [6]:
# Authenticate with Weights & Biases before opening a run.
wandb.login()

# Data-loading settings and dataset paths; also passed to the W&B run as its config.
config = {
    "num_workers": 0,
    "pin_memory": False,
    "batch_size": 8,
    "dataset": "drinks",
    "train_split": "drinks/labels_train.csv",
    "test_split": "drinks/labels_test.csv",
}

run = wandb.init(
    project="197_Assignment_2",
    entity="arpm",
    config=config,
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marpm[0m (use `wandb login --relogin` to force relogin)


In [7]:
# Load the label dictionary and class list for each split from the paths in `config`.
# The test split is read first, as before.
test_csv = config['test_split']
test_dict, test_classes = label_utils.build_label_dictionary(test_csv)

train_csv = config['train_split']
train_dict, train_classes = label_utils.build_label_dictionary(train_csv)

class ImageDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by an ``{image_path: rows}`` mapping.

    Each annotation row is read as four box values followed by a class label
    (``row[4]``). The box values are reordered from ``row[0], row[1], row[2],
    row[3]`` to ``row[0], row[2], row[1], row[3]`` so that targets come out as
    ``[xmin, ymin, xmax, ymax]``; rows are therefore presumably stored as
    ``[xmin, xmax, ymin, ymax, class]`` (confirm against ``label_utils``).

    Args:
        root: Directory that contains the ``drinks`` image folder. It is only
            used to list that folder; images are opened by dictionary key.
        dictionary: Mapping from image path to its annotation rows.
        transform: Optional callable applied to the PIL image.

    ``__getitem__`` returns ``(img, target)`` where ``target`` holds the keys
    ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    """

    def __init__(self, root, dictionary, transform=None):
        self.dictionary = dictionary
        self.root = root
        self.transform = transform
        self.imgs = list(os.listdir(os.path.join(root, "drinks")))
        # Snapshot the keys once so indexing is O(1) instead of rebuilding the
        # full key list on every __getitem__ call.
        self._keys = list(dictionary.keys())

    def __len__(self):
        return len(self._keys)

    def __getitem__(self, idx):
        key = self._keys[idx]
        rows = self.dictionary[key]

        # Always hand the model a 3-channel image, whatever the file's mode;
        # the context manager closes the file once the pixels are loaded.
        with Image.open(key) as handle:
            img = handle.convert("RGB")

        # Reorder each row to [xmin, ymin, xmax, ymax] and drop the class.
        # reshape(-1, 4) keeps an image without annotations as a (0, 4)
        # tensor, so the column slicing below still works.
        boxes = torch.as_tensor(
            [[row[0], row[2], row[1], row[3]] for row in rows],
            dtype=torch.float32,
        ).reshape(-1, 4)
        labels = torch.tensor([row[4] for row in rows], dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transform:
            img = self.transform(img)

        return img, target

# Wrap each label dictionary in a dataset that converts images to tensors.
train_split = ImageDataset("", train_dict, transforms.ToTensor())
test_split = ImageDataset("", test_dict, transforms.ToTensor())

print("Train split len:", len(train_split), train_split)
print("Test split len:", len(test_split), test_split)

# Both loaders use exactly the same settings, so the arguments are stated once.
# NOTE(review): shuffle is off for the training loader as well - confirm this
# is intentional.
loader_kwargs = dict(
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=config['pin_memory'],
    collate_fn=utils.collate_fn,
)

train_loader = DataLoader(train_split, **loader_kwargs)
test_loader = DataLoader(test_split, **loader_kwargs)

Train split len: 996 <__main__.ImageDataset object at 0x7f5bf793cbd0>
Test split len: 51 <__main__.ImageDataset object at 0x7f5cba3a8410>


In [8]:
def create_model(num_classes):
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a new box head.

    The pretrained detector's box predictor is replaced by a fresh
    ``FastRCNNPredictor`` that takes the same input feature size and outputs
    ``num_classes`` classes.

    Args:
        num_classes: Number of classes passed to the new predictor.

    Returns:
        The detector with the swapped box predictor.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector

In [9]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the detector with 4 output classes and move it to the chosen device.
model = create_model(4)
model.to(device)

# Only parameters that require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)
num_epochs = 4

In [10]:
# Per epoch: train on the training loader, advance the LR schedule, then
# evaluate on the test loader.
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=25)
    lr_scheduler.step()
    evaluate(model, test_loader, device=device)

# Close the W&B run, then write the trained weights to disk.
wandb.finish()
print("Saving the trained model")
torch.save(model.state_dict(), 'model_weights.pth')

Epoch: [0]  [  0/125]  eta: 0:12:36  lr: 0.000045  loss: 2.2116 (2.2116)  loss_classifier: 1.9000 (1.9000)  loss_box_reg: 0.2932 (0.2932)  loss_objectness: 0.0105 (0.0105)  loss_rpn_box_reg: 0.0079 (0.0079)  time: 6.0526  data: 0.1229  max mem: 9265
Epoch: [0]  [ 25/125]  eta: 0:09:11  lr: 0.001052  loss: 0.7975 (1.1627)  loss_classifier: 0.4344 (0.8183)  loss_box_reg: 0.3298 (0.3243)  loss_objectness: 0.0156 (0.0156)  loss_rpn_box_reg: 0.0043 (0.0045)  time: 5.5013  data: 0.1132  max mem: 9583
Epoch: [0]  [ 50/125]  eta: 0:06:51  lr: 0.002059  loss: 0.5059 (0.8550)  loss_classifier: 0.2033 (0.5223)  loss_box_reg: 0.2695 (0.3098)  loss_objectness: 0.0135 (0.0165)  loss_rpn_box_reg: 0.0052 (0.0064)  time: 5.4680  data: 0.1072  max mem: 9583
Epoch: [0]  [ 75/125]  eta: 0:04:33  lr: 0.003066  loss: 0.4466 (0.7295)  loss_classifier: 0.1721 (0.4143)  loss_box_reg: 0.2582 (0.2950)  loss_objectness: 0.0048 (0.0142)  loss_rpn_box_reg: 0.0054 (0.0060)  time: 5.4388  data: 0.1011  max mem: 9583


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Saving the trained model
