In [1]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Start from a Faster R-CNN (ResNet-50 + FPN) detector pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Number of output classes for the new head: one foreground class + background.
num_classes = 2

# The new predictor must accept the same feature width that the pre-trained
# classification layer consumes, so read it off the existing head first.
roi_heads = model.roi_heads
in_features = roi_heads.box_predictor.cls_score.in_features

# Swap the pre-trained box predictor for a fresh one sized for num_classes.
roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


In [2]:
from torch.optim import SGD
from torch.nn import BCELoss
from torch.utils.data import DataLoader

In [3]:
# SGD with momentum over every parameter of the detector.
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)

# Binary cross-entropy criterion.
# NOTE(review): the training loop in this notebook sums the losses returned by
# the model itself and never calls `criterion` — confirm whether it is needed.
criterion = BCELoss()

Modifying the model to add a different backbone

In [4]:
from poolDatasetV2 import PoolDatasetV2
import os
import torch
from torchvision.transforms import v2

In [5]:
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A batch such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; an empty batch becomes ``()``.

    Note: a later cell in this notebook defines another ``collate_fn`` that
    replaces this one once it is executed.
    """
    columns = zip(*batch)
    return tuple(columns)

In [61]:
# Preprocessing applied to every sample, in order:
#   1. resize to 224x224
#   2. convert to a torchvision image tensor
#   3. cast to float32, rescaling values (scale=True)
# Currently disabled options, kept for reference:
#   v2.RandomHorizontalFlip(p=1)            (would go after ToImage)
#   v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
preprocessing_steps = [
    v2.Resize((224, 224)),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
]
transforms = v2.Compose(preprocessing_steps)

# Image folder, resolved relative to the current working directory.
# os.path.join picks the right separator for the host OS; the previous
# hard-coded "\\" only produced a valid path on Windows.
ROOT_DIR = os.path.join(os.getcwd(), "dataset", "images")

# Dataset rooted at ROOT_DIR, with the preprocessing pipeline passed as the
# second argument.
dataset = PoolDatasetV2(ROOT_DIR, transforms)

In [62]:
# Split the dataset into its training and test subsets and report their sizes.
train, test = dataset.split_Data()
print(f"{len(train)} {len(test)}")

4755 2342


In [58]:
# Two hand-picked training samples, handy for trying a collate function
# without going through a DataLoader.
data = [train[idx] for idx in (0, 1)]
# data

In [63]:

def collate_fn(batch):
    """Collate (image, label_list) samples, padding label lists to equal length.

    Args:
        batch: sequence of samples; ``item[0]`` is the image and ``item[1]``
            is a list of per-object label entries.

    Returns:
        ``(images, labels)`` — two lists with one entry per sample. Every
        label list is padded with 4-element zero tensors up to the length of
        the longest label list in the batch. An empty batch yields
        ``([], [])``.

    The label lists are copied before padding, so the lists held by the
    samples (and therefore by the dataset) are never modified.

    NOTE(review): if the label entries are bounding boxes fed to a torchvision
    detection model, all-zero padding boxes may be rejected as degenerate —
    confirm the target format the model expects.
    """
    images = [item[0] for item in batch]
    # Shallow-copy each label list: extending the original would leak padding
    # back into the dataset and accumulate across epochs.
    labels = [list(item[1]) for item in batch]

    # default=0 keeps an empty batch from raising inside max().
    max_size_labels = max((len(label) for label in labels), default=0)

    for label in labels:
        missing = max_size_labels - len(label)
        # One fresh zero tensor per padding slot (no aliasing between slots).
        label.extend(torch.zeros(4) for _ in range(missing))

    return images, labels


# collate_fn(data)

In [64]:
# Shuffled training batches, assembled with the custom collate_fn.
train_dl = DataLoader(train, batch_size=32, shuffle=True, collate_fn=collate_fn)

# Evaluation batches. The training loop iterates `test_dl`, so it must be
# defined; it uses the same collate_fn and batch size as training and keeps
# the sample order fixed.
test_dl = DataLoader(test, batch_size=32, shuffle=False, collate_fn=collate_fn)

In [None]:
# Sanity check: pull only the first batch from the loader and print it.
first_batch = next(iter(train_dl), None)
if first_batch is not None:
    i = 0
    images, labels = first_batch
    print(i, images, labels)

In [None]:
for epoch in range(10):
    # ---- training pass ----
    model.train()
    train_loss_total, train_batches = 0.0, 0
    for images, targets in train_dl:
        optimizer.zero_grad()
        # In training mode the detector returns a dict of loss tensors.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
        train_loss_total += losses.item()
        train_batches += 1
    # Report the mean over the epoch rather than only the last batch's loss;
    # max(..., 1) avoids dividing by zero on an empty loader.
    print(f"Epoch {epoch} Train Loss: {train_loss_total / max(train_batches, 1):.4f}")

    # ---- validation pass ----
    # torchvision detection models only return the loss dict in training mode;
    # in eval mode they return predictions, so calling .values() on the result
    # fails. The model is therefore left in training mode and gradients are
    # disabled instead, which yields validation losses without updating weights.
    val_loss_total, val_batches = 0.0, 0
    with torch.no_grad():
        for images, targets in test_dl:
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            val_loss_total += losses.item()
            val_batches += 1
    print(f"Epoch {epoch} Val Loss: {val_loss_total / max(val_batches, 1):.4f}")

In [27]:
from torch.nn.utils.rnn import pad_sequence #(1)

def custom_collate(data):
    """Batch variable-length samples into padded tensors.

    Args:
        data: list of dicts, each with a ``'tokenized_input'`` sequence and a
            ``'label'`` value.

    Returns:
        dict with ``'tokenized_input'`` (sequences zero-padded to the longest
        one in the batch, batch dimension first) and ``'label'`` (a tensor of
        the labels).
    """
    sequences = []
    targets = []
    for sample in data:
        sequences.append(torch.tensor(sample['tokenized_input']))
        targets.append(sample['label'])

    # pad_sequence right-pads shorter sequences with zeros.
    padded = pad_sequence(sequences, batch_first=True)

    return {
        'tokenized_input': padded,
        'label': torch.tensor(targets),
    }

[(tensor([[[0.5608, 0.5412, 0.5333,  ..., 0.3059, 0.2980, 0.2471],
           [0.6000, 0.5529, 0.5373,  ..., 0.2667, 0.2706, 0.2667],
           [0.4706, 0.5098, 0.5020,  ..., 0.2549, 0.2667, 0.2510],
           ...,
           [0.3608, 0.3804, 0.3804,  ..., 0.5451, 0.5255, 0.5059],
           [0.3686, 0.3608, 0.3608,  ..., 0.5451, 0.5294, 0.5216],
           [0.4000, 0.4118, 0.4235,  ..., 0.5490, 0.5333, 0.5333]],
  
          [[0.5294, 0.5294, 0.5137,  ..., 0.4275, 0.4078, 0.3569],
           [0.5804, 0.5412, 0.5176,  ..., 0.3725, 0.3804, 0.3725],
           [0.4745, 0.5020, 0.4824,  ..., 0.3569, 0.3686, 0.3569],
           ...,
           [0.4510, 0.4784, 0.4824,  ..., 0.5451, 0.5176, 0.4863],
           [0.4510, 0.4510, 0.4471,  ..., 0.5412, 0.5176, 0.5020],
           [0.4745, 0.4863, 0.4941,  ..., 0.5412, 0.5216, 0.5137]],
  
          [[0.5059, 0.4824, 0.4471,  ..., 0.3255, 0.3098, 0.2588],
           [0.5608, 0.5059, 0.4706,  ..., 0.2745, 0.2824, 0.2784],
           [0.4314, 0.

In [31]:
# Toy variable-length samples. The integers stand in for token indices, but
# any variable-size data would behave the same way.
token_sequences = [
    [1, 4, 5, 9, 3, 2],
    [1, 7, 3, 14, 48, 7, 23, 154, 2],
    [1, 30, 67, 117, 21, 15, 2],
    [1, 17, 2],
]
sample_labels = [0, 0, 1, 0]

nlp_data = [
    {'tokenized_input': tokens, 'label': label}
    for tokens, label in zip(token_sequences, sample_labels)
]

# Unshuffled batches of two, padded by custom_collate; grab the first batch.
loader = DataLoader(nlp_data, batch_size=2, shuffle=False, collate_fn=custom_collate)
batch = next(iter(loader))

In [32]:
# Display the first collated batch taken from `loader`.
batch

{'tokenized_input': tensor([[  1,   4,   5,   9,   3,   2,   0,   0,   0],
         [  1,   7,   3,  14,  48,   7,  23, 154,   2]]),
 'label': tensor([0, 0])}