In [491]:
import os
import numpy as np
import pandas as pd
from joblib import dump
from PIL import Image, ImageDraw
from skimage import measure
from skimage import draw
import json
from random import randrange

import utils
from collections import defaultdict
import pytorch_lightning as pl

import matplotlib.pyplot as plt
import matplotlib.patches as patches

import pycocotools

import torch
import torch.nn as nn
import torch.utils.data
from torch.optim.lr_scheduler import StepLR
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torchvision

#models
from torchvision import models
from torchvision.models.detection import faster_rcnn, RetinaNet, FCOS
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.anchor_utils import AnchorGenerator, DefaultBoxGenerator
from torchvision import datasets, models, transforms

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [492]:
class Loss(nn.Module):
    def __init__(self):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()  # For class labels
        self.smooth_l1 = nn.SmoothL1Loss()  # For bounding boxes

    def forward(self, preds, targets):
        label_loss = 0
        box_loss = 0
        for pred, target in zip(preds, targets):
            # Match predicted boxes to ground truth boxes
            pred_boxes = pred['boxes']
            target_boxes = target['boxes']
            ious = torchvision.ops.box_iou(pred_boxes, target_boxes)
            matches = ious.max(dim=1)[1]  # Match each predicted box to the ground truth box with the highest IoU

            # Only calculate loss for matched boxes
            for i, match in enumerate(matches):
                pred_label = pred['labels'][i]
                target_label = target['labels'][match]
                label_loss += self.bce(pred_label.float().unsqueeze(0), target_label.float().unsqueeze(0))

                pred_box = pred_boxes[i]
                target_box = target_boxes[match]
                box_loss += self.smooth_l1(pred_box.unsqueeze(0), target_box.unsqueeze(0))

        return label_loss + box_loss


In [493]:
class LitModel(pl.LightningModule):
    def __init__(self, model, optimizer, lr_scheduler):
        super(LitModel, self).__init__()
        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        self.criterion = Loss()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        images, targets = batch
        outputs = self(images)
        print("Outputs type:", type(outputs))
        print("Outputs content:", outputs)
        # Flatten the list of dicts into a single dict
        preds = {k: torch.cat([d[k] for d in outputs]) for k in outputs[0]}
        # Calculate loss
        loss = self.criterion(preds, targets)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        outputs = self(images)
        #print("Model output for one batch:", outputs)
        loss = self.criterion(outputs, targets)  # Calculate loss
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        return {
            'optimizer': self.optimizer,
            'lr_scheduler': self.lr_scheduler,
        }

In [494]:
def collate_fn(batch):
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    images = torch.stack(images)

    return images, targets





In [495]:
hyper_params = {"lr": 0.005,
                "momentum" : 0.9,
                "weight_decay" : 0.0005,
                "step_size" : 3,
                "gamma" : 0.1,
                "num_epochs" : 3,
                # num_classes = num of objects to identify + background class
                "num_classes" : 3,
                "model_name": "mask_rcnn",
                "feature_extract": False}

label_dict = {1: "penguin",
              2: "turtle"}

In [496]:
with open('train_annotations') as json_file:
    train_labels = json.load(json_file)
with open('valid_annotations') as json_file:
    valid_labels = json.load(json_file)

In [497]:
class MyTransform:
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, image, target):
        # apply the transformation to image
        transformed_image = self.transform(image)
        # for this case, we don't apply any transformations to the target
        # but if you need to, you should implement it here
        return transformed_image, target

def get_transform(train):
    if train:
        transform = torchvision.transforms.Compose([
            torchvision.transforms.Resize((640, 640)),  # Resize all images to 640x640
            torchvision.transforms.RandomHorizontalFlip(),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform = torchvision.transforms.Compose([
            torchvision.transforms.Resize((640, 640)),  # Resize all images to 640x640
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    return MyTransform(transform)

In [498]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
  
    """ Returns the specified model with the specified parameters """

    if model_name =='mask_rcnn':

        """FastRCNN + MaskRCNN with ResNet50 backbone"""

        # load an object detection model pre-trained on COCO
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights='MaskRCNN_ResNet50_FPN_Weights.DEFAULT')
         
        # get the number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        # now get the number of input features for the mask classifier
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                         hidden_layer,
                                                         num_classes)
        return model

    if model_name =='fast_rcnn':

        """ Fast RCNN with ResNet50 backbone """

        # load an instance segmentation model pre-trained on COCO
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights='MaskRCNN_ResNet50_FPN_Weights.DEFAULT')
         
        # get the number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        return model   


In [499]:
class PenguinsVsTurtles(torch.utils.data.Dataset):
  
    def __init__(self, root, annotations_list, transforms=None):

        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root))))
        self.annotations_list = annotations_list
        #print(len(self.imgs))  # Print number of images
        #print(len(self.annotations_list))  # Print number of annotations

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.imgs[idx])
        img = Image.open(img_path).convert("RGB")

        num_objs = len(self.annotations_list[idx])
        boxes = []
        for i in range(num_objs):
            box = self.annotations_list[idx][i]['bbox']
            boxes.append([box[0], box[1], box[0] + box[2], box[1] + box[3]])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        labels = torch.as_tensor([self.annotations_list[idx][i]['category_id'] for i in range(num_objs)], dtype=torch.int64)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        #print("Image shape:", img.shape)
        #print("Target:", target)
        print("TARGETTTTTTTTTTTTTTTTT")
        print(f'Idx: {idx}, Target: {target}')
        return img, target

    def __len__(self):
        return len(self.imgs)


In [500]:

def convert_to_per_image_annotations(annotations_list):
    #print(annotations_list)
    annotations_per_image = defaultdict(list)
    for ann in annotations_list:
        if isinstance(ann, list):
            ann = ann[0]  # extract the dictionary from the list
        annotations_per_image[ann['image_id']].append(ann)
    return list(annotations_per_image.values())

# Now preprocess your annotations_list
train_labels = convert_to_per_image_annotations(train_labels)
valid_labels = convert_to_per_image_annotations(valid_labels)

# use our dataset and defined transformations
train_dataset = PenguinsVsTurtles('./train', annotations_list=train_labels, transforms=get_transform(train=True))
valid_dataset = PenguinsVsTurtles('./valid', annotations_list=valid_labels, transforms=get_transform(train=False))

# define training and validation data loaders
train_data_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=2, shuffle=True, num_workers=0,
    collate_fn=collate_fn)

valid_data_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=collate_fn)

In [501]:
for images, targets in train_data_loader:
    print(targets)
    break


TARGETTTTTTTTTTTTTTTTT
Idx: 296, Target: {'boxes': tensor([[140., 346., 489., 553.]]), 'labels': tensor([2]), 'image_id': tensor([296]), 'area': tensor([72243.]), 'iscrowd': tensor([0])}
TARGETTTTTTTTTTTTTTTTT
Idx: 308, Target: {'boxes': tensor([[169., 269., 476., 586.]]), 'labels': tensor([2]), 'image_id': tensor([308]), 'area': tensor([97319.]), 'iscrowd': tensor([0])}
[{'boxes': tensor([[140., 346., 489., 553.]]), 'labels': tensor([2]), 'image_id': tensor([296]), 'area': tensor([72243.]), 'iscrowd': tensor([0])}, {'boxes': tensor([[169., 269., 476., 586.]]), 'labels': tensor([2]), 'image_id': tensor([308]), 'area': tensor([97319.]), 'iscrowd': tensor([0])}]


In [502]:
print(train_dataset[0])  # Print the first example in the dataset

TARGETTTTTTTTTTTTTTTTT
Idx: 0, Target: {'boxes': tensor([[119.,  25., 324., 631.]]), 'labels': tensor([1]), 'image_id': tensor([0]), 'area': tensor([124230.]), 'iscrowd': tensor([0])}
(tensor([[[2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
         [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
         [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
         ...,
         [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
         [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
         [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489]],

        [[2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
         [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
         [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
         ...,
         [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
         [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
         [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286]],

   

In [503]:
print(len(train_dataset))
print(len(train_labels))

501
500


In [1]:
model = initialize_model(
    model_name=hyper_params["model_name"],
    num_classes=hyper_params["num_classes"],
    feature_extract=hyper_params["feature_extract"],
    use_pretrained=True,
)

NameError: name 'initialize_model' is not defined

In [505]:
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=hyper_params["lr"],
    momentum=hyper_params["momentum"],
    weight_decay=hyper_params["weight_decay"],
)


In [506]:
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=hyper_params["step_size"],
    gamma=hyper_params["gamma"],
)

In [507]:
lit_model = LitModel(model, optimizer, lr_scheduler)


In [508]:
print(model)

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [509]:
images, targets = next(iter(train_data_loader))
model = LitModel(model, optimizer, lr_scheduler)
outputs = model(images)
#print(outputs)


TARGETTTTTTTTTTTTTTTTT
Idx: 225, Target: {'boxes': tensor([[170., 276., 471., 362.]]), 'labels': tensor([2]), 'image_id': tensor([225]), 'area': tensor([25886.]), 'iscrowd': tensor([0])}
TARGETTTTTTTTTTTTTTTTT
Idx: 135, Target: {'boxes': tensor([[182., 229., 425., 464.]]), 'labels': tensor([2]), 'image_id': tensor([135]), 'area': tensor([57105.]), 'iscrowd': tensor([0])}


AssertionError: targets should not be none when in training mode

In [511]:
#trainer = pl.Trainer(max_epochs=hyper_params["num_epochs"])
trainer = pl.Trainer(max_epochs=1)
trainer.fit(lit_model, train_data_loader, valid_data_loader)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type     | Params
---------------------------------------
0 | model     | MaskRCNN | 43.9 M
1 | criterion | Loss     | 0     
---------------------------------------
43.7 M    Trainable params
222 K     Non-trainable params
43.9 M    Total params
175.711   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]TARGETTTTTTTTTTTTTTTTT
Idx: 0, Target: {'boxes': tensor([[227.,  93., 525., 618.]]), 'labels': tensor([1]), 'image_id': tensor([0]), 'area': tensor([156450.]), 'iscrowd': tensor([0])}
Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:01<00:01,  1.91s/it]TARGETTTTTTTTTTTTTTTTT
Idx: 1, Target: {'boxes': tensor([[211., 198., 639., 457.]]), 'labels': tensor([2]), 'image_id': tensor([1]), 'area': tensor([110852.]), 'iscrowd': tensor([0])}
Epoch 0:   0%|          | 0/251 [00:00<?, ?it/s]                           TARGETTTTTTTTTTTTTTTTT
Idx: 361, Target: {'boxes': tensor([[281., 167., 640., 452.]]), 'labels': tensor([2]), 'image_id': tensor([361]), 'area': tensor([102315.]), 'iscrowd': tensor([0])}
TARGETTTTTTTTTTTTTTTTT
Idx: 250, Target: {'boxes': tensor([[260., 306., 374., 479.]]), 'labels': tensor([1]), 'image_id': tensor([250]), 'area': tensor([19722.]), 'iscrowd': tensor([0])}


AssertionError: targets should not be none when in training mode