<a href="https://colab.research.google.com/github/AbdElRahmanFarhan/box_semantic_segmentation/blob/main/hyperparameter_tunning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [70]:
!pip install pycocotools
!pip install coco-eval
!pip install wandb



In [71]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset archive and the extracted
# dataset folder used later in this notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [72]:
import os
import wandb
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is a debugging switch (synchronous kernel
# launches, so device-side errors surface at the failing call). It slows
# training; confirm it is still wanted for full sweeps.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Authenticate this session with Weights & Biases before any sweep/run is created.
wandb.login()

True

In [73]:
import zipfile

# Location of the dataset archive on the mounted Drive and the folder it is
# extracted into.
zip_file_path = '/content/drive/MyDrive/OSCD.zip'
dataset_folder = '/content/drive/MyDrive/OSCD/'

# Extract only when the target folder is missing or empty, so re-running the
# cell does not unpack the archive again. os.listdir() raises
# FileNotFoundError on a folder that does not exist, so the existence check
# must come first (this is the state on a first run).
if not os.path.isdir(dataset_folder) or not os.listdir(dataset_folder):
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
      zip_ref.extractall(dataset_folder)

  print(f"Unzipped to: {dataset_folder}")

In [74]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Image folders and COCO annotation files for the train/val splits, all located
# under the extracted dataset folder.
train_folder, val_folder = (
    os.path.join(dataset_folder, f'coco_carton/oneclass_carton/images/{split}2017')
    for split in ('train', 'val')
)
train_annotation, val_annotation = (
    os.path.join(dataset_folder, f'coco_carton/oneclass_carton/annotations/instances_{split}2017.json')
    for split in ('train', 'val')
)

In [75]:
import torch
from torchvision.datasets import CocoDetection
import torchvision.transforms as T
from PIL import Image
from torchvision.tv_tensors import Mask
from torchvision.transforms.functional import to_tensor
from skimage.draw import polygon as sk_polygon


class OSCDDataset(CocoDetection):
    """COCO-format instance-segmentation dataset producing Mask R-CNN style targets.

    Indexing returns ``(image, target)``. ``image`` is the result of
    ``to_tensor`` on the PIL image. ``target`` is a dict with the keys
    ``boxes``, ``labels``, ``image_id``, ``ids``, ``area``, ``iscrowd`` and
    ``masks`` when the image has annotations, and an empty dict otherwise
    (``collate_fn`` drops samples with an empty target).
    """

    def __init__(self, img_folder, ann_file):
        """Build the dataset from an image directory and a COCO annotation file.

        Args:
            img_folder: directory the images are read from.
            ann_file: path to the COCO-format annotation JSON.
        """
        super().__init__(img_folder, ann_file, transforms=None)

    def __len__(self) -> int:
        """Return the dataset length as reported by ``CocoDetection``."""
        return super().__len__()

    def __getitem__(self, idx):
        """Return the image tensor and target dict for sample ``idx``.

        Boxes are converted from COCO ``[x, y, w, h]`` to ``[x1, y1, x2, y2]``
        and kept as float32: torchvision's detection heads reject non-float
        target boxes, and truncating to int can collapse thin boxes to zero
        width/height.
        """
        img, anns = super().__getitem__(idx)

        if anns:
            width, height = img.size  # PIL reports (width, height)
            boxes, labels, areas, iscrowd, masks, ids = [], [], [], [], [], []
            for ann in anns:
                x, y, w, h = ann['bbox']
                boxes.append([x, y, x + w, y + h])
                labels.append(ann['category_id'])
                areas.append(ann['area'])
                iscrowd.append(ann['iscrowd'])
                ids.append(ann['id'])

                segmentation = ann['segmentation']
                if isinstance(segmentation, dict):
                    # RLE-encoded segmentation (COCO uses this for crowd
                    # regions): decode through the pycocotools index that
                    # CocoDetection already holds.
                    mask = torch.from_numpy(self.coco.annToMask(ann)).to(torch.bool)
                else:
                    mask = self.get_mask(segmentation, height, width)
                masks.append(mask)

            target = {
                "boxes": torch.tensor(boxes, dtype=torch.float32),
                "labels": torch.tensor(labels, dtype=torch.int64),
                # Every annotation of one sample belongs to the same image.
                "image_id": torch.tensor(anns[0]['image_id'], dtype=torch.int64),
                "ids": torch.tensor(ids, dtype=torch.int64),
                # float32, not float16: float16 overflows to inf above 65504,
                # which pixel areas of large objects easily exceed.
                # NOTE(review): key kept as "area" (singular) — confirm against
                # the evaluator that consumes these targets.
                "area": torch.tensor(areas, dtype=torch.float32),
                "iscrowd": torch.tensor(iscrowd, dtype=torch.uint8),
                "masks": Mask(torch.stack(masks, dim=0)),
            }
        else:
            target = {}
        img = to_tensor(img)  # TODO: return an empty image
        return img, target

    def get_mask(self, segmentation, height, width):
        """Rasterize a COCO polygon segmentation into a boolean mask.

        Args:
            segmentation: list of polygons, each a flat
                ``[x0, y0, x1, y1, ...]`` coordinate list.
            height: mask height in pixels.
            width: mask width in pixels.

        Returns:
            ``torch.bool`` tensor of shape ``(height, width)`` that is True
            inside any of the polygons.
        """
        mask = torch.zeros((height, width), dtype=torch.bool)
        # An instance may be split into several polygons (e.g. when partly
        # occluded); take the union instead of only the first polygon.
        for poly in segmentation:
            rr, cc = sk_polygon(poly[1::2], poly[0::2], shape=(height, width))
            mask[rr, cc] = True
        return mask


In [76]:
from torchvision.transforms import v2 as T
# Full train/val datasets built from the image folders and annotation files.
train_dataset = OSCDDataset(img_folder=train_folder, ann_file=train_annotation)
val_dataset = OSCDDataset(img_folder=val_folder, ann_file=val_annotation)

# Fixed prefixes of each split: the first 100 training and first 10 validation samples.
train_dataset_small = torch.utils.data.Subset(train_dataset, indices=list(range(100)))
val_dataset_small = torch.utils.data.Subset(val_dataset, indices=list(range(10)))

loading annotations into memory...
Done (t=1.35s)
creating index...
index created!
loading annotations into memory...
Done (t=0.14s)
creating index...
index created!


In [77]:
def collate_fn(batch):
    """Collate ``(image, target)`` pairs into two parallel lists.

    Samples whose target is empty (falsy) are dropped, so the returned lists
    can be shorter than ``batch`` and may both be empty.

    Returns:
        ``(images, targets)`` — two lists of equal length.
    """
    kept = [(image, target) for image, target in batch if target]
    images = [image for image, _ in kept]
    targets = [target for _, target in kept]
    return images, targets

In [78]:
# import matplotlib.pyplot as plt
# from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
# train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn, pin_memory=True, num_workers=2)
# val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn, pin_memory=True)
# images, targets = next(iter(val_loader))
# images = [image for image in images]
# targets = [{k: v for k, v in t.items()} for t in targets]
# img = images[0]
# target = targets[0]
# # plt_img = draw_bounding_boxes(img, target_boxes, colors="red")
# masks = (target["masks"] > 0.9).squeeze(1)
# plt_img = draw_segmentation_masks(img, masks, alpha=0.5, colors="blue")
# plt.figure(figsize=(12, 12))
# plt.imshow(plt_img.cpu().permute(1, 2, 0))
# plt.axis('off')
# plt.show()

In [79]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Start from torchvision's Mask R-CNN (ResNet-50 FPN) with its default weights.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box classification/regression head with one that has 2 outputs
# classes, reusing the input width of the existing head.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_features, num_classes=2)

# Replace the mask head likewise: same input channels, 256 hidden channels, 2 classes.
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = MaskRCNNPredictor(
    in_channels=in_features_mask,
    dim_reduced=256,
    num_classes=2,
)

In [80]:
# import torchvision
# from torchvision.models.detection import MaskRCNN
# from torchvision.models.detection.backbone_utils import BackboneWithFPN
# from torchvision.models.resnet import resnet18
# from torchvision.models.detection.backbone_utils import BackboneWithFPN

# resnet = resnet18(pretrained=True)
# backbone = torch.nn.Sequential(
#     resnet.conv1,
#     resnet.bn1,
#     resnet.relu,
#     resnet.maxpool,
#     resnet.layer1,
#     resnet.layer2,
#     resnet.layer3,
#     resnet.layer4
# )

# return_layers = {
#     '4': '0',  # layer1
#     '5': '1',  # layer2
#     '6': '2',  # layer3
#     '7': '3',  # layer4
# }

# in_channels_list = [64, 128, 256, 512]
# out_channels = 256

# fpn_backbone = BackboneWithFPN(
#     backbone,
#     return_layers=return_layers,
#     in_channels_list=in_channels_list,
#     out_channels=out_channels
# )

# model = MaskRCNN(backbone=fpn_backbone, num_classes=2)

In [81]:
# model

In [82]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [83]:
import math
from tqdm.auto import tqdm
from torch.amp import autocast, GradScaler

def run_epoch(model, dataloader, optimizer, device, scaler, is_training):
    """Run one pass over ``dataloader`` and return the mean of each loss term.

    Args:
        model: detection model that returns a dict of loss tensors when called
            as ``model(images, targets)``.
        dataloader: sized iterable of ``(images, targets)`` batches, where
            ``images`` is a list of tensors and ``targets`` a list of dicts of
            tensors. Batches with no targets are skipped.
        optimizer: optimizer stepped after every batch when ``is_training`` is
            true; not used otherwise (may be ``None``).
        device: ``torch.device`` the batch is moved to and autocast runs on.
        scaler: optional gradient scaler; when falsy, a plain
            ``backward()`` / ``optimizer.step()`` is used.
        is_training: when true, gradients are computed and weights updated;
            when false, the forward pass runs under ``torch.no_grad()``.

    Returns:
        Dict mapping each loss name to its average over the batches that were
        actually processed (all zeros if no batch was processed).

    Raises:
        AssertionError: if a training loss is NaN or infinite.
    """
    # Train mode is used for validation as well: torchvision detection models
    # return the loss dict only in train mode (eval mode returns predictions).
    model.train()
    batch_counter = 0
    epoch_loss = 0.
    epoch_losses = {
        'loss_classifier': 0.,
        'loss_box_reg': 0.,
        'loss_mask': 0.,
        'loss_objectness': 0.,
        'loss_rpn_box_reg': 0.}

    # Context manager so the bar is closed even if a batch raises.
    with tqdm(total=len(dataloader), desc="Train" if is_training else "Valid") as progress_bar:
        for images, targets in dataloader:
            if len(targets) == 0:
                # Every sample in this batch was dropped for lacking
                # annotations; still advance the bar so it reaches 100%.
                progress_bar.update()
                continue

            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            batch_counter += 1

            with autocast(device_type=device.type, dtype=torch.bfloat16):
                if is_training:
                    losses = model(images, targets)
                else:
                    with torch.no_grad():
                        losses = model(images, targets)

                loss = sum(losses.values())

            loss_item = loss.item()
            if is_training:
                # Check before stepping so a non-finite loss never reaches the
                # weights. math.isfinite() is False for both NaN and +/-inf.
                assert math.isfinite(loss_item), "Loss is NaN or infinite. Stopping training."
                optimizer.zero_grad()
                if scaler:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    loss.backward()
                    optimizer.step()

            for name, value in losses.items():
                epoch_losses[name] = epoch_losses.get(name, 0.) + value.item()
            epoch_loss += loss_item
            # batch_counter already includes the current batch, so it is the
            # correct denominator (dividing by batch_counter + 1 under-reports).
            progress_bar.set_postfix(avg_loss=epoch_loss / batch_counter)
            progress_bar.update()

    denominator = max(batch_counter, 1)  # avoid 0/0 when every batch was skipped
    return {name: total / denominator for name, total in epoch_losses.items()}

In [84]:
# Sweep definition handed to wandb.sweep(): random search over the parameter
# grid below.
sweep_config = {
    'method': 'random'
    }

# Objective of the sweep. The name has to match a key that the training
# function passes to wandb.log(); train() logs the summed validation loss as
# 'valid/loss', so that key is used here (the earlier 'val_loss' was never logged).
metric = {
    'name': 'valid/loss',
    'goal': 'minimize'
    }

sweep_config['metric'] = metric

# Candidate values for each hyperparameter read from wandb.config in train().
# Each list currently holds a single value, so every run uses the same setting.
parameters_dict = {
    'epochs': {
        'values': [5]
        },
    'lr': {
        'values': [5e-4]
        },
    'weight_decay': {
          'values': [1e-2]
        },
    'bs': {
          'values': [2]
        },
    'save_model_every': {
          'values': [5]
        },
    'scheduler': {
          'values': ['step']
        },
    'step_size': {
          'values': [5]
        },
    'gamma': {
          'values': [0.1]
        },
    'optimizer_type': {
          'values': ['adamw']
        },
    }

sweep_config['parameters'] = parameters_dict

In [85]:
# Register the sweep under the "box_segmentation" project; the returned id is
# what wandb.agent() is given to pull runs from this sweep.
sweep_id = wandb.sweep(sweep_config, project="box_segmentation")

Create sweep with ID: ewj9xie8
Sweep URL: https://wandb.ai/abdelrahman-farhan/box_segmentation/sweeps/ewj9xie8


In [86]:
from torch.optim import AdamW
import datetime

def train(config=None):
    """Train the module-level ``model`` for one W&B run and log per-epoch losses.

    Meant to be passed to ``wandb.agent``: hyperparameters (``lr``,
    ``weight_decay``, ``epochs``, ``bs``, ``save_model_every``,
    ``optimizer_type``, ``scheduler``, ``step_size``, ``gamma``) are read from
    ``wandb.config`` after the run is initialised.

    Args:
        config: optional configuration forwarded to ``wandb.init``.

    Raises:
        ValueError: if ``optimizer_type`` or ``scheduler`` is not a supported name.
    """
    with wandb.init(config=config):
        config = wandb.config
        lr = config.lr
        weight_decay = config.weight_decay
        epochs = config.epochs
        bs = config.bs
        save_every = config.save_model_every

        # NOTE(review): this is the shared module-level model; with an agent
        # count > 1 a later run would continue from the previous run's weights.
        model.to(device)
        params = [p for p in model.parameters() if p.requires_grad]

        optimizer_type = config.optimizer_type
        if optimizer_type == 'sgd':
            optimizer = torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
        elif optimizer_type == 'adamw':
            optimizer = torch.optim.AdamW(params, lr=lr, weight_decay=weight_decay, amsgrad=True)
        else:
            # Fail with a clear message instead of a NameError further down.
            raise ValueError(f"Unsupported optimizer_type: {optimizer_type!r}")

        scheduler = config.scheduler
        scheduler_step = config.step_size
        gamma = config.gamma
        # All schedulers are stepped once per epoch (see the loop below).
        if scheduler == 'step':
            lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step, gamma=gamma)
        elif scheduler == 'linear':
            lr_scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=epochs)
        elif scheduler == 'cyclic':
            lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, total_steps=epochs)
        else:
            raise ValueError(f"Unsupported scheduler: {scheduler!r}")

        # Do not request more worker processes than the machine has CPUs.
        num_workers = min(12, os.cpu_count() or 1)
        train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, collate_fn=collate_fn,
                                  pin_memory=True, num_workers=num_workers)
        # Validation only averages losses, so there is no reason to shuffle.
        val_loader = DataLoader(val_dataset, batch_size=bs, shuffle=False, collate_fn=collate_fn,
                                pin_memory=True, num_workers=num_workers)

        scaler = GradScaler()
        for epoch in tqdm(range(epochs), desc="Epochs"):

            train_losses = run_epoch(model, train_loader, optimizer, device, scaler, is_training=True)

            with torch.no_grad():
                valid_losses = run_epoch(model, val_loader, None, device, scaler, is_training=False)
            lr_scheduler.step()

            # One wandb.log call per epoch: every call advances the W&B step,
            # so separate calls would scatter one epoch's metrics over several steps.
            metrics = {f'train/{k}': v for k, v in train_losses.items()}
            metrics['train/loss'] = sum(train_losses.values())
            metrics.update({f'valid/{k}': v for k, v in valid_losses.items()})
            metrics['valid/loss'] = sum(valid_losses.values())
            metrics['lr'] = lr_scheduler.get_last_lr()[0]
            wandb.log(metrics)

            if (epoch + 1) % save_every == 0:
                model_dir = os.path.join(dataset_folder, 'model')
                # torch.save does not create missing directories.
                os.makedirs(model_dir, exist_ok=True)
                model_path = os.path.join(model_dir, f'model_{wandb.run.sweep_id}_{(epoch+1)}.pth')
                torch.save(model.state_dict(), model_path)

In [None]:
# Start a sweep agent that calls train() for a single run (count=1) of the sweep.
wandb.agent(sweep_id, train, count=1)

[34m[1mwandb[0m: Agent Starting Run: 5055pggv with config:
[34m[1mwandb[0m: 	bs: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	gamma: 0.1
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: 	optimizer_type: adamw
[34m[1mwandb[0m: 	save_model_every: 5
[34m[1mwandb[0m: 	scheduler: step
[34m[1mwandb[0m: 	step_size: 5
[34m[1mwandb[0m: 	weight_decay: 0.01


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Train:   0%|          | 0/3701 [00:00<?, ?it/s]

In [None]:
# NOTE(review): leftover debug cell — prints a fixed string and does not
# contribute to the analysis; candidate for removal.
print("test")

In [None]:
# Show, for every parameter of the model's backbone, whether it is trainable
# (requires_grad); only those with requires_grad=True are handed to the optimizer in train().
print([p.requires_grad for p in model.backbone.parameters()])