In [1]:
import os

os.environ["WANDB_PROJECT"] = "lacuna_zindi_challenge"
os.environ["WANDB_LOG_MODEL"] = "false"
os.environ["WANDB_WATCH"] = "none"
os.environ["WANDB_NOTEBOOK_NAME"] = "train_hf"

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, datasets, ops
from torchvision.transforms import v2 as transforms
import lightning as L

from lightning.pytorch.loggers import WandbLogger

torch.set_float32_matmul_precision('medium') # | 'high'

In [2]:
from lightning.pytorch.utilities.types import EVAL_DATALOADERS

batch_size = 16

class FacesData(L.LightningDataModule):
	transform = transforms.Compose([
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
		transforms.Resize(size=(800,), max_size=1333),
	])

	@staticmethod
	def convert_inputs(imgs, annot, device, small_thr=0.001):
		"""Conver dataset item to accepted target struture."""
		images, targets = [], []
		for img, annot in zip(imgs, annot):
			bbox = annot['bbox']
			small = (bbox[:, 2] * bbox[:, 3]) <= (img.size[1] * img.size[0] * small_thr)
			boxes = ops.box_convert(bbox[~small], in_fmt='xywh', out_fmt='xyxy')
			output_dict = FacesData.transform({"image": img, "boxes": boxes})
			images.append(output_dict['image'].to(device))
			targets.append({
				'boxes': output_dict['boxes'].to(device),
				'labels': torch.ones(len(boxes), dtype=int, device=device)
			})
		return images, targets
	
	@staticmethod
	def _collate_fn(batch):
		"""Define a collate function to handle batches."""
		return tuple(zip(*batch))

	def train_dataloader(self):
		train_dataset = datasets.WIDERFace(root='./data', split='train', download=True)
		return DataLoader(
			train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, collate_fn=self._collate_fn
		)
	
	def val_dataloader(self):
		train_dataset = datasets.WIDERFace(root='./data', split='val', download=True)
		return DataLoader(
			train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, collate_fn=self._collate_fn
		)




In [3]:
data = FacesData()

train = data.train_dataloader()

Files already downloaded and verified


In [4]:
example = next(iter(train))

In [None]:
example

((<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x668>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x528>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1365>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1024>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x630>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1415>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x694>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1754>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1532>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x1324>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x800>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x768>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x769>,
  <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x690>,
  <PIL.JpegImagePlugin.JpegImageFile image

In [6]:
images, targets = FacesData.convert_inputs(example[0], example[1], device="cuda")

In [None]:
targets

[{'boxes': tensor([[556, 200, 580, 235],
          [309, 206, 335, 233]], device='cuda:0'),
  'labels': tensor([1, 1], device='cuda:0')},
 {'boxes': tensor([[154, 134, 191, 187],
          [496,  21, 522,  52],
          [584,  99, 610, 132],
          [402,  73, 430, 114],
          [598, 334, 632, 376],
          [497, 326, 531, 366],
          [412, 287, 446, 336],
          [834,  69, 919, 188]], device='cuda:0'),
  'labels': tensor([1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')},
 {'boxes': tensor([[304, 112, 619, 573]], device='cuda:0'),
  'labels': tensor([1], device='cuda:0')},
 {'boxes': tensor([[438, 106, 566, 278],
          [724, 118, 854, 288],
          [136, 118, 262, 282]], device='cuda:0'),
  'labels': tensor([1, 1, 1], device='cuda:0')},
 {'boxes': tensor([[380,  30, 448, 108]], device='cuda:0'),
  'labels': tensor([1], device='cuda:0')},
 {'boxes': tensor([[384, 295, 613, 601]], device='cuda:0'),
  'labels': tensor([1], device='cuda:0')},
 {'boxes': tensor([], device='cu

In [8]:
images

[tensor([[[ 1.9235,  1.9364,  1.9507,  ...,  2.1975,  2.1975,  2.1975],
          [ 1.9364,  1.9396,  1.9507,  ...,  2.1975,  2.1975,  2.1975],
          [ 1.9507,  1.9507,  1.9549,  ...,  2.1975,  2.1975,  2.1975],
          ...,
          [-1.1225, -1.2310, -1.3774,  ...,  1.2534,  1.2576,  1.2576],
          [-0.9965, -1.1125, -1.2639,  ...,  1.1729,  1.1829,  1.1829],
          [-0.9192, -1.0352, -1.1941,  ...,  1.1600,  1.1700,  1.1700]],
 
         [[ 2.0784,  2.0916,  2.1062,  ...,  2.3761,  2.3761,  2.3761],
          [ 2.0916,  2.0949,  2.1062,  ...,  2.3761,  2.3761,  2.3761],
          [ 2.1062,  2.1062,  2.1105,  ...,  2.3761,  2.3761,  2.3761],
          ...,
          [-1.0706, -1.1815, -1.3106,  ...,  1.3584,  1.3626,  1.3626],
          [-0.9418, -1.0604, -1.1946,  ...,  1.2760,  1.2863,  1.2863],
          [-0.8627, -0.9814, -1.1233,  ...,  1.2628,  1.2731,  1.2731]],
 
         [[ 2.3786,  2.3917,  2.4062,  ...,  2.5877,  2.5877,  2.5877],
          [ 2.3917,  2.3949,

In [9]:
model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")

In [10]:
model = model.cuda()
with torch.no_grad():
    preds = model(images, targets)

In [11]:
preds

{'loss_classifier': tensor(0.2817, device='cuda:0'),
 'loss_box_reg': tensor(0.0409, device='cuda:0'),
 'loss_objectness': tensor(0.6015, device='cuda:0'),
 'loss_rpn_box_reg': tensor(0.0535, device='cuda:0')}

In [13]:
model.eval()

with torch.no_grad():
    preds = model(images)

In [14]:
preds

[{'boxes': tensor([[142.0757, 366.5052, 167.6238, 448.6838],
          [765.8087, 563.9625, 888.4448, 684.9437]], device='cuda:0'),
  'labels': tensor([10, 85], device='cuda:0'),
  'scores': tensor([0.0667, 0.0543], device='cuda:0')},
 {'boxes': tensor([], device='cuda:0', size=(0, 4)),
  'labels': tensor([], device='cuda:0', dtype=torch.int64),
  'scores': tensor([], device='cuda:0')},
 {'boxes': tensor([[   0.0000,  173.3589,  767.8617, 1066.0000],
          [  64.2902,  381.8713,  759.6177, 1050.0862]], device='cuda:0'),
  'labels': tensor([ 1, 28], device='cuda:0'),
  'scores': tensor([0.6858, 0.1167], device='cuda:0')},
 {'boxes': tensor([], device='cuda:0', size=(0, 4)),
  'labels': tensor([], device='cuda:0', dtype=torch.int64),
  'scores': tensor([], device='cuda:0')},
 {'boxes': tensor([[301.3986,  46.7504, 663.4830, 561.3995],
          [289.9357,  37.9297, 662.0736, 330.5159]], device='cuda:0'),
  'labels': tensor([10,  1], device='cuda:0'),
  'scores': tensor([0.4412, 0.091

In [12]:
# Use a pretrained Faster R-CNN model from torchvision and modify it
class FaceDetectionModel(L.LightningModule):
	def __init__(self):
		super().__init__()
		self.model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")

	def forward(self, images, targets=None):
		return self.model(images, targets)

	def training_step(self, batch, batch_idx):
		imgs, annot = batch
		images, targets = FacesData.convert_inputs(imgs, annot, device=self.device)
		loss_dict = self.model(images, targets)
		losses = sum(loss for loss in loss_dict.values())
		self.log("loss", losses)
		self.log_dict(loss_dict)
		return losses
	
	# def validation_step(self, batch, batch_idx):
	# 	imgs, annot = batch
	# 	images, targets = FacesData.convert_inputs(imgs, annot, device=self.device)
	# 	loss_dict = self.model(images, targets)
	# 	losses = sum(loss for loss in loss_dict.values())
	# 	self.log("loss", losses)
	# 	self.log_dict(loss_dict)
	# 	return losses

	def configure_optimizers(self):
		return optim.SGD(self.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)


In [13]:
data = FacesData()
model = FaceDetectionModel()

wandb_logger = WandbLogger(log_model="none")

trainer = L.Trainer(
    max_epochs=5, precision="16-mixed", log_every_n_steps=50, logger=wandb_logger
)
trainer.fit(model, data)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/data/home/eak/learning/zindi_challenge/lacuna_chl/lacuna-malaria-detection-challenge/.venv/lib/python3.9/site-packages/lightning/pytorch/trainer/configuration_validator.py:68: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_

Files already downloaded and verified
Epoch 1:   4%|▎         | 30/805 [00:21<09:22,  1.38it/s, v_num=0] 


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [14]:
model

FaceDetectionModel(
  (model): FasterRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): FrozenBatchNorm2d(16, eps=1e-05)
          (2): Hardswish()
        )
        (1): InvertedResidual(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
              (1): FrozenBatchNorm2d(16, eps=1e-05)
              (2): ReLU(inplace=True)
            )
            (1): Conv2dNormActivation(
              (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): FrozenBatchNorm2d(16, eps=1e-05)
  