In [10]:
import sys
sys.path.append("..")

## Load Dataset

In [11]:
from datasets import load_dataset

# Load the dataset
dataset = load_dataset("jtz18/skin-lesion")

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


## Seeding

In [12]:
import torch
import numpy as np
import random
import os

def set_seed(seed):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)

  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

set_seed(42)

## Init Wandb

In [13]:
import wandb

# Define sweep config
sweep_configuration = {
    "name": "unet-sweep",
    "method": "bayes",
    "metric": {"goal": "maximize", "name": "Test Dice Score"},
    "parameters": {
        "learning_rate": {"values": [0.0001, 0.001, 0.01, 0.1]},
        "batch_size": {"values": [1, 2, 4, 8, 16, 32, 64]},
        "epochs": {"values": [3]},
        "dropout_rate": {"values": [0, 0.1, 0.2, 0.3, 0.4, 0.5]},
        "kernel_size": {"values": [3, 5, 7]},
    },
}

# Initialize sweep by passing in config.
sweep_id = wandb.sweep(sweep=sweep_configuration, project="unet-skin-lesion")

Create sweep with ID: yrz8av38
Sweep URL: https://wandb.ai/jooz-cave/unet-skin-lesion/sweeps/yrz8av38


In [14]:
# Hyperparameters etc.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

DEVICE = device

NUM_WORKERS = 0

IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256

PIN_MEMORY = True
LOAD_MODEL = False
CHECKPOINT_FILENAME = ""   # Model checkpoint filename if LOAD is True eg: checkpoints/checkpoint_9.pth.tar or None
CLASS = "task1"

# Define Training Function

In [15]:
from tqdm import tqdm
def train_fn(loader, model, optimizer, loss_fn, scaler):
    loop = tqdm(loader)

    for batch_idx, (data, targets) in enumerate(loop):
        data = data.to(device=DEVICE)
        targets = targets.float().unsqueeze(1).to(device=DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = loss_fn(predictions, targets)
            wandb.log({"Training Loss": loss})

        # backward
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # update tqdm loop
        loop.set_postfix(loss=loss.item())

## Augmentations

In [16]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch import nn
from src.utils import *


train_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

val_transforms = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

## Training Loop

In [17]:
from src.segFormer import Segformer
from src.maluNet import MALUNet
from src.unet import UNET

def main():
    run = wandb.init()

    # note that we define values from `wandb.config`
    # instead of defining hard values
    LEARNING_RATE = wandb.config.learning_rate
    BATCH_SIZE = wandb.config.batch_size
    NUM_EPOCHS = wandb.config.epochs
    DROPOUT_RATE = wandb.config.dropout_rate

    # model = Segformer(dropout=DROPOUT_RATE).to(device)
    model = UNET(dropout_rate=DROPOUT_RATE, kernel_size=wandb.config.kernel_size).to(device)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    wandb.log({"MODEL": model.__class__.__name__})


    train_loader, val_loader, test_loader = get_loaders(
        dataset,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    start_epoch = 0
    if LOAD_MODEL:
        checkpoint = torch.load(CHECKPOINT_FILENAME)
        model, optimizer, start_epoch = load_checkpoint(checkpoint, model, optimizer)



    check_accuracy(val_loader, model, device=DEVICE, loss_fn=loss_fn)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(start_epoch, NUM_EPOCHS):
        print(f"epoch: {epoch}")
        wandb.log({"epoch": epoch})
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer":optimizer.state_dict(),
            "epoch": epoch,
        }
        save_checkpoint(checkpoint, filename=f"checkpoint_{epoch}.pth.tar")

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE, loss_fn=loss_fn)

        # print some examples to a folder
        save_predictions_as_imgs(
            val_loader, model, folder=f"saved_images/{epoch}", device=DEVICE
        )

    # Final Evaluation Test
    check_accuracy(test_loader, model, device=DEVICE, loss_fn=loss_fn, mode="test")
    run.finish()

In [18]:
wandb.agent(sweep_id, function=main)

[34m[1mwandb[0m: Agent Starting Run: pswqe8wg with config:
[34m[1mwandb[0m: 	batch_size: 4
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	kernel_size: 7
[34m[1mwandb[0m: 	learning_rate: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


0,1
MODEL,UNET


Run pswqe8wg errored:
Traceback (most recent call last):
  File "c:\Users\jonta\code\school\t8\DeepLearning\50.039-DL\venv\lib\site-packages\wandb\agents\pyagent.py", line 308, in _run_job
    self._function()
  File "C:\Users\jonta\AppData\Local\Temp\ipykernel_39072\3284872603.py", line 39, in main
    check_accuracy(val_loader, model, device=DEVICE, loss_fn=loss_fn)
  File "c:\Users\jonta\code\school\t8\DeepLearning\50.039-DL\notebooks\..\src\utils.py", line 123, in check_accuracy
    preds = torch.sigmoid(model(x))
  File "c:\Users\jonta\code\school\t8\DeepLearning\50.039-DL\venv\lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "c:\Users\jonta\code\school\t8\DeepLearning\50.039-DL\venv\lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\Users\jonta\code\school\t8\DeepLearning\50.039-DL\notebooks\..\src\unet.py", line 56, in forward

Validation Loss: 0.8480826616287231
Got 4627214/6553600 with acc 70.61
Dice score: 0.5926533937454224
epoch: 0


100%|██████████| 82/82 [07:23<00:00,  5.41s/it, loss=0.875]


=> Saving checkpoint
Validation Loss: 0.679804190993309
Got 5443701/6553600 with acc 83.06
Dice score: 0.6067339181900024
epoch: 1


 15%|█▍        | 12/82 [01:00<06:25,  5.51s/it, loss=0.294]