In [1]:
# Debugging aid: CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous,
# so a GPU error is reported at the call that caused it. It has to be set
# before CUDA is initialised, which is why it lives in the first cell.
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [2]:
# Installs
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# Google drive setup
from google.colab import drive
from os.path import join
from os import chdir

# Locations: the Drive mount point, the project root inside the Drive, and the
# folder holding the project's source code.
MOUNT_PATH_DRIVE = '/content/drive'
BASE_PATH = join(MOUNT_PATH_DRIVE, "MyDrive/barco_skin_lesion_classification")
CODE_PATH = join(BASE_PATH, 'src/')

# Make the Google Drive contents available under MOUNT_PATH_DRIVE
drive.mount(MOUNT_PATH_DRIVE)

# Work from the source folder (presumably so the project's local packages
# can be imported by the cells that follow)
chdir(CODE_PATH)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Imports
# Utils
import matplotlib as plt
import numpy as np
import wandb
import sys
import importlib
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import datetime


# DL libraries
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import DataLoader

# User libraries
from datasets.segmentationdataset import SegmentationDataset
from models.unet_model import UNet
from trainers.segmentation_model_trainer import train_segmentation_model
from validators.segmentation_model_validator import validate_segmentation_model
from util import config, model_management

# Data

In [5]:
# Build the train and test datasets. Each one receives a features path, a
# labels path (both resolved against BASE_PATH) and the transformations from
# the project config.
train_segmentation_dataset = SegmentationDataset(
    join(BASE_PATH, config.SEGMENTATION_DATA_PATH_TRAIN_FEATURES),
    join(BASE_PATH, config.SEGMENTATION_DATA_PATH_TRAIN_LABELS),
    config.SEGMENTATION_TRAIN_TRANSFORMATIONS_BOTH
    )

test_segmentation_dataset = SegmentationDataset(
    join(BASE_PATH, config.SEGMENTATION_DATA_PATH_TEST_FEATURES),
    join(BASE_PATH, config.SEGMENTATION_DATA_PATH_TEST_LABELS),
    config.SEGMENTATION_TEST_TRANSFORMATIONS_BOTH
    )

# Place the datasets in dataloaders.
# The training loader reshuffles the samples every epoch; without shuffle=True
# the model would see identical batches in identical order each epoch.
train_segmentation_dataloader = DataLoader(
    train_segmentation_dataset,
    batch_size=config.SEGMENTATION_BATCH_SIZE,
    shuffle=True,
)

# The test loader keeps a fixed order and yields one sample per batch.
# NOTE(review): this loader is also used for model selection in the training
# loop, so the "test" set acts as a validation set - confirm this is intended.
test_segmentation_dataloader = DataLoader(test_segmentation_dataset, batch_size=1)



# Setup

In [6]:
# Build the network (3 input channels, 1 output class) and move it to the
# configured device
model = UNet(n_channels=3, n_classes=1)
model.to(config.DEVICE)

# Loss function: binary cross-entropy computed on raw logits
criteria = nn.BCEWithLogitsLoss()

# Adam optimizer over all model parameters
optimizer = optim.Adam(model.parameters(), lr=config.SEGMENTATION_LR)

# Gradient scaler handed to the trainer (for mixed-precision training)
grad_scaler = torch.cuda.amp.GradScaler()

# Weights & Biases: authenticate, then open a new run that records the main
# hyperparameters of this experiment
wandb.login()
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="segmentation",
    entity="dermapool",
    name=f"experiment_{datetime.datetime.now()}",
    config={
        "learning_rate": config.SEGMENTATION_LR,
        "batch_size": config.SEGMENTATION_BATCH_SIZE,
        "epochs": config.SEGMENTATION_EPOCHS,
    },
)

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlaraluys[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Training

In [8]:
# Track the best model seen so far. Start from +inf instead of a magic number
# so the first finite validation loss always counts as an improvement,
# whatever the scale of the loss.
best_validation_loss = float('inf')
best_model_state = model.state_dict()

for epoch in range(config.SEGMENTATION_EPOCHS):
  # Set the model in training mode
  model.train()

  # Train the model for one epoch (the returned value is presumably the loss
  # accumulated over the whole epoch - it is normalised below)
  total_train_loss_this_epoch = train_segmentation_model(
      model,
      optimizer,
      criteria,
      grad_scaler,
      train_segmentation_dataloader
  )

  # Set the model in evaluation mode
  model.eval()

  # Validate the model; also returns one sample prediction for visual logging
  total_val_loss_this_epoch, sample_image_array = validate_segmentation_model(
      model,
      criteria,
      test_segmentation_dataloader,
      test_segmentation_dataset
  )

  # Convert the sample to a greyscale ('L') PIL image.
  # detach() makes the conversion safe even if the validator returned a tensor
  # that still tracks gradients (NumPy conversion fails on such tensors).
  sample_image_array = sample_image_array.detach().cpu()
  sample_image = Image.fromarray(np.uint8(sample_image_array), 'L')

  # Normalise the accumulated losses by the number of samples in each dataset
  train_loss_this_epoch = total_train_loss_this_epoch/len(train_segmentation_dataloader.dataset)
  val_loss_this_epoch = total_val_loss_this_epoch/len(test_segmentation_dataloader.dataset)

  # Log the losses and the sample image for this epoch
  wandb.log({
      'train_loss': train_loss_this_epoch,
      'val_loss': val_loss_this_epoch,
      'sample_image': wandb.Image(sample_image)
  })

  print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}')

  # If this is the best performing model yet, save it
  if val_loss_this_epoch < best_validation_loss:
    # Update the score
    best_validation_loss = val_loss_this_epoch

    # Save the model under a timestamped name so earlier checkpoints are kept
    checkpoint_path = join(
      BASE_PATH,
      config.SEGMENTATION_MODEL_CHECKPOINT_PATH,
      f'chechpoint_{datetime.datetime.now()}.pth'
    )
    # NOTE(review): save_model's return value is kept as the best state
    # (presumably a state dict); the meaning of the trailing False flag is not
    # visible here - confirm against util.model_management.
    best_model_state = model_management.save_model(model, checkpoint_path, False)

    

  0%|          | 0/48 [00:00<?, ?it/s]


FileNotFoundError: ignored

In [None]:
# Save the final (last-epoch) model under a timestamped checkpoint name
checkpoint_path = join(
    BASE_PATH,
    config.SEGMENTATION_MODEL_CHECKPOINT_PATH,
    f'chechpoint_{datetime.datetime.now()}.pth'
)
# Keep the result in its own variable: `model` holds the last-epoch weights
# here, so storing it in `best_model_state` would overwrite the best state
# tracked by the training loop with one that is not necessarily the best.
final_model_state = model_management.save_model(model, checkpoint_path, False)

In [None]:
# Mark the run as finished: closes the Weights & Biases run that was opened
# with wandb.init() in the setup cell
wandb.finish()