In [1]:
# Install the Weights & Biases client (imported as `wandb` further down) into the kernel's environment
%pip install wandb

Note: you may need to restart the kernel to use updated packages.


In [2]:
def setup_file_system(in_colab):
    """Return the base path of the project, mounting Google Drive first when in Colab.

    Args:
        in_colab: Truthy when the notebook is running inside Google Colab.

    Returns:
        str: The project folder under the mounted Drive when ``in_colab`` is
        truthy, otherwise the fixed local workspace path.
    """
    # Imported here so the function does not rely on another cell having
    # imported `join` before it is called.
    from os.path import join

    if in_colab:
        # Only importable inside Colab, hence the function-scope import
        from google.colab import drive

        # Set the base and mount path
        MOUNT_PATH_DRIVE = '/content/drive'
        BASE_PATH = join(
            MOUNT_PATH_DRIVE,
            "MyDrive/barco_skin_lesion_classification"
        )

        # Mount the google drive
        drive.mount(MOUNT_PATH_DRIVE)

        return BASE_PATH

    # Local run: the project lives at a fixed workspace path
    return "/workspaces/barco_skin_lesion_classification"

In [3]:
import sys
from os import chdir
from os.path import join

# Detect Colab vs. local by checking whether the google.colab module is already loaded
IN_COLAB = 'google.colab' in sys.modules

# Resolve the base path of the project (this also mounts Google Drive when in Colab)
BASE_PATH = setup_file_system(IN_COLAB)

# Switch the working directory to the project's src/ folder
# NOTE(review): presumably so the project-local packages imported later resolve — confirm
chdir(join(BASE_PATH, "src/"))

In [4]:
# Imports
# Utils
# pyplot (not the top-level matplotlib package) is the module conventionally aliased as `plt`
import matplotlib.pyplot as plt
import numpy as np
import wandb
import sys
import importlib
from PIL import Image, ImageFile
# Let PIL load image files whose data is truncated instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True
import datetime


# DL libraries
import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import DataLoader

# User libraries
from datasets.classificationdataset import ClassificationDataset
from trainers.classifier_model_trainer import train_classification_model
from validators.classification_model_validator import validate_classification_model
from util import config, model_management

  from .autonotebook import tqdm as notebook_tqdm


# Data

In [5]:
# Build the train and test datasets; each takes a features path, a metadata path
# and the transformations configured for that split
train_classification_dataset = ClassificationDataset(
    join(BASE_PATH, config.CLASSIFICATION_DATA_PATH_TRAIN_UNSEGMENTED_FEATURES),
    join(BASE_PATH, config.METADATA_TRAIN_PATH),
    config.CLASSIFICATION_TRAIN_TRANSFORMATIONS
    )

test_classification_dataset = ClassificationDataset(
    join(BASE_PATH, config.CLASSIFICATION_DATA_PATH_TEST_FEATURES),
    join(BASE_PATH, config.METADATA_TEST_PATH),
    config.CLASSIFICATION_TEST_TRANSFORMATIONS
    )

# Place the datasets in dataloaders.
# The training loader reshuffles the samples every epoch so batches are not
# always drawn in the on-disk / metadata order; the test loader keeps a fixed
# order and yields one sample at a time.
# NOTE(review): the recorded training output fails in default_collate with
# "found <class 'PIL.Image.Image'>", which suggests the configured
# transformations do not convert images to tensors — confirm in util.config.
train_classification_dataloader = DataLoader(
    train_classification_dataset,
    batch_size=config.CLASSIFICATION_BATCH_SIZE,
    shuffle=True,
)
test_classification_dataloader = DataLoader(test_classification_dataset, batch_size=1)

# Setup

In [6]:
# Load the pretrained EfficientNet-B0 from NVIDIA's torch hub entry and move it to the configured device
# NOTE(review): the model is used exactly as loaded; nothing in this notebook adapts its
# output layer to the dataset's classes — confirm that is intended
model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)
model.to(config.DEVICE)

# Adam optimizer over all model parameters, with the classification learning rate from config
optimizer = optim.Adam(model.parameters(), lr=config.CLASSIFICATION_LR)

# Cross-entropy loss function
criteria = nn.CrossEntropyLoss()

# Gradient scaler that is handed to the training function
# NOTE(review): presumably used for mixed-precision training inside train_classification_model — confirm
grad_scaler = torch.cuda.amp.grad_scaler.GradScaler()

# Log in to Weights & Biases
wandb.login()

# Get the current time; it is used below to give the wandb run a unique name
now = datetime.datetime.now()

# Start the wandb run and record the hyperparameters of this experiment
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="classification", 
    entity="dermapool",
    name=f'experiment_{now.strftime("%m_%d_%Y_%H_%M_%S")}', 
    config={
        "learning_rate": config.CLASSIFICATION_LR,
        "batch_size": config.CLASSIFICATION_BATCH_SIZE,
        "epochs": config.CLASSIFICATION_EPOCHS,
        "image_dims": f'h: {config.CLASSIFICATION_IMAGE_HEIGHT}, w: {config.CLASSIFICATION_IMAGE_WIDTH}',
    }
)

Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m ([33mdermapool[0m). Use [1m`wandb login --relogin`[0m to force relogin


# Training

In [7]:
# Track the best model seen so far. Starting from +inf guarantees the first
# epoch always counts as an improvement, whatever the scale of the loss.
best_validation_loss = float('inf')
best_model_state = model.state_dict()

# Train for the number of epochs configured for classification — the same
# value that is logged to wandb as "epochs" — not the segmentation setting.
for epoch in range(config.CLASSIFICATION_EPOCHS):
    # Set the model in training mode
    model.train()

    # Train the model for one pass over the training dataloader
    total_train_loss_this_epoch = train_classification_model(
        model,
        optimizer,
        criteria,
        grad_scaler,
        train_classification_dataloader
    )

    # Set the model in evaluation mode
    model.eval()

    # Validate the model on the test dataloader
    total_val_loss_this_epoch = validate_classification_model(
        model,
        criteria,
        test_classification_dataloader,
    )

    # Normalise the loss totals by the number of samples in each dataset
    # NOTE(review): assumes the trainer/validator return a loss summed over samples — confirm
    train_loss_this_epoch = total_train_loss_this_epoch/len(train_classification_dataloader.dataset)
    val_loss_this_epoch = total_val_loss_this_epoch/len(test_classification_dataloader.dataset)

    # Log the train and validation loss of this epoch
    wandb.log({
        'train_loss': train_loss_this_epoch,
        'val_loss': val_loss_this_epoch,
    })

    print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}')

    # If this is the best performing model yet, save it
    if val_loss_this_epoch < best_validation_loss:
        # Update the score
        best_validation_loss = val_loss_this_epoch

        now = datetime.datetime.now()

        # Save the model under a timestamped checkpoint name
        # NOTE(review): 'chechpoint' looks like a typo of 'checkpoint'; left as is so
        # file names stay consistent with checkpoints that were already written
        checkpoint_path = join(
            BASE_PATH,
            config.CLASSIFICATION_MODEL_CHECKPOINT_PATH,
            f'chechpoint_{now.strftime("%m_%d_%Y_%H_%M_%S")}.pth'
        )
        best_model_state = model_management.save_model(model, checkpoint_path, False)

    

  0%|          | 0/113 [00:00<?, ?it/s]


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>

In [None]:
# Write the model as it stands after the last epoch to a fresh, timestamped checkpoint file
now = datetime.datetime.now()
checkpoint_path = join(BASE_PATH, config.CLASSIFICATION_MODEL_CHECKPOINT_PATH, f'chechpoint_{now.strftime("%m_%d_%Y_%H_%M_%S")}.pth')

# Keep whatever save_model returns as the current model state
best_model_state = model_management.save_model(
    model,
    checkpoint_path,
    True,
)

In [None]:
# Mark the Weights & Biases run started by wandb.init() as finished
wandb.finish()

0,1
train_loss,█▆▅▄▄▃▃▂▂▂▂▁▁
val_loss,█▅▄▃▂▂▂▂▂▁▁▁▂

0,1
train_loss,0.04736
val_loss,0.60379
