In [1]:
import wandb
import pandas as pd
import torch
import torchvision.models as tvmodels
from fastai.vision.all import *
from fastai.callback.wandb import WandbCallback

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Wed Dec 20 22:44:12 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 546.33                 Driver Version: 546.33       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4060      WDDM  | 00000000:01:00.0  On |                  N/A |
|  0%   50C    P3              N/A / 115W |   2353MiB /  8188MiB |     12%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
WANDB_PROJECT = "cad_dl"
ENTITY = "iconet" # set this to team name if working in a team

RAW_DATA_AT = 'skin_data'
PROCESSED_DATA_AT = 'skin_data_proc'

path = Path(r"C:\Users\Enrique\Desktop\Udg\CAD\Project\cad_dl\artifacts\skin_data-v0")

processed_dataset_dir= Path(r"C:\Users\Enrique\Desktop\Udg\CAD\Project\cad_dl\artifacts\skin_data_proc-v3")

In [5]:
def get_df(processed_dataset_dir, is_test = False):
  df = pd.read_csv(processed_dataset_dir / 'data.csv')

  if not is_test:
    df = df[df.Stage != 'test'].reset_index(drop=True)
    df['is_valid'] = df.Stage == 'valid'
  else:
    df = df[df.Stage == 'test'].reset_index(drop=True)

  df["Label"] = [f'{f}' for f in df.Label.values]

  return df

def find_image_path(file_name):
    folders = ['nevus_train_images', 'others_train_images', 'nevus_val_images', 'others_val_images']

    for folder in folders:
        potential_path = path / folder / f'{file_name}.jpg'
        if potential_path.exists():
            return potential_path

    return None  # If the path couldn't be determined

def get_data(df, bs=4, img_size=(128, 128), augment=True):
    block = DataBlock(blocks=(ImageBlock, CategoryBlock),
                  get_x=ColReader("image_fname"),
                  get_y=ColReader("Label"),
                  splitter=ColSplitter(),
                  item_tfms=Resize(img_size),
                  batch_tfms=aug_transforms() if augment else None,
                 )
    return block.dataloaders(df, bs=bs)

def get_data(df, bs=4, img_size=(128, 128), augment=True):
    block = DataBlock(blocks=(ImageBlock, CategoryBlock),
                  get_x=ColReader("image_fname"),
                  get_y=ColReader("Label"),
                  splitter=ColSplitter(),
                  item_tfms=Resize(img_size),
                  batch_tfms=[aug_transforms() if augment else None, Normalize.from_stats(*imagenet_stats)]
                 )
    return block.dataloaders(df, bs=bs)

def log_final_metrics(learn):
  scores = learn.validate()
  metric_names = ['final_loss'] + [f'final_{x.name}' for x in learn.metrics]
  final_results = {metric_names[i] : scores[i] for i in range(len(scores))}
  for k,v in final_results.items():
      wandb.summary[k] = v

def train(config=None):
  # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  device = torch.device('cpu')
  print(f"Training on device: {device}")
  # Initialize a new wandb run
  with wandb.init(config=config):
      # If called by wandb.agent, as below,
      # this config will be set by Sweep Controller
      config = wandb.config

      set_seed(config.seed, reproducible=True)
      run = wandb.init(project=WANDB_PROJECT, entity=ENTITY, job_type="training", config=config)

      # processed_dataset_dir = download_csv()
      # df = get_df(processed_dataset_dir)



      dls = get_data(df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)

      metrics = [Precision(), Recall(), F1Score(), MatthewsCorrCoef()]

      learn = vision_learner(dls, arch=getattr(tvmodels, config.arch), pretrained=config.pretrained, metrics=metrics)


      cbs = [WandbCallback(log_preds=True, log_model=True),
              SaveModelCallback(fname=f'run-{wandb.run.id}-model', monitor='f1_score')]

      #learn.lr_find(stop_div=False, num_it=200)
      #learn.recorder.plot(suggestion=True)


      learn.fit_one_cycle(config.epochs, config.lr, cbs=cbs)

      log_final_metrics(learn)

      wandb.finish()
      

In [6]:
train_config = SimpleNamespace(
    framework="fastai",
    img_size=(128, 128),
    batch_size=32,
    augment=None, # use data augmentation
    epochs=10,
    lr=2e-3,
    arch="resnet50",
    pretrained=True,  # whether to use pretrained encoder
    seed=42,
)

set_seed(train_config.seed, reproducible=True)

run = wandb.init(project=WANDB_PROJECT, entity=ENTITY, job_type="training", config=train_config)
df = get_df(processed_dataset_dir, is_test = False)

# Apply the function to create a new column with the correct paths
df["image_fname"] = df["File_Name"].apply(find_image_path)
config = wandb.config
dls = get_data(df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)

train(train_config)

sweep_config = {
    'method': 'bayes',
    'name': 'sweep',
    'metric': {
        'goal': 'maximize',
        'name': 'f1_score'
        },
    'parameters': {
        'batch_size': {'values': [8, 32]},
        'epochs': {'values': [10, 15]},
        'lr': {
            'distribution': 'log_uniform_values',
            'max': 1e-2,
            'min': 1e-5
        },
        'augment': {'value': 'True'},
        'pretrained': {'value': 'True'},
        'img_size': {'value': (256, 256)},
        'seed': {'values': [42]},
        'arch': {
            'values': ['densenet121', 'densenet161', 'densenet169','densenet201']
            }
        },
    'early_terminate': {
        'type': 'hyperband',
        'min_iter': 3
    }
}

sweep_id = wandb.sweep(sweep_config, project=WANDB_PROJECT)

wandb.agent(sweep_id, train)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33menriquefvrc[0m ([33miconet[0m). Use [1m`wandb login --relogin`[0m to force relogin


Training on device: cpu


epoch,train_loss,valid_loss,precision_score,recall_score,f1_score,matthews_corrcoef,time
0,0.666255,0.535643,0.76878,0.773727,0.771245,0.548923,02:56
1,0.487819,0.439548,0.800645,0.798928,0.799785,0.606823,02:51
2,0.441145,0.407852,0.84565,0.760858,0.801016,0.630888,03:24
3,0.39012,0.391812,0.838202,0.8,0.818656,0.652017,02:56
4,0.396718,0.366934,0.819352,0.853619,0.836134,0.671936,02:54
5,0.359951,0.360908,0.840603,0.836997,0.838796,0.683765,02:53
6,0.327334,0.355021,0.860399,0.809651,0.834254,0.684617,02:51
7,0.296376,0.352523,0.860767,0.818767,0.839242,0.692228,02:54
8,0.290549,0.352234,0.863272,0.809115,0.835317,0.687392,02:50
9,0.279131,0.357274,0.875443,0.795174,0.83338,0.689797,02:52


Better model found at epoch 0 with f1_score value: 0.7712453233564939.
Better model found at epoch 1 with f1_score value: 0.7997852925389157.
Better model found at epoch 2 with f1_score value: 0.8010160880609652.
Better model found at epoch 3 with f1_score value: 0.8186556927297668.
Better model found at epoch 4 with f1_score value: 0.8361344537815126.
Better model found at epoch 5 with f1_score value: 0.8387963460505106.
Better model found at epoch 7 with f1_score value: 0.8392415498763397.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
eps_0,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_2,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
f1_score,▁▄▄▆██▇██▇
lr_0,▁▁▂▃▄▅▆▇▇██████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
lr_1,▁▁▂▃▄▅▆▇▇██████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
lr_2,▁▁▂▃▄▅▆▇▇██████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
matthews_corrcoef,▁▄▅▆▇█████
mom_0,██▇▆▅▄▃▂▂▁▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇█████

0,1
epoch,10.0
eps_0,1e-05
eps_1,1e-05
eps_2,1e-05
f1_score,0.83338
final_f1_score,0.83924
final_loss,0.35252
final_matthews_corrcoef,0.69223
final_precision_score,0.86077
final_recall_score,0.81877


Create sweep with ID: g13xvd1x
Sweep URL: https://wandb.ai/iconet/cad_dl/sweeps/g13xvd1x


[34m[1mwandb[0m: Agent Starting Run: 6yn7aeu4 with config:
[34m[1mwandb[0m: 	arch: densenet121
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	img_size: [256, 256]
[34m[1mwandb[0m: 	lr: 0.00013349972056758347
[34m[1mwandb[0m: 	pretrained: True
[34m[1mwandb[0m: 	seed: 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training on device: cpu




Traceback (most recent call last):
  File "C:\Users\Enrique\AppData\Local\Temp\ipykernel_17344\2963267710.py", line 69, in train
    dls = get_data(df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Enrique\AppData\Local\Temp\ipykernel_17344\2963267710.py", line 42, in get_data
    return block.dataloaders(df, bs=bs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Enrique\Desktop\Udg\CAD\Project\cad_dl\.venv\Lib\site-packages\fastai\data\block.py", line 157, in dataloaders
    return dsets.dataloaders(path=path, after_item=self.item_tfms, after_batch=self.batch_tfms, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Enrique\Desktop\Udg\CAD\Project\cad_dl\.venv\Lib\site-packages\fastai\data\core.py", line 337, in dataloaders
    dls = [dl] + [dl.new(self.subset(

Could not do one pass in your dataloader, there is something wrong in it. Please see the stack trace below:


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.


In [None]:
wandb sweep --resume iconet/cad_dl/sweeps/g13xvd1x