<a href="https://colab.research.google.com/github/EnriqueFV/cad_dl/blob/main/Binary%20Ineptionv3%20sweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch>=1.9
!pip install fastai>=2.7
!pip install matplotlib
!pip install numpy
!pip install pandas
!pip install scikit-learn
!pip install torchvision
!pip install tqdm
!pip install wandb
!pip install timm

Collecting wandb
  Downloading wandb-0.16.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.39.1-py2.py3-none-any.whl (254 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.1/254.1 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->w

In [2]:
import wandb
import pandas as pd
import torch
import torchvision.models as tvmodels
from fastai.vision.all import *
from fastai.callback.wandb import WandbCallback
import cv2
import timm
from sklearn.metrics import ConfusionMatrixDisplay
from IPython.display import display, Markdown

In [3]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Thu Dec 28 16:10:48 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
# --- Weights & Biases destination -------------------------------------------
WANDB_PROJECT = 'cad_dl'
ENTITY = 'iconet'  # set this to team name if working in a team

# --- Artifact names ----------------------------------------------------------
# PROCESSED_DATA_AT and TV_DATA_AT are fetched with the ':latest' alias further down;
# the remaining names are presumably sibling artifacts (not referenced in the visible cells).
RAW_DATA_AT = "skin_data"
TEST_DATA_AT = "test_data"
PROCESSED_DATA_AT = "skin_data_proc"
TV_DATA_AT = "train_val_data"
TABLE_DATA_AT = "train_val_table"
TABLE_TEST_AT = "test_table"

In [13]:
def get_df(processed_dataset_dir, is_test = False):
    """Load `data.csv` from `processed_dataset_dir` and keep one side of the test split.

    Args:
        processed_dataset_dir: path-like object supporting `/`; must contain `data.csv`
            with at least the `Stage` and `Label` columns.
        is_test: when True keep only rows whose `Stage` is 'test'; otherwise keep all
            other rows and add a boolean `is_valid` column (True where `Stage` is 'valid').

    Returns:
        The filtered DataFrame with a fresh 0..n-1 index and `Label` converted to strings.
    """
    table = pd.read_csv(processed_dataset_dir / 'data.csv')
    test_rows = table.Stage == 'test'

    if is_test:
        subset = table[test_rows].reset_index(drop=True)
    else:
        subset = table[~test_rows].reset_index(drop=True)
        subset['is_valid'] = subset.Stage == 'valid'

    # Labels are stringified so they are handled as categories rather than numbers.
    subset["Label"] = [f'{value}' for value in subset.Label.values]
    return subset

def find_image_path(file_name, is_test = False):
    """Locate `<file_name>.jpg` inside the known image folders under the global `path`.

    Args:
        file_name: image name without the `.jpg` extension.
        is_test: search the test folder instead of the train/validation folders.

    Returns:
        The first existing candidate path (folders are tried in list order),
        or None when the file is in none of them.
    """
    if is_test:
        candidate_dirs = ['xxx_test_images']
    else:
        candidate_dirs = ['nevus_train_images', 'others_train_images',
                          'nevus_val_images', 'others_val_images']

    # `path` is a module-level variable assigned in another cell.
    candidates = (path / directory / f'{file_name}.jpg' for directory in candidate_dirs)
    return next((candidate for candidate in candidates if candidate.exists()), None)

def get_data(df, bs=4, img_size=(128, 128), augment=True):
    """Build fastai DataLoaders for image classification from a DataFrame.

    Args:
        df: DataFrame with an `image_fname` column (image path), a `Label` column
            (target category) and the column `ColSplitter()` uses to separate
            training from validation rows.
        bs: batch size.
        img_size: size passed to `Resize` for every item.
        augment: when true, `aug_transforms()` is applied per batch before
            normalisation. Previously this flag was accepted but ignored and
            augmentation was always on. String values such as 'True'/'False'
            (as delivered by some sweep configs) are parsed explicitly.

    Returns:
        The DataLoaders produced by `DataBlock.dataloaders`.
    """
    # A non-empty string is always truthy, so 'False' must be parsed rather than tested.
    if isinstance(augment, str):
        augment = augment.strip().lower() in ('true', '1', 'yes', 'y')

    # Normalisation with ImageNet statistics is always applied; augmentation is optional.
    batch_tfms = [Normalize.from_stats(*imagenet_stats)]
    if augment:
        batch_tfms = [*aug_transforms(), *batch_tfms]

    block = DataBlock(blocks=(ImageBlock, CategoryBlock),
                  get_x=ColReader("image_fname"),
                  get_y=ColReader("Label"),
                  splitter=ColSplitter(),
                  item_tfms=Resize(img_size),
                  batch_tfms=batch_tfms
                 )
    return block.dataloaders(df, bs=bs)

def log_final_metrics(learn):
    """Run `learn.validate()` and store every returned score in the W&B run summary.

    The first score is written as `final_loss`; the following ones are written as
    `final_<metric name>`, taking names from `learn.metrics` in order.
    """
    scores = learn.validate()
    summary_keys = ['final_loss']
    summary_keys.extend(f'final_{metric.name}' for metric in learn.metrics)

    final_results = {summary_keys[position]: score for position, score in enumerate(scores)}
    for key, value in final_results.items():
        wandb.summary[key] = value

def train(config=None):
  """Train one model inside a single W&B run; intended as the `wandb.agent` target.

  Args:
      config: optional default hyperparameters. When launched by a sweep agent the
          sweep controller supplies the values, which are read back from
          `wandb.config` (seed, batch_size, img_size, augment, arch, pretrained,
          epochs, lr).

  Reads the module-level training DataFrame `df`. Logs per-epoch metrics through
  `WandbCallback`, saves the checkpoint with the best `accuracy`, and writes the
  final validation scores to the run summary.
  """
  # Only reported for the log; nothing below uses `device` explicitly.
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  print(f"Training on device: {device}")

  # Exactly one wandb.init per training run. The previous version opened a run with
  # `with wandb.init(config=config)` and then called wandb.init a second time inside it.
  # The context manager finishes the run on exit, including when training raises,
  # so no explicit wandb.finish() is needed.
  with wandb.init(project=WANDB_PROJECT, entity=ENTITY, job_type="training", config=config):
      # If called by wandb.agent, this config will be set by the Sweep Controller
      config = wandb.config

      set_seed(config.seed, reproducible=True)

      dls = get_data(df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)

      metrics = [Precision(), Recall(), accuracy, F1Score(), MatthewsCorrCoef()]

      # The architecture is looked up by name in torchvision.models.
      learn = vision_learner(dls, arch=getattr(tvmodels, config.arch), pretrained=config.pretrained, metrics=metrics)

      cbs = [WandbCallback(log_preds=True, log_model=True),
              SaveModelCallback(fname=f'run-{wandb.run.id}-model', monitor='accuracy')]

      learn.fit_one_cycle(config.epochs, config.lr, cbs=cbs)

      log_final_metrics(learn)

def get_predictions(learner, test_dl=None, max_n=None):
    """Collect predictions for a dataloader together with displayable samples.

    Args:
        learner: object exposing `get_preds` and `dls.valid.show_results`.
        test_dl: dataloader to predict on; `learner.dls.valid` when None.
        max_n: forwarded to `show_results` as the maximum number of samples.

    Returns:
        `(samples, outputs, predictions)`: the samples and decoded outputs returned by
        `show_results(..., show=False)`, and the raw predictions from `get_preds`.
    """
    if test_dl is None:
        test_dl = learner.dls.valid

    inputs, predictions, targets, decoded = learner.get_preds(
        dl=test_dl, with_input=True, with_decoded=True
    )

    # Inputs and targets are joined into one tuple, the form show_results is given here.
    batch = tuplify(inputs) + tuplify(targets)
    _, _, samples, outputs = learner.dls.valid.show_results(
        batch, decoded, show=False, max_n=max_n
    )
    return samples, outputs, predictions

def train_eval(config):
  """Evaluate the registered model artifact and write its metrics to a W&B run summary.

  Args:
      config: kept for interface compatibility; it is replaced by `wandb.config`
          after that has been updated with the config of the run that produced
          the model artifact.

  Downloads 'iconet/model-registry/Skin Binary Classification:v0', rebuilds a learner
  with the producer run's hyperparameters, loads the weights and stores
  `val_*` (ds_idx=1) and `test_*` (ds_idx=0) scores in `wandb.summary`.

  NOTE(review): `download_data` and `download_images` are not defined in the visible
  part of this notebook - confirm they exist before running this function.
  NOTE(review): `get_df(..., is_test=True)` adds no `is_valid` column, while `get_data`
  splits with `ColSplitter()`, which presumably needs one - confirm the intended split.
  """
  # The context manager closes the run even if evaluation raises.
  with wandb.init(project=WANDB_PROJECT, entity=ENTITY, job_type="evaluation", tags=['staging']) as run:

      artifact = run.use_artifact('iconet/model-registry/Skin Binary Classification:v0', type='model')

      artifact_dir = Path(artifact.download())

      # Path of the first file in the artifact with its extension stripped,
      # which is the form handed to learn.load() below.
      _model_pth = artifact_dir.ls()[0]
      model_path = _model_pth.parent.absolute()/_model_pth.stem

      # Re-use the hyperparameters of the run that logged the model.
      producer_run = artifact.logged_by()
      wandb.config.update(producer_run.config)
      config = wandb.config

      processed_dataset_dir = download_data()
      download_images()  # return value was never used; called for its download side effect
      test_df = get_df(processed_dataset_dir, is_test=True)
      # Pass a callable to apply (the old code called find_image_path() with no arguments,
      # which raises TypeError) and search the test folder.
      test_df["image_fname"] = test_df["File_Name"].apply(
          lambda file_name: find_image_path(file_name, is_test=True)
      )
      test_dls = get_data(test_df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)

      # `accuracy` is passed as a function, as in train(); calling it with no arguments raises TypeError.
      metrics = [Precision(), Recall(), accuracy, F1Score(), MatthewsCorrCoef()]

      # No training happens here, so the unused training callbacks were dropped.
      learn = vision_learner(test_dls, arch=getattr(tvmodels, config.arch), pretrained=config.pretrained, metrics=metrics)

      learn.load(model_path)

      val_metrics = learn.validate(ds_idx=1)
      test_metrics = learn.validate(ds_idx=0)

      # Write 'val_*' first, then 'test_*': loss followed by each metric in learn.metrics order.
      for prefix, scores in (('val', val_metrics), ('test', test_metrics)):
          names = [f'{prefix}_loss'] + [f'{prefix}_{x.name}' for x in learn.metrics]
          for k, v in zip(names, scores):
              wandb.summary[k] = v

      # NOTE(review): these predictions are computed but not used or logged yet.
      val_probs, val_targs = learn.get_preds(ds_idx=1)
      val_preds = val_probs.argmax(dim=1)



In [6]:
# Open a W&B run so the dataset artifacts can be fetched through it.
run = wandb.init(project=WANDB_PROJECT, entity=ENTITY)
# Processed-dataset artifact: get_df() reads `data.csv` from this directory.
processed_data_at = wandb.use_artifact(f'{PROCESSED_DATA_AT}:latest')
processed_dataset_dir = Path(processed_data_at.download())

# Train/validation artifact: `path` is the module-level root that find_image_path() searches.
tv_data_at = wandb.use_artifact(f'{TV_DATA_AT}:latest')
path = Path(tv_data_at.download())
# Close the run once both downloads are done.
wandb.finish()


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[34m[1mwandb[0m: \ 1 of 5 files downloaded...[34m[1mwandb[0m:   5 of 5 files downloaded.  
[34m[1mwandb[0m: Downloading large artifact train_val_data:latest, 7008.60MB. 18991 files... 
[34m[1mwandb[0m:   18991 of 18991 files downloaded.  
Done. 0:3:1.6


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [None]:
# Training/validation table; train() reads this module-level `df`.
df = get_df(processed_dataset_dir, is_test = False)

# Apply the function to create a new column with the correct paths
df["image_fname"] = df["File_Name"].apply(find_image_path)

# find_image_path returns None for files it cannot locate; fail here with a clear
# message instead of part-way through a training run.
missing_images = df["image_fname"].isna()
if missing_images.any():
    raise FileNotFoundError(
        f"{int(missing_images.sum())} image(s) listed in data.csv were not found under {path}, "
        f"e.g. {df.loc[missing_images, 'File_Name'].head().tolist()}"
    )

# Bayesian hyperparameter search maximising validation accuracy.
sweep_config = {
    'method': 'bayes',
    'name': 'sweep',
    'metric': {
        'goal': 'maximize',
        'name': 'accuracy'
        },
    'parameters': {
        'batch_size': {'values': [8, 32]},
        'epochs': {'values': [10, 15]},
        'lr': {
            'distribution': 'log_uniform_values',
            'max': 1e-2,
            'min': 1e-5
        },
        # Real booleans: the string 'True' only worked because any non-empty
        # string is truthy, so 'False' would have enabled the option as well.
        'augment': {'value': True},
        'pretrained': {'value': True},
        'img_size': {'value': (256, 256)},
        'seed': {'values': [42]},
        'arch': {
            'values': ['inception_v3']
            }
        },
    'early_terminate': {
        'type': 'hyperband',
        'min_iter': 3
    }
}

# Entity is passed explicitly, matching the other W&B calls in this notebook.
sweep_id = wandb.sweep(sweep_config, entity=ENTITY, project=WANDB_PROJECT)

# NOTE(review): no `count` is given, so the agent presumably keeps requesting runs
# until the sweep is stopped - confirm the intended run budget.
wandb.agent(sweep_id, train, entity=ENTITY, project=WANDB_PROJECT)

Create sweep with ID: tjmmx84m
Sweep URL: https://wandb.ai/iconet/cad_dl/sweeps/tjmmx84m


[34m[1mwandb[0m: Agent Starting Run: 2pjeozi6 with config:
[34m[1mwandb[0m: 	arch: vgg11_bn
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	img_size: [256, 256]
[34m[1mwandb[0m: 	lr: 0.0003211105049969196
[34m[1mwandb[0m: 	pretrained: True
[34m[1mwandb[0m: 	seed: 42


Training on device: cuda


[34m[1mwandb[0m: Currently logged in as: [33menriquefvrc[0m ([33miconet[0m). Use [1m`wandb login --relogin`[0m to force relogin




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Downloading: "https://download.pytorch.org/models/vgg11_bn-6002323d.pth" to /root/.cache/torch/hub/checkpoints/vgg11_bn-6002323d.pth
100%|██████████| 507M/507M [00:05<00:00, 90.0MB/s]


epoch,train_loss,valid_loss,precision_score,recall_score,accuracy,f1_score,matthews_corrcoef,time
0,0.731274,0.526393,0.804196,0.739946,0.78372,0.770734,0.568477,06:15
1,0.586788,0.434722,0.806589,0.813941,0.812698,0.810248,0.625365,06:13
2,0.490161,0.39401,0.860342,0.782842,0.830875,0.819764,0.66374,06:11
3,0.422581,0.364518,0.861653,0.804826,0.840622,0.832271,0.682225,06:09
4,0.413354,0.350785,0.85299,0.83378,0.847734,0.843275,0.69543,06:13
5,0.388496,0.346385,0.869863,0.817158,0.850105,0.842687,0.701056,06:11
6,0.347687,0.341513,0.881188,0.81126,0.85353,0.844779,0.708739,06:03
7,0.349684,0.333994,0.860593,0.840751,0.854847,0.850556,0.709675,06:14
8,0.362705,0.333963,0.865331,0.83378,0.854584,0.849263,0.709354,06:08
9,0.340098,0.333821,0.870968,0.825201,0.854057,0.847467,0.708711,06:04


Better model found at epoch 0 with accuracy value: 0.7837197184562683.
Better model found at epoch 1 with accuracy value: 0.8126975893974304.
Better model found at epoch 2 with accuracy value: 0.8308746218681335.
Better model found at epoch 3 with accuracy value: 0.8406217098236084.
Better model found at epoch 4 with accuracy value: 0.8477344512939453.
Better model found at epoch 5 with accuracy value: 0.850105345249176.
Better model found at epoch 6 with accuracy value: 0.8535300493240356.
Better model found at epoch 7 with accuracy value: 0.8548471927642822.


VBox(children=(Label(value='39.822 MB of 39.822 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▄▆▇▇█████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
eps_0,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_2,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
f1_score,▁▄▅▆▇▇▇███
lr_0,▁▁▂▃▄▅▆▇▇██████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
lr_1,▁▁▂▃▄▅▆▇▇██████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
lr_2,▁▁▂▃▄▅▆▇▇██████▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁
matthews_corrcoef,▁▄▆▇▇█████

0,1
accuracy,0.85406
epoch,10.0
eps_0,1e-05
eps_1,1e-05
eps_2,1e-05
f1_score,0.84747
final_accuracy,0.85485
final_f1_score,0.85056
final_loss,0.33399
final_matthews_corrcoef,0.70967


[34m[1mwandb[0m: Agent Starting Run: i0f1sw8e with config:
[34m[1mwandb[0m: 	arch: vgg19_bn
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	img_size: [256, 256]
[34m[1mwandb[0m: 	lr: 0.0001419537679208692
[34m[1mwandb[0m: 	pretrained: True
[34m[1mwandb[0m: 	seed: 42


Training on device: cuda


[34m[1mwandb[0m: Currently logged in as: [33menriquefvrc[0m ([33miconet[0m). Use [1m`wandb login --relogin`[0m to force relogin




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Downloading: "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth" to /root/.cache/torch/hub/checkpoints/vgg19_bn-c79401a0.pth
100%|██████████| 548M/548M [00:06<00:00, 82.8MB/s]


epoch,train_loss,valid_loss,precision_score,recall_score,accuracy,f1_score,matthews_corrcoef,time
