## Load libraries

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; mode 2 re-imports modules before each cell runs,
# so edits to the project modules imported below are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# standard python packages
import os, sys, shutil
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import random

In [3]:
# Put the parent directory first on the import path so that the project packages
# imported below (utils, create_toybrains, experiments) can be found.
sys.path.insert(0, "../")
# NOTE(review): the wildcard imports below are presumably what bring names such as
# torch, transforms, L, EarlyStopping, SimpleCNN and LightningModel into scope for
# later cells (none of them is imported explicitly in this notebook) - confirm, and
# consider importing them by name.
from utils.DLutils import *
from utils.vizutils import *
from create_toybrains import ToyBrainsData
from experiments.fit_DL_model import *

  from .autonotebook import tqdm as notebook_tqdm
OMP: Info #277: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [4]:
# Location of the DeepRepViz repository, given as a relative path.
DEEPREPVIZ_REPO = "../../Deep-confound-control-v2/"
# Appended (not prepended), so anything already importable from earlier sys.path entries wins.
sys.path.append(DEEPREPVIZ_REPO)
# NOTE(review): wildcard import; the DeepRepViz class used further down comes from here.
from DeepRepViz import *

In [5]:
from lightning.pytorch.loggers import TensorBoardLogger, CSVLogger, WandbLogger

In [6]:
import logging

# Silence unnecessary Lightning chatter: only WARNING and above is shown for these loggers.
for noisy_logger in ("lightning.pytorch.utilities.rank_zero",
                     "lightning.pytorch.accelerators.cuda"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

In [7]:
base_config_file = "configs/lblmidr_covsite/lblmidr_covsite_t0.py"

# Collect every matching dataset directory as an absolute path, in sorted order.
matched_dirs = glob("../dataset/toybrains_n10000_lblmidr_covsite_t*")
DATASETS = sorted(map(os.path.abspath, matched_dirs))
print("Fitting DL model on the following toybrains datasets:")
DATASETS

Fitting DL model on the following toybrains datasets:


['/ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t0',
 '/ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t1',
 '/ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t2',
 '/ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t3',
 '/ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t4']

## Fit DL models

In [8]:
# select one dataset (the last entry of the sorted DATASETS list)
if not DATASETS:
    # fail with a readable message instead of a bare IndexError on DATASETS[-1]
    raise FileNotFoundError(
        "DATASETS is empty: no toybrains dataset directories were found.")
dataset_path = DATASETS[-1]
# run tag = trailing "_"-separated token of the directory name (e.g. "t4");
# basename/normpath instead of split('/') keeps this independent of the path separator
unique_name = os.path.basename(os.path.normpath(dataset_path)).split('_')[-1]
# glob returns matches in arbitrary order; sort so the chosen CSV is deterministic
csv_candidates = sorted(glob(f'{dataset_path}/*{unique_name}.csv'))
if not csv_candidates:
    raise FileNotFoundError(
        f"No file matching '*{unique_name}.csv' found in {dataset_path}")
raw_csv_path = csv_candidates[0]

In [9]:
# Run-level switches: debug mode, the label column to predict, and the batch size.
debug = True
label = "lbl_lesion"
batch_size = 64
# A fixed seed is only used in debug mode; otherwise no seed is set.
if debug:
    random_seed = 42
else:
    random_seed = None

#### Select the GPU

In [10]:
# check GPUs available and memory
# (shell escape: needs the `gpustat` command-line tool on the machine running the kernel)
! gpustat

[1m[37mcuda01                       [m  Fri Jan  5 17:48:42 2024  [1m[30m545.23.06[m
[36m[0][m [34mNVIDIA GeForce GTX 1080 Ti[m |[31m 25'C[m, [32m  0 %[m | [36m[1m[33m  100[m / [33m11264[m MB | [1m[30mgdm[m([33m4M[m)
[36m[1][m [34mNVIDIA GeForce GTX 1080 Ti[m |[31m 27'C[m, [32m  0 %[m | [36m[1m[33m  100[m / [33m11264[m MB | [1m[30mgdm[m([33m4M[m)
[36m[2][m [34mNVIDIA GeForce GTX 1080 Ti[m |[31m 27'C[m, [32m  0 %[m | [36m[1m[33m  100[m / [33m11264[m MB | [1m[30mgdm[m([33m4M[m)
[36m[3][m [34mNVIDIA GeForce GTX 1080 Ti[m |[31m 28'C[m, [32m  0 %[m | [36m[1m[33m 1372[m / [33m11264[m MB | [1m[30mmanuel[m([33m350M[m) [1m[30mmanuel[m([33m350M[m) [1m[30mmanuel[m([33m350M[m) [1m[30mmanuel[m([33m220M[m) [1m[30mgdm[m([33m4M[m)
[36m[4][m [34mNVIDIA GeForce GTX 1080 Ti[m |[31m 26'C[m, [32m  0 %[m | [36m[1m[33m  100[m / [33m11264[m MB | [1m[30mgdm[m([33m4M[m)
[36m[5][m [34mNVIDI

In [11]:
# GPU index list chosen for this run (hardcoded; presumably picked from the gpustat listing - confirm)
GPUs = [1]

In [12]:
# Let float32 matrix multiplications use the reduced-precision 'medium' setting.
torch.set_float32_matmul_precision('medium')
# Process-wide environment flags for the CUDA and TensorFlow runtimes.
os.environ.update({
    "CUDA_LAUNCH_BLOCKING": "1",
    "TF_ENABLE_ONEDNN_OPTS": "0",
})

#### Model config

In [13]:
# Architecture to train and the keyword arguments its constructor is called with.
model_class = SimpleCNN
model_kwargs = {"num_classes": 1, "final_act_size": 3}


Test the model initialisation and list the model's layer names, which are needed to choose the layer to hook.

In [14]:
# Build a throwaway model instance to check that the constructor arguments are accepted.
model = model_class(**model_kwargs)
# Count only parameters that receive gradients.
n_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# (the original f-string had a stray ")" after the placeholder, printing e.g. "36103)")
print(f"pytorch_total_params = {n_trainable_params}")

pytorch_total_params = 36103)


In [15]:
# Display the model's layers as (name, module) pairs to help choose the layer to hook.
get_all_model_layers(model)

[('0', Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))),
 ('0', Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))),
 ('0', Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))),
 ('1', Linear(in_features=4096, out_features=3, bias=True)),
 ('2', Linear(in_features=3, out_features=1, bias=True))]

#### Load data

In [16]:
# Read the CSV found for the selected dataset into a DataFrame.
df_data = pd.read_csv(raw_csv_path)

In [17]:
# Partition the full table into train / validation / test frames.
df_train, df_val, df_test = split_dataset(df_data, label, random_seed)

print(
    f"Dataset: {dataset_path} ({unique_name})\n"
    f"  Training data split = {len(df_train)} \n"
    f"  Validation data split = {len(df_val)} \n"
    f"  Test data split = {len(df_test)}"
)

# Loader arguments shared by all three splits.
common_settings = {
    "images_dir": dataset_path + '/images',
    "batch_size": batch_size,
    "num_workers": 16,
}

# The training loader keeps the helper's default shuffle setting;
# the validation and test loaders explicitly disable shuffling.
train_loader = get_toybrain_dataloader(df_train, **common_settings)
val_loader = get_toybrain_dataloader(df_val, shuffle=False, **common_settings)
test_loader = get_toybrain_dataloader(df_test, shuffle=False, **common_settings)

Dataset: /ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t4 (t4)
  Training data split = 7809 
  Validation data split = 191 
  Test data split = 2000


#### DeepRepViz Init

In [18]:
# Assemble what DeepRepViz is configured with: one table covering every split
# (stacked train, val, test) plus the arguments for its own dataloader.
split_colname = 'datasplit'
ID_col = 'subjectID'
# record on each frame which split it belongs to
for split_frame, split_tag in ((df_train, 'train'), (df_val, 'val'), (df_test, 'test')):
    split_frame[split_colname] = split_tag
df_data = pd.concat([df_train, df_val, df_test])

# per-row arrays, all in the row order of the concatenated table
IDs = df_data[ID_col].values
expected_labels = df_data[label].values
datasplits = df_data[split_colname].values

drv_loader_kwargs = {
    "img_dir": dataset_path + '/images',
    "img_names": df_data[ID_col].values,
    "labels": df_data[label].values,
    "transform": transforms.ToTensor(),
}

deeprepviz_kwargs = {
    "dataloader_class": ToyBrainsDataloader,
    "dataloader_kwargs": drv_loader_kwargs,
    "expected_IDs": IDs,
    "expected_labels": expected_labels,
    "datasplits": datasplits,
    "hook_layer": -1,
    "debug": False,
}

In [19]:
# test DeepRepViz config: build a throwaway instance (not kept) to check the kwargs are accepted
DeepRepViz(**deeprepviz_kwargs)

<DeepRepViz.DeepRepViz at 0x7f61016fa950>

#### Run training

In [21]:

# setup training args
logger_args = dict(save_dir='log/debugmode/',
                   name=f'toybrains-{unique_name}')
trainer_args = {"max_epochs": 5 if debug else 50,
                "accelerator": 'gpu',
                # use the GPU list chosen in the GPU-selection cell rather than a second hardcoded [1]
                "devices": GPUs}
# NOTE(review): the patience (6) exceeds the 5 debug epochs, so patience-based
# early stopping is not expected to fire in debug mode.
early_stop_patience = 6

# reinit model so that training starts from fresh weights
model = model_class(**model_kwargs)

# reinit deeprepviz
drv = DeepRepViz(**deeprepviz_kwargs)

callbacks = [drv]
if early_stop_patience:
    callbacks.append(EarlyStopping(monitor="val_loss", mode="min",
                                   patience=early_stop_patience))

lightning_model = LightningModel(model, learning_rate=0.05,
                                 num_classes=model_kwargs['num_classes'])

# configure trainer settings
# Remove earlier logs of this run BEFORE the logger is created. The previous shell call
# (`rm -rf /log/*debugmode*`) pointed at the absolute /log directory, not at the relative
# logger directory used here. Only this run's own sub-folder is deleted.
shutil.rmtree(os.path.join(logger_args['save_dir'], logger_args['name']),
              ignore_errors=True)
logger = TensorBoardLogger(**logger_args)

# train model (in debug mode: overfit on 5 batches and log every 2 steps)
trainer = L.Trainer(callbacks=callbacks,
                    logger=logger,
                    overfit_batches=5 if debug else 0,
                    log_every_n_steps=2 if debug else 50,
                    **trainer_args)  # deterministic=True
trainer.fit(
    model=lightning_model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader)


# test model
# NOTE(review): no ckpt_path is passed, so this scores the weights currently held by
# lightning_model - confirm that this matches the "best model" wording printed below.
test_scores = trainer.test(lightning_model, verbose=False,
                           dataloaders=test_loader,
                           )[0]

print("Test data performance with the best model:\n\
-------------------------------------------------------\n\
Dataset      = {} ({})\n\
Balanced Acc = {:.2f}% \t D2 = {:.2f}%".format(
    dataset_path, unique_name,
    test_scores['test_BAC']*100, test_scores['test_D2']*100))

Missing logger folder: log/testrun/toybrains-t4

  | Name           | Type              | Params
-----------------------------------------------------
0 | model          | SimpleCNN         | 36.1 K
1 | _metric_spec   | BinarySpecificity | 0     
2 | _metric_recall | BinaryRecall      | 0     
3 | metric_D2      | D2metric          | 0     
-----------------------------------------------------
36.1 K    Trainable params
0         Non-trainable params
36.1 K    Total params
0.144     Total estimated model params size (MB)


                                                                           

You requested to overfit but enabled train dataloader shuffling. We are turning off the train dataloader shuffling for you.


Epoch 4: 100%|██████████| 5/5 [00:20<00:00,  0.24it/s, v_num=0, val_loss=3.420, val_BAC=0.500, val_D2=-3.95, train_loss=0.367, train_BAC=0.873, train_D2=0.460]
Testing DataLoader 0: 100%|██████████| 31/31 [00:00<00:00, 59.17it/s]
Test data performance with the best model:
-------------------------------------------------------
Dataset      = /ritter/roshan/workspace/toybrains/dataset/toybrains_n10000_lblmidr_covsite_t4 (t4)
Balanced Acc = 50.00% 	 D2 = -401.46%


```bash
# Batch 1: one background run per dataset (t0..t4) on GPUs 0-4 with --final_act_size 3.
# stdout and stderr of each run are redirected to nohup_<dataset index>-3.out.
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t0 --gpus 0 --final_act_size 3 -n "_cls1-fea3" &> nohup_0-3.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t1 --gpus 1 --final_act_size 3 -n "_cls1-fea3" &> nohup_1-3.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t2 --gpus 2 --final_act_size 3 -n "_cls1-fea3" &> nohup_2-3.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t3 --gpus 3 --final_act_size 3 -n "_cls1-fea3" &> nohup_3-3.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t4 --gpus 4 --final_act_size 3 -n "_cls1-fea3" &> nohup_4-3.out &


# Batch 2: the same five datasets with --final_act_size 64, on GPUs 5, 6, 7, 0 and 1.
# GPUs 0 and 1 are also used by the t0/t1 runs of batch 1, so they are shared
# if both batches are launched together.
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t0 --gpus 5 --final_act_size 64 -n "_cls1-fea64" &> nohup_0-64.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t1 --gpus 6 --final_act_size 64 -n "_cls1-fea64" &> nohup_1-64.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t2 --gpus 7 --final_act_size 64 -n "_cls1-fea64" &> nohup_2-64.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t3 --gpus 0 --final_act_size 64 -n "_cls1-fea64" &> nohup_3-64.out &
nohup python3 fit_DL_model.py --data_dir dataset/toybrains_n10000_lblmidr_covsite_t4 --gpus 1 --final_act_size 64 -n "_cls1-fea64" &> nohup_4-64.out &

```