In [1]:
from pathlib import Path
import pandas as pd

In [219]:
! python ../scripts/01_process_rodi.py \
    --csv_path ../data/raw/RODI_dataset_annotations.csv \
    --out_folder ../data/interim/

In [220]:
! python ../scripts/01_train_test_split.py \
    --csv_path ../data/interim/01_rodi_processed.csv \
    --target_col family \
    --group_col file \
    --n_splits 5 \
    --out_folder ../data/interim/

#### FOLD 0 ####
TRAIN
Heptageniidae     866
Baetidae          787
Capniidae         510
Rhyacophilidae    354
Perlolidae        194
Chironomidae      146
Simulidae         115
Salmonidae         98
Name: family, dtype: int64
TEST
Baetidae          257
Heptageniidae     230
Rhyacophilidae    165
Capniidae         113
Chironomidae       50
Simulidae          38
Perlolidae         28
Salmonidae          3
Name: family, dtype: int64
VAL
Baetidae         257
Heptageniidae    198
Capniidae        109
Chironomidae      40
Salmonidae        28
Perlolidae        16
Name: family, dtype: int64

#### FOLD 1 ####
TRAIN
Baetidae          886
Heptageniidae     842
Capniidae         453
Rhyacophilidae    206
Simulidae         153
Chironomidae      148
Salmonidae         77
Perlolidae         66
Name: family, dtype: int64
TEST
Baetidae          290
Heptageniidae     240
Rhyacophilidae    186
Capniidae         186
Perlolidae         63
Salmonidae         44
Chironomidae       29
Name: family, dtype: in

In [2]:
import pytorch_lightning as pl
import benthic_models.benthic_models as ut

In [3]:
import importlib
importlib.reload(ut)

<module 'benthic_models.benthic_models' from '/scratch/project_2004353/impiomik/benthic-models/src/benthic_models/benthic_models.py'>

In [4]:
from dataclasses import dataclass
@dataclass
class Args:
    data_folder = '../data/raw/RODI_dataset/'
    dataset_name = 'rodi'
    csv_path = '../data/interim/01_rodi_processed_5splits_family.csv'
    fold = 0
    label = 'family'
    class_map = '../data/classes/rodi_01_family.txt'
    imsize = 224
    batch_size = 2
    aug = 'aug-02'
    load_to_memory = False
    log_dir='roditest'
args = Args()

In [12]:
class_map = ut.load_class_map(args.class_map)

In [13]:
dm = ut.LitDataModule(
    data_folder=args.data_folder,
    dataset_name=args.dataset_name,
    csv_path=args.csv_path,
    fold=args.fold,
    label=args.label,
    label_transform=class_map["fwd"],
    imsize=args.imsize,
    batch_size=args.batch_size,
    aug='aug-02',
    load_to_memory=args.load_to_memory,
)

In [14]:
dm.setup()

In [15]:
dm.visualize_datasets('imgs')

In [29]:
import torch

In [31]:
torch.randn((1,3,224,224)).shape

torch.Size([1, 3, 224, 224])

In [27]:
x,y = next(iter(dm.val_dataloader()))
x.shape

torch.Size([2, 3, 224, 224])

In [113]:
fnames, labels = ut.preprocess_dataset(
                data_folder=args.data_folder,
                dataset_name=args.dataset_name,
                csv_path=args.csv_path,
                fold=args.fold,
                label=args.label)

In [16]:
opt_args = {"name": 'adamw'}

model = ut.LitModule(
    model='resnet18',
    freeze_base=False,
    pretrained=True,
    criterion='cross-entropy',
    opt=opt_args,
    n_classes=8,
    lr=0.001,
    label_transform=class_map["inv"],
)

In [17]:
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger



In [19]:
logger = WandbLogger(project=args.log_dir,
                        name='basename',
                           id='1234')
logger.watch(model)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmikkoim[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.13.3 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


In [None]:
logger = TensorBoardLogger("tb_logs", name='testi')
logger.log_graph(model)

In [35]:
vars(args)

{}

In [27]:
from pytorch_lightning.callbacks import ModelCheckpoint

In [29]:
checkpoint_callback = ModelCheckpoint(
    monitor="val/loss", filename=f"out_" + "{epoch:02d}_{val_loss:.2f}"
)

In [31]:
trainer = pl.Trainer(
    max_epochs=5,
    limit_train_batches=1,
    limit_val_batches=1,
    limit_test_batches=1,
    logger=logger,
    callbacks=[checkpoint_callback]
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [32]:
trainer.fit(model,dm)

  rank_zero_deprecation(

  | Name  | Type  | Params | In sizes         | Out sizes
---------------------------------------------------------------
0 | model | Model | 11.2 M | [1, 3, 224, 224] | [1, 8]   
---------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.722    Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  rank_zero_deprecation(


In [24]:
trainer.test(model,dm)

  rank_zero_deprecation(
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/acc': 0.0, 'test/f1': 0.0, 'test/loss': 4.378890037536621}
--------------------------------------------------------------------------------


[{'test/loss': 4.378890037536621, 'test/acc': 0.0, 'test/f1': 0.0}]

In [24]:
import wandb

In [34]:
checkpoint_callback.best_model_path

'/scratch/project_2004353/impiomik/benthic-models/notebooks/roditest/1234/checkpoints/out_epoch=00_val_loss=0.00.ckpt'

In [37]:
import shutil

In [39]:
import yaml

In [47]:
result = yaml.dump(vars(wandb.config))
with open('test.yml', 'w') as f:
    f.write(result)

In [49]:
vars(wandb.config)['_items']

{'_wandb': {},
 'model': 'resnet18',
 'freeze_base': False,
 'pretrained': True,
 'n_classes': 8,
 'criterion': 'cross-entropy',
 'opt/name': 'adamw',
 'lr': 0.001}

In [145]:
args.model_weights = '../benthic_resnet18_cross-entropy_b128_220923-1657-e827_epoch=24_val_loss=0.13.ckpt'

ckpt = torch.load(
    args.model_weights,
    map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

In [146]:
model = ut.LitModule(**ckpt["hyper_parameters"])
model.label_transform = class_map['inv']

model.load_state_dict(ckpt["state_dict"])
model.freeze()

In [147]:
trainer = pl.Trainer(fast_dev_run=True)
trainer.test(model, dm)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Running in fast_dev_run mode: will run a full train, val, test and prediction loop using 1 batch(es).
  rank_zero_deprecation(
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/acc': 1.0, 'test/f1': 1.0, 'test/loss': 1.847741259553004e-06}
--------------------------------------------------------------------------------


[{'test/loss': 1.847741259553004e-06, 'test/acc': 1.0, 'test/f1': 1.0}]

In [148]:
y_true, y_pred = model.y_true, model.y_pred