## this sample script takes continuous variable as outcome, as example

In [1]:
import os
import pandas as pd

import torch
from torch.utils.data import DataLoader

os.chdir('/mnt/f/workspace/pathology/gigapath/code/transfer')
from gliopath.train.task.flex import seed_torch, train, EmbeddingDataset, TaskHead, collate_fn_with_padding, SeriesHead
from gliopath.utils.proces import split_dataset
from gliopath.train.gadget import get_sampler
from gliopath.models.load import giga_slide_enc
from gliopath.models import longn

os.chdir('/mnt/f/workspace/pathology/gigapath')



In [2]:
seed = 42
dataset_df = pd.read_table('data/metadata.tbl', sep='\t')
test_df = pd.read_table('data/metadata_test.tbl', sep='\t')
embed_path = 'output/tiles/rand_embed/'
z_score = False
outcome_col = ['IDH1','TP53','ATRX','PTEN','EGFR','TERT']
num_classes = len(outcome_col)
batch_size = 4
num_workers = 0
embed_dim = 1536
weighted_sampler = True
feat_layers=[0,1]
num_epochs = 50

unfreeze_schedule = {
    10: [-1],
    20: [-2],
    30: [-3],
}

splits = ['train', 'val', 'test']
split_col = 'split_col'
id_col = 'id'
params = {
    'lr': 0.001,
    'min_lr': 0.0,
    'num_epochs': num_epochs,
    'eval_interval': 10,
    'output_dir': 'output/models/life',
    'optim': 'sgd',
    'weight_decay': 0.01,
    'outcome_type': 'gene',
    'gc_step': 2,
    'freeze_longnet': False,
    'longnet_lr_factor': 0.02,
    'warmup_epochs': 5,
    'active_longnet_layers': None,
    'progressive_unfreeze': True,
    'unfreeze_schedule': unfreeze_schedule,
    'max_frozen_layers': 0,
    'layer_lr_factor': 0.2,
    'min_lr_factor': 0.1,
    'warmup_factor': 0.1,
}

In [3]:
# set the random seed
seed_torch(torch.device('cuda'), 0)
# read the metadata
dataset_df = split_dataset(dataset_df, id_col='id', type_col='tumour_type', val_split=0.2, test_split=0, in_df=True, split_col='split_col')
test_df['split_col'] = 'test'
dataset_df = pd.concat([dataset_df, test_df], ignore_index=True)

# load the dataset
train_dataset, val_dataset, test_dataset = [EmbeddingDataset(dataset_df, embed_path, feat_layer=feat_layers, split_col=split_col, split=split, id_col=id_col, type_col=outcome_col, outcome_type='gene', z_score=z_score) for split in splits]

# set num_classes
print(f'Sample size:\nTrain: {len(train_dataset)}\tVal: {len(val_dataset)}\tTest: {len(test_dataset)}')

Sample size:
Train: 158	Val: 42	Test: 50


In [4]:
# infinite sampler for training
# not sure if cha nge shuffle to TRUE? (*)
# train_sampler = torch.utils.data.sampler.RandomSampler(train_dataset, replacement=True)
train_sampler = get_sampler(train_dataset)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=collate_fn_with_padding, sampler=train_sampler, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=collate_fn_with_padding, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=collate_fn_with_padding, pin_memory=True)

In [5]:
model_longnet = giga_slide_enc(path='model/pub/slide_encoder.pth', global_pool=True)
# model_longnet = longn.glio_create_model(local_path='', model_arch="gigapath_slide_enc3l32d", in_chans=1536, global_pool=True)
model = SeriesHead(LongNetModel=model_longnet,
                   TaskHead=TaskHead(768 * len(feat_layers), num_classes),
                   feat_layers=feat_layers)

/mnt/f/workspace/pathology/gigapath
dilated_ratio:  [1, 2, 4, 8, 16]
segment_length:  [1024, 5792, 32768, 185363, 1048576]
Number of trainable LongNet parameters:  85148160
Global Pooling: True
[92m Successfully Loaded Pretrained GigaPath model from model/pub/slide_encoder.pth [00m
Slide encoder param # 86330880


In [6]:
# Train the model
pred_gather, target_gather = train(model, train_loader, val_loader, test_loader, **params)

Starting with ALL LongNet layers frozen

Progressive Unfreezing Configuration:
  Total LongNet layers: 12 (layers 0-11)
  Permanently frozen layers: 0--1 (first 0 layers)
  Trainable layer range: 0-11
  Initial active layers: None (all frozen)
  Unfreeze schedule: {10: [-1], 20: [-2], 30: [-3]}
  Auto-unfreeze head components: False


DETAILED PARAMETER BREAKDOWN

üìç Embedding Components:
  patch_embed         :          0/ 1,180,416 params  ‚úó FROZEN
  cls_token           :          0/       768 params  ‚úó FROZEN

üìç Encoder Layers:
  Layer  0:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  1:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  2:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  3:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  4:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  5:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  6:          0/ 7,09

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 0:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [1/50]	Train Loss: 0.7167	LRs: TaskHead: 0.000280


Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [2/50]	Train Loss: 0.7225	LRs: TaskHead: 0.000460


Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [3/50]	Train Loss: 0.6962	LRs: TaskHead: 0.000640


Epoch 3:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [4/50]	Train Loss: 0.6859	LRs: TaskHead: 0.000820


Epoch 4:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [5/50]	Train Loss: 0.6881	LRs: TaskHead: 0.001000


Epoch 5:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [6/50]	Train Loss: 0.6886	LRs: TaskHead: 0.000999


Epoch 6:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [7/50]	Train Loss: 0.6924	LRs: TaskHead: 0.000996


Epoch 7:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [8/50]	Train Loss: 0.6832	LRs: TaskHead: 0.000990


Epoch 8:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [9/50]	Train Loss: 0.6920	LRs: TaskHead: 0.000983


Epoch 9:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [10/50]	Train Loss: 0.6932	LRs: TaskHead: 0.000973
Evaluating at epoch 10...
Val Epoch [10/50] Acc: 0.488 F1: 0.000 Prec: 0.000 Rec: 0.000 AUROC: 0.519 AUPRC: 0.562

EPOCH 10: UNFREEZING LAYERS
  Newly unfrozen layers: [-1]
  Currently active layers: [11]
  Permanently frozen: layers []
  Temporarily frozen: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  Head components unfrozen: False
  Total trainable layers: 1/12


DETAILED PARAMETER BREAKDOWN

üìç Embedding Components:
  patch_embed         :          0/ 1,180,416 params  ‚úó FROZEN
  cls_token           :          0/       768 params  ‚úó FROZEN

üìç Encoder Layers:
  Layer  0:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  1:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  2:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  3:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  4:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  5:       

Epoch 10:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [11/50]	Train Loss: 0.6875	LRs: TaskHead: 0.000280, LongNet: 0.000006


Epoch 11:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [12/50]	Train Loss: 0.6896	LRs: TaskHead: 0.000460, LongNet: 0.000009


Epoch 12:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [13/50]	Train Loss: 0.6840	LRs: TaskHead: 0.000640, LongNet: 0.000013


Epoch 13:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [14/50]	Train Loss: 0.6873	LRs: TaskHead: 0.000820, LongNet: 0.000016


Epoch 14:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [15/50]	Train Loss: 0.6872	LRs: TaskHead: 0.001000, LongNet: 0.000020


Epoch 15:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [16/50]	Train Loss: 0.6857	LRs: TaskHead: 0.000998, LongNet: 0.000020


Epoch 16:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [17/50]	Train Loss: 0.6849	LRs: TaskHead: 0.000993, LongNet: 0.000020


Epoch 17:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [18/50]	Train Loss: 0.6867	LRs: TaskHead: 0.000984, LongNet: 0.000020


Epoch 18:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [19/50]	Train Loss: 0.6814	LRs: TaskHead: 0.000971, LongNet: 0.000019


Epoch 19:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [20/50]	Train Loss: 0.6820	LRs: TaskHead: 0.000955, LongNet: 0.000019
Evaluating at epoch 20...
Val Epoch [20/50] Acc: 0.492 F1: 0.062 Prec: 0.093 Rec: 0.046 AUROC: 0.525 AUPRC: 0.566
Best F1 improved from 0.000 to 0.062

EPOCH 20: UNFREEZING LAYERS
  Newly unfrozen layers: [-2]
  Currently active layers: [10, 11]
  Permanently frozen: layers []
  Temporarily frozen: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  Head components unfrozen: False
  Total trainable layers: 2/12


DETAILED PARAMETER BREAKDOWN

üìç Embedding Components:
  patch_embed         :          0/ 1,180,416 params  ‚úó FROZEN
  cls_token           :          0/       768 params  ‚úó FROZEN

üìç Encoder Layers:
  Layer  0:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  1:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  2:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  3:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  4:          0/ 7,095,552

Epoch 20:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [21/50]	Train Loss: 0.6829	LRs: TaskHead: 0.000280, LongNet: 0.000006


Epoch 21:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [22/50]	Train Loss: 0.6870	LRs: TaskHead: 0.000460, LongNet: 0.000009


Epoch 22:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [23/50]	Train Loss: 0.6824	LRs: TaskHead: 0.000640, LongNet: 0.000013


Epoch 23:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [24/50]	Train Loss: 0.6914	LRs: TaskHead: 0.000820, LongNet: 0.000016


Epoch 24:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [25/50]	Train Loss: 0.6879	LRs: TaskHead: 0.001000, LongNet: 0.000020


Epoch 25:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [26/50]	Train Loss: 0.6872	LRs: TaskHead: 0.000996, LongNet: 0.000020


Epoch 26:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [27/50]	Train Loss: 0.6834	LRs: TaskHead: 0.000986, LongNet: 0.000020


Epoch 27:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [28/50]	Train Loss: 0.6846	LRs: TaskHead: 0.000968, LongNet: 0.000019


Epoch 28:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [29/50]	Train Loss: 0.6739	LRs: TaskHead: 0.000944, LongNet: 0.000019


Epoch 29:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [30/50]	Train Loss: 0.6787	LRs: TaskHead: 0.000914, LongNet: 0.000018
Evaluating at epoch 30...
Val Epoch [30/50] Acc: 0.472 F1: 0.075 Prec: 0.068 Rec: 0.083 AUROC: 0.531 AUPRC: 0.568
Best F1 improved from 0.062 to 0.075

EPOCH 30: UNFREEZING LAYERS
  Newly unfrozen layers: [-3]
  Currently active layers: [9, 10, 11]
  Permanently frozen: layers []
  Temporarily frozen: [0, 1, 2, 3, 4, 5, 6, 7, 8]
  Head components unfrozen: False
  Total trainable layers: 3/12


DETAILED PARAMETER BREAKDOWN

üìç Embedding Components:
  patch_embed         :          0/ 1,180,416 params  ‚úó FROZEN
  cls_token           :          0/       768 params  ‚úó FROZEN

üìç Encoder Layers:
  Layer  0:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  1:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  2:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  3:          0/ 7,095,552 params  ‚úó FROZEN (can be unfrozen)
  Layer  4:          0/ 7,095,552

Epoch 30:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [31/50]	Train Loss: 0.6803	LRs: TaskHead: 0.000280, LongNet: 0.000006


Epoch 31:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [32/50]	Train Loss: 0.6822	LRs: TaskHead: 0.000460, LongNet: 0.000009


Epoch 32:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [33/50]	Train Loss: 0.6793	LRs: TaskHead: 0.000640, LongNet: 0.000013


Epoch 33:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [34/50]	Train Loss: 0.6848	LRs: TaskHead: 0.000820, LongNet: 0.000016


Epoch 34:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [35/50]	Train Loss: 0.6785	LRs: TaskHead: 0.001000, LongNet: 0.000020


Epoch 35:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [36/50]	Train Loss: 0.6835	LRs: TaskHead: 0.000990, LongNet: 0.000020


Epoch 36:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [37/50]	Train Loss: 0.6849	LRs: TaskHead: 0.000961, LongNet: 0.000019


Epoch 37:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [38/50]	Train Loss: 0.6850	LRs: TaskHead: 0.000914, LongNet: 0.000018


Epoch 38:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [39/50]	Train Loss: 0.6828	LRs: TaskHead: 0.000851, LongNet: 0.000017


Epoch 39:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [40/50]	Train Loss: 0.6817	LRs: TaskHead: 0.000775, LongNet: 0.000016
Evaluating at epoch 40...
Val Epoch [40/50] Acc: 0.484 F1: 0.000 Prec: 0.000 Rec: 0.000 AUROC: 0.528 AUPRC: 0.567


Epoch 40:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [41/50]	Train Loss: 0.6803	LRs: TaskHead: 0.000689, LongNet: 0.000014


Epoch 41:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [42/50]	Train Loss: 0.6786	LRs: TaskHead: 0.000597, LongNet: 0.000012


Epoch 42:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [43/50]	Train Loss: 0.6786	LRs: TaskHead: 0.000503, LongNet: 0.000010


Epoch 43:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [44/50]	Train Loss: 0.6799	LRs: TaskHead: 0.000411, LongNet: 0.000008


Epoch 44:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [45/50]	Train Loss: 0.6766	LRs: TaskHead: 0.000325, LongNet: 0.000007


Epoch 45:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [46/50]	Train Loss: 0.6777	LRs: TaskHead: 0.000249, LongNet: 0.000005


Epoch 46:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [47/50]	Train Loss: 0.6736	LRs: TaskHead: 0.000186, LongNet: 0.000004


Epoch 47:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [48/50]	Train Loss: 0.6755	LRs: TaskHead: 0.000139, LongNet: 0.000003


Epoch 48:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [49/50]	Train Loss: 0.6799	LRs: TaskHead: 0.000110, LongNet: 0.000002


Epoch 49:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch [50/50]	Train Loss: 0.6742	LRs: TaskHead: 0.000100, LongNet: 0.000002
Evaluating at epoch 50...
Val Epoch [50/50] Acc: 0.488 F1: 0.030 Prec: 0.083 Rec: 0.019 AUROC: 0.527 AUPRC: 0.566
Loading best model with f1: 0.075
Evaluating on test set...
Test Acc: 0.507 F1: 0.083 Prec: 0.080 Rec: 0.086 AUROC: 0.443 AUPRC: 0.484
