In [1]:
from src.data import DataModule

dm = DataModule(
    r"C:\Users\user\data\dl_radiomics\preprocessed_3d",
    r"C:\Users\user\data\tables\lesion_followup_curated_v4.csv",
    "lung",
    "mst",
)
dm.setup()

Loading dataset: 100%|██████████| 123/123 [00:12<00:00,  9.90it/s]
Loading dataset: 100%|██████████| 42/42 [00:04<00:00, 10.14it/s]
Loading dataset: 100%|██████████| 56/56 [00:05<00:00,  9.91it/s]


In [2]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import wandb
from torchmetrics.classification import BinaryAUROC, Accuracy
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import LightningModule, Trainer, seed_everything
from monai.networks.nets.densenet import DenseNet121


class Model(LightningModule):
    def __init__(self, aggregation_function='mean'):
        super().__init__()
        self.model = DenseNet121(spatial_dims=3, in_channels=1, out_channels=1)
        self.aggregation_function = aggregation_function

        self.train_auc = BinaryAUROC(pos_label=1)
        self.val_auc = BinaryAUROC(pos_label=1)

        self.train_patient_auc = BinaryAUROC(pos_label=1)
        self.val_patient_auc = BinaryAUROC(pos_label=1)

        self.patient_labels = pd.read_csv(r'C:\Users\user\data\tables\dmtr.csv').set_index('id')['locafmetlong'].fillna(0)

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-2)

        return optimizer

    def training_step(self, batch, batch_idx):
        x, y = batch["img"], batch["label"]
        y_hat = torch.sigmoid(self.model(x))

        loss = nn.BCELoss()(y_hat.squeeze(), y.float())
        self.train_auc.update(y_hat.squeeze(), y.int())

        patient_level_preds = self.get_patient_level_preds(y_hat, batch['patient'])
        patient_level_labels = self.get_corresponding_patient_level_labels(patient_level_preds.index)

        self.train_patient_auc.update(
            torch.tensor(patient_level_preds.values), 
            patient_level_labels
        )

        self.log_dict(
            {"train_loss": loss, 'train_auc': self.train_auc.compute(), 'train_patient_auc':self.train_patient_auc.compute()},
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch["img"], batch["label"]
        y_hat = torch.sigmoid(self.model(x))

        loss = nn.BCELoss()(y_hat.squeeze(), y.float())
        self.val_auc.update(y_hat.squeeze(), y.int())

        patient_level_preds = self.get_patient_level_preds(y_hat, batch['patient'])
        patient_level_labels = self.get_corresponding_patient_level_labels(patient_level_preds.index)

        self.val_patient_auc.update(
            torch.tensor(patient_level_preds.values), 
            patient_level_labels
        )

        self.log_dict(
            {"valid_loss": loss, 'valid_auc': self.val_auc.compute(), 'valid_patient_auc':self.val_patient_auc.compute()},
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            logger=True,
        )

        return loss

    def get_patient_level_preds(self, preds, patients):
        results = pd.DataFrame([patients, preds.squeeze()], index=['patient','preds']).transpose()
        patient_level_preds = results.groupby('patient').preds.apply(self.get_aggregation_function()).apply(np.array)

        return patient_level_preds

    def get_corresponding_patient_level_labels(self, patients):
        return torch.tensor(self.patient_labels.loc[patients].values)

    def get_aggregation_function(self):
        if self.aggregation_function == 'mean':
            return np.mean
        elif self.aggregation_function == 'max':
            return np.max
        elif self.aggregation_function == 'min':
            return np.min

model = Model()

Columns (28,33,186,190,236,248,252,371,376,433,470,480,525,532,535,543,544,546,549,551,552,553,573,574,575,576,581,585,655,706,752,761) have mixed types.Specify dtype option on import or set low_memory=False.


In [3]:
import wandb
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import Trainer, seed_everything


seed_everything(0)

logger = WandbLogger(
    name='hello3',
    project='project_skeleton_on_lung_lesions',
)

trainer = Trainer(
    max_epochs=50,
    gpus=1,
    deterministic=True,
    fast_dev_run=False,
    logger=logger
)
        
trainer.fit(model, dm)

wandb.finish()

Global seed set to 0


2022-10-28 08:16:03,480 - Global seed set to 0


GPU available: True, used: True


2022-10-28 08:16:03,527 - GPU available: True, used: True


TPU available: None, using: 0 TPU cores


2022-10-28 08:16:03,528 - TPU available: None, using: 0 TPU cores
2022-10-28 08:16:03,532 - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2022-10-28 08:16:05,322 - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mrenstermaat[0m. Use [1m`wandb login --relogin`[0m to force relogin



  | Name              | Type        | Params
--------------------------------------------------
0 | model             | DenseNet121 | 11.2 M
1 | train_auc         | BinaryAUROC | 0     
2 | val_auc           | BinaryAUROC | 0     
3 | train_patient_auc | BinaryAUROC | 0     
4 | val_patient_auc   | BinaryAUROC | 0     
--------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.975    Total estimated model params size (MB)


2022-10-28 08:16:08,083 - 
  | Name              | Type        | Params
--------------------------------------------------
0 | model             | DenseNet121 | 11.2 M
1 | train_auc         | BinaryAUROC | 0     
2 | val_auc           | BinaryAUROC | 0     
3 | train_patient_auc | BinaryAUROC | 0     
4 | val_patient_auc   | BinaryAUROC | 0     
--------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.975    Total estimated model params size (MB)
Epoch 0:   0%|          | 0/12 [00:00<?, ?it/s]                       

No positive samples in targets, true positive value should be meaningless. Returning zero tensor in true positive score


Epoch 49: 100%|██████████| 12/12 [00:06<00:00,  1.93it/s, loss=0.0176, v_num=hksk, valid_loss=0.394, valid_auc=0.866, valid_patient_auc=0.752, train_loss=0.0168, train_auc=0.994, train_patient_auc=0.855]


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train_auc,▁▂▃▅▆▆▆▇▇▇▇▇▇███████████████████████████
train_loss,█▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_patient_auc,▁▄▅▆▇▇▇▇████████████████████████████████
valid_auc,▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████████
valid_loss,█▇██▄▃▃▂▂▂▂▁▁▂▁▁▁▃▁▁▁▁▁▁▁▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂
valid_patient_auc,▁▂▂▂▂▃▃▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████

0,1
epoch,49.0
train_auc,0.99443
train_loss,0.01679
train_patient_auc,0.85539
valid_auc,0.86607
valid_loss,0.3943
valid_patient_auc,0.7521


In [20]:
x = next(iter(dm.train_dataloader()))

y_hat = model(x['img'])

In [21]:
results = pd.DataFrame([x['patient'], y_hat.squeeze()], index=['patient','preds']).transpose()
patient_preds = torch.tensor(results.groupby('patient').preds.apply(np.max).values)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [25]:
results.groupby('patient').preds.apply(np.max).apply(np.array)

patient
PREM_ZU_008   -1.273657
PREM_ZU_012   -1.483079
PREM_ZU_017   -1.594422
Name: preds, dtype: float64

In [33]:
labels = torch.tensor(model.patient_labels.loc[x['patient']].values)

In [34]:
BinaryAUROC(pos_label=1)(patient_preds, labels)

No positive samples in targets, true positive value should be meaningless. Returning zero tensor in true positive score


tensor(0., dtype=torch.float64)

In [28]:
patient_preds

array([-0.12188261,  0.02286573,  0.0823191 , -0.12367885])