In [5]:
import pytorch_lightning as pl
import os
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from torchvision.models import densenet121
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from monai.data import CacheDataset
from monai.transforms import Lambdad, ToTensord
from PIL import Image
from torchmetrics.classification.auroc import AUROC


data_path = Path('/home/rens/hpc/rens/output/patches/isala_primary_level4')

class PatchLevelClassifier(pl.LightningModule):
    def __init__(self, data_path):
        super().__init__()
        
        # setup model
        self.model = densenet121(pretrained=False)
        self.model.classifier = nn.Sequential(
            nn.Linear(1024,1),
            nn.Softmax(-1)
        )

        # setup input data
        dmtr = pd.read_csv('/home/rens/repos/PREMIUM/code/radiomics_paper/dmtr.csv').set_index('id')

        paths = [str(p) for p in data_path.iterdir()]
        patients = [p.stem.split('_')[0].replace('-','_') for p in data_path.iterdir()]

        dcb = []
        for patient in patients:
            try:
                dcb.append(dmtr.loc[patient, 'dcb'])
            except:
                dcb.append(float('nan'))
                continue

        df = pd.DataFrame(paths, columns=['path'])
        df['patient'] = patients
        df['dcb'] = dcb

        df = df.dropna(subset=['dcb'])

        unique_patients = df.patient.unique()

        train_patients= unique_patients[:39]
        self.train_input = self.df_to_input(df[df.patient.isin(train_patients)])

        val_patients = unique_patients[39:44]
        self.val_input = self.df_to_input(df[df.patient.isin(val_patients)])

        test_patients = unique_patients[44:]
        self.test_input = self.df_to_input(df[df.patient.isin(test_patients)])

        # setup transformations for dataloaders
        self.transform=transforms.Compose([
            Lambdad(['image'], lambda x: np.array(Image.open(x)).transpose(2,0,1).astype(np.float32)),
            ToTensord(keys=['image'])
        ])

        # setup dataframe for storing predictions
        self.predictions = pd.DataFrame(columns=['pred'])

    def forward(self, x):
        return self.model(x)

    def make_dataloader(self, input):
        ds = CacheDataset(input, transform=self.transform, cache_rate=0)
        dl = DataLoader(ds, batch_size=16, shuffle=False, num_workers=12)
        
        return dl

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
        return optimizer

    def train_dataloader(self):
        return self.make_dataloader(self.train_input)

    def val_dataloader(self):
        return self.make_dataloader(self.val_input)

    def test_dataloader(self):
        return self.make_dataloader(self.test_input)

    def training_step(self, batch, batch_idx):
        x,y = batch['image'], batch['label'] 
        y_hat = self.model(x)

        loss = nn.BCELoss()(y_hat, y.unsqueeze(1).float())

        self.logger.experiment.add_scalars('loss',{'train':loss.detach()},self.global_step)

        return loss

    def validation_step(self, batch, batch_idx):
        x,y = batch['image'], batch['label'] 
        y_hat = self(x)

        loss = nn.BCELoss()(y_hat, y.unsqueeze(1).float())
        self.logger.experiment.add_scalars('loss',{'val':loss.detach()},self.global_step)

        auc = AUROC()(y_hat, y.unsqueeze(1).int())
        self.logger.experiment.add_scalars('auc',{'val':auc},self.global_step)

        self.log('val_loss',loss)

        return loss

    def test_step(self, batch, batch_idx):
        paths, images = batch['path'], batch['image']
        y_hat = self.model(images)
        
        for p, f in zip(paths, y_hat.cpu().detach().numpy()):
            self.features.loc[p] = f

    def df_to_input(self, df):
        return [
            {'path':r['path'], 
            'image':r['path'],
            'label':r['dcb']}
            for _, r in df.iterrows()
        ]

    
    


In [8]:
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import Trainer

model = PatchLevelClassifier(data_path)

logger = TensorBoardLogger('tb_logs', name='05_patch_level_classification')

trainer = Trainer(
    max_epochs=200,
    log_every_n_steps=1,
    gpus=1,
    logger=logger,
    # callbacks=[early_stopping],
    # fast_dev_run=True
)
trainer.fit(model)

Columns (24,26,111,115,200,204,361,363,470,477,480,521,547,585,591,654) have mixed types.Specify dtype option on import or set low_memory=False.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type     | Params
-----------------------------------
0 | model | DenseNet | 7.0 M 
-----------------------------------
7.0 M     Trainable params
0         Non-trainable params
7.0 M     Total params
27.820    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]