In [1]:
!kaggle competitions download -c histopathologic-cancer-detection --path kaggle --quiet
!cd kaggle; unzip -qq histopathologic-cancer-detection.zip

In [1]:
import pandas as pd
import cv2
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as T, models

from pl_flash import Flash
from pytorch_lightning import Trainer
import pytorch_lightning.metrics.functional as FM

### Define Dataset

In [2]:
class HistopathologicDataset(Dataset):
    def __init__(self, df, datadir, transform=None):
        self.fnames = [f"{datadir}/{i}.tif" for i in df.id]
        self.labels = df.label.tolist()
        self.transform = transform
    
    def __len__(self):
        return len(self.fnames)
    
    def __getitem__(self, index):
        img = cv2.imread(self.fnames[index])
        if self.transform:
            img = self.transform(img)
        return img, self.labels[index]

### Transforms

In [3]:
normalize = T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_transforms = T.Compose([
    T.ToPILImage(),
    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip(),
    T.RandomRotation(20),
    T.CenterCrop((49, 49)),
    T.ToTensor(),
    normalize,
])

valid_transforms = T.Compose([
    T.ToPILImage(),
    T.CenterCrop((49, 49)),
    T.ToTensor(),
    normalize,
])

### DataLoaders

In [4]:
df = pd.read_csv("kaggle/train_labels.csv")
split = int(0.8 * len(df))
train_ds = HistopathologicDataset(df[:split], "kaggle/train", train_transforms)
valid_ds = HistopathologicDataset(df[split:], "kaggle/train", valid_transforms)
train_dl = DataLoader(train_ds, batch_size=512, num_workers=6, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=512, num_workers=6)

### Model

In [6]:
resnet = models.resnet50(pretrained=True)
in_features = resnet.fc.in_features

model = nn.Sequential(
    nn.Sequential(*list(resnet.children())[:-2]), # resnet until pool
    nn.AdaptiveMaxPool2d(1),
    nn.Flatten(),
    nn.BatchNorm1d(in_features),
    nn.Dropout(0.5),
    nn.Linear(in_features=in_features, out_features=512),
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Dropout(0.5),
    nn.Linear(in_features=512, out_features=2),
)

def auroc(x, y):
    return FM.auroc(F.softmax(x, dim=1)[:, 1], y)

flash_model = Flash(
    model,
    loss=F.cross_entropy,
    metrics={"auroc": auroc},
    optimizer="SGD",
    learning_rate=1e-2
)

### Train

In [7]:
trainer = Trainer(gpus=1, max_epochs=20)
trainer.fit(flash_model, train_dl, valid_dl)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 24 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

                    When using EvalResult(early_stop_on=X) or TrainResult(early_stop_on=X) the
                    'monitor' key of ModelCheckpoint has no effect.
                    Remove ModelCheckpoint(monitor='loss) to fix')
                


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




1

In [8]:
trainer.test(flash_model, valid_dl)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test/auroc': tensor(0.9739, device='cuda:0'),
 'epoch_test/cross_entropy': tensor(0.1978, device='cuda:0'),
 'epoch_test/loss': tensor(0.1978, device='cuda:0')}
--------------------------------------------------------------------------------



[{'epoch_test/cross_entropy': 0.19775205850601196,
  'epoch_test/loss': 0.19775205850601196,
  'epoch_test/auroc': 0.9739193320274353}]

AUROC 0.9739 ~ top 10% on leaderboard.

See resulting [TensorBoard.dev](https://tensorboard.dev/experiment/ewumij9mQDy1wg46jegVDw/)