In [1]:
import argparse
import copy
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import roc_auc_score

from ml import learner, data
from ml.vision import utils
from pipe import constants, augmentations

## Load and validate

In [2]:
# Read the two CSVs whose locations are configured in `pipe.constants`.
# `train` comes from `train_folds_fpath` and `test` from
# `sample_submission_fpath`; both must expose a `StudyInstanceUID` column,
# which later cells use to build image paths.
train = pd.read_csv(constants.train_folds_fpath)
test = pd.read_csv(constants.sample_submission_fpath)

In [3]:
# One image path per row of `test`, laid out as
# <constants.data_path>/test_256/<StudyInstanceUID>.jpg.
test_image_paths = [
    constants.data_path / f"test_256/{uid}.jpg"
    for uid in test["StudyInstanceUID"].to_numpy()
]
# Show the first five entries as a quick visual check of the path layout.
test_image_paths[:5]

[PosixPath('/home/gianluca/git/kaggle/ranzcr/data/test_256/1.2.826.0.1.3680043.8.498.46923145579096002617106567297135160932.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/test_256/1.2.826.0.1.3680043.8.498.84006870182611080091824109767561564887.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/test_256/1.2.826.0.1.3680043.8.498.12219033294413119947515494720687541672.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/test_256/1.2.826.0.1.3680043.8.498.84994474380235968109906845540706092671.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/test_256/1.2.826.0.1.3680043.8.498.35798987793805669662572108881745201372.jpg')]

In [4]:
# One image path per row of `train`, laid out as
# <constants.data_path>/train_256/<StudyInstanceUID>.jpg.
train_image_paths = [
    constants.data_path / f"train_256/{uid}.jpg"
    for uid in train["StudyInstanceUID"].to_numpy()
]
# Show the first five entries as a quick visual check of the path layout.
train_image_paths[:5]

[PosixPath('/home/gianluca/git/kaggle/ranzcr/data/train_256/1.2.826.0.1.3680043.8.498.10980236531551060314989711662517682573.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/train_256/1.2.826.0.1.3680043.8.498.31644041654883936177483097186069054689.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/train_256/1.2.826.0.1.3680043.8.498.92698499133241834162015009420418179750.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/train_256/1.2.826.0.1.3680043.8.498.10069138128460654269681788885297696718.jpg'),
 PosixPath('/home/gianluca/git/kaggle/ranzcr/data/train_256/1.2.826.0.1.3680043.8.498.10994992736051720791598262985362954566.jpg')]

## Predict on training set

In [5]:
# Restore the classifier from the checkpoint file named below. The model is
# built for single-channel input and 11 outputs, one per entry of
# `constants.target_cols` as shown by the 11-row summaries further down.
checkpoint_path = Path("../models/arch=resnest14d_sz=128_fold=0.ckpt")
pretrained_model = learner.ImageClassifier.load_from_checkpoint(
    checkpoint_path,
    in_channels=1,
    num_classes=11,
)
# NOTE(review): presumably the Lightning-style `freeze()` (no gradients,
# eval mode) so the model is used for inference only -- confirm against
# `ml.learner.ImageClassifier`.
pretrained_model.freeze()

In [6]:
# Run the frozen model over the *training* images and attach the predicted
# probabilities to a copy of the training frame.
#
# NOTE(review): this cell is a near-verbatim duplicate of the test-set cell
# below (only the image list, `test_data` and the source frame differ);
# a shared helper function would remove the copy-paste.

# Settings consumed by the augmentation factory.
# NOTE(review): `test_data` points at '../data/train_256' while the paths
# actually fed to the data module come from `constants.data_path` -- confirm
# whether the factory reads `test_data` at all.
hparams = argparse.Namespace(
    **{
        "arch": "resnest14d",
        "sz": 128,
        "test_data": Path("../data/train_256"),
        "aug": "baseline",
    }
)

# Only `test_aug` is used in this cell; the other two are kept as returned.
train_aug, valid_aug, test_aug = augmentations.augmentations_factory(hparams)

# The training images go through the data module's *test* slot so they are
# processed with the test-time augmentations.
dm = data.ImageDataModule(
    test_image_paths=train_image_paths,
    test_augmentations=test_aug,
    batch_size=256,
)
dm.setup()

# NOTE(review): `dm.test_dataloader` is passed without being called --
# presumably `predict_proba` invokes it (or it is a property); verify.
outs = [batch_probs for batch_probs in pretrained_model.predict_proba(dm.test_dataloader)]
# Stack the per-batch outputs row-wise into a single array.
preds = np.vstack(outs)

# Work on a copy so the ground-truth labels in `train` stay untouched.
train_preds = train.copy()
train_preds[constants.target_cols] = preds
# Mean predicted probability per target column.
train_preds[constants.target_cols].mean()

ETT - Abnormal                0.003287
ETT - Borderline              0.056802
ETT - Normal                  0.229722
NGT - Abnormal                0.007696
NGT - Borderline              0.013912
NGT - Incompletely Imaged     0.082583
NGT - Normal                  0.158254
CVC - Abnormal                0.092647
CVC - Borderline              0.255826
CVC - Normal                  0.774290
Swan Ganz Catheter Present    0.028722
dtype: float64

## Predict on test set

In [7]:
# Run the frozen model over the *test* images and attach the predicted
# probabilities to a copy of the test frame.
#
# NOTE(review): this cell is a near-verbatim duplicate of the training-set
# cell above (only the image list, `test_data` and the source frame differ);
# a shared helper function would remove the copy-paste.

# Settings consumed by the augmentation factory.
# NOTE(review): `test_data` points at '../data/test_256' while the paths
# actually fed to the data module come from `constants.data_path` -- confirm
# whether the factory reads `test_data` at all.
hparams = argparse.Namespace(
    **{
        "arch": "resnest14d",
        "sz": 128,
        "test_data": Path("../data/test_256"),
        "aug": "baseline",
    }
)

# Only `test_aug` is used in this cell; the other two are kept as returned.
train_aug, valid_aug, test_aug = augmentations.augmentations_factory(hparams)

dm = data.ImageDataModule(
    test_image_paths=test_image_paths,
    test_augmentations=test_aug,
    batch_size=256,
)
dm.setup()

# NOTE(review): `dm.test_dataloader` is passed without being called --
# presumably `predict_proba` invokes it (or it is a property); verify.
outs = [batch_probs for batch_probs in pretrained_model.predict_proba(dm.test_dataloader)]
# Stack the per-batch outputs row-wise into a single array.
preds = np.vstack(outs)

# Work on a copy so the frame loaded into `test` stays untouched.
test_preds = test.copy()
test_preds[constants.target_cols] = preds
# Mean predicted probability per target column.
test_preds[constants.target_cols].mean()

ETT - Abnormal                0.001467
ETT - Borderline              0.023831
ETT - Normal                  0.106803
NGT - Abnormal                0.004306
NGT - Borderline              0.008315
NGT - Incompletely Imaged     0.037123
NGT - Normal                  0.073400
CVC - Abnormal                0.069812
CVC - Borderline              0.241402
CVC - Normal                  0.767635
Swan Ganz Catheter Present    0.012354
dtype: float64

## Train target distribution

In [8]:
# Per-column mean of the target columns in `train`, shown for comparison
# with the mean predicted probabilities reported in the cells above.
train.loc[:, constants.target_cols].mean()

ETT - Abnormal                0.002626
ETT - Borderline              0.037829
ETT - Normal                  0.240667
NGT - Abnormal                0.009274
NGT - Borderline              0.017585
NGT - Incompletely Imaged     0.091347
NGT - Normal                  0.159459
CVC - Abnormal                0.106206
CVC - Borderline              0.281222
CVC - Normal                  0.708839
Swan Ganz Catheter Present    0.027590
dtype: float64