In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import pandas as pd
import numpy as np
import gc

from PIL import Image
from tqdm.auto import tqdm

# pytorch lighting
import pytorch_lightning as pl

from src.config import *
from src.dataset import *
from src.models import *
from src.utils import *

import warnings 
warnings.filterwarnings('ignore')

In [2]:
CFG = Config

In [3]:
df_folds = pd.read_csv(CFG.study_folds_csv_path)
df_folds

Unnamed: 0,study_id,Negative for Pneumonia,Typical Appearance,Indeterminate Appearance,Atypical Appearance,None,Opacity,label,fold,set,study,series,image
0,00086460a852,0,1,0,0,0,1,1,1,train,00086460a852,9e8302230c91,65761e66de9f
1,000c9c05fd14,0,0,0,1,1,0,3,2,train,000c9c05fd14,e555410bd2cd,51759b5579bc
2,00292f8c37bd,1,0,0,0,1,0,0,0,train,00292f8c37bd,73120b4a13cb,f6293b1c49e2
3,005057b3f880,1,0,0,0,1,0,0,1,train,005057b3f880,e34afce999c5,3019399c31f4
4,0051d9b12e72,0,0,0,1,0,1,3,3,train,0051d9b12e72,152f6ec68d86,bb4b1da810f3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6222,ffcb4630f46f,0,1,0,0,0,1,1,4,train,ffcb4630f46f,d4d43f6fe855,84ed5f7f71bf
6223,ffe4d6e8fbb0,0,1,0,0,0,1,1,0,train,ffe4d6e8fbb0,cc4df88fb786,e6215d0188e5
6224,ffe94fcb14fa,0,1,0,0,0,1,1,3,train,ffe94fcb14fa,c0382e85ff25,7d27b1bb3987
6225,ffebf1ef4a9c,0,1,0,0,0,1,1,1,train,ffebf1ef4a9c,e11401cd12d0,52478e480a15


In [4]:
from models import *
class SIIMPLModel(pl.LightningModule):
    def __init__(self):
        super(SIIMPLModel,self).__init__()
        self.model = SIIMMaskNet(CFG.model_name, CFG.num_classes, pretrained=False)

    def forward(self, x):
        return self.model(x)

ModuleNotFoundError: No module named 'models'

In [None]:
def do_predict(model, test_loader, tta=['']):
    test_probability = []
    test_num = 0
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    for t, (image, _, _) in tk0:
        batch_size = image.size(0)
        image = image.to(device)
        #<todo> TTA
        model.eval()
        with torch.no_grad():
            probability = []
            logit, mask = model(image)
            probability.append(F.softmax(logit,-1))

            if 'flip' in tta:
                logit, mask = model(torch.flip(image,dims=(3,)))
                probability.append(F.softmax(logit,-1))

            if 'scale' in tta:
                # size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None):
                scale_image = F.interpolate(image, scale_factor=1.25, mode='bilinear', align_corners=False)
                logit, mask = model(scale_image)
                probability.append(F.softmax(logit,-1))
                
#                 logit, mask = model(torch.flip(scale_image,dims=(3,)))
#                 probability.append(F.softmax(logit,-1))

            #--------------
            probability = torch.stack(probability,0).mean(0)

        test_num += batch_size
        test_probability.append(probability.data.cpu().numpy())
    assert(test_num == len(test_loader.dataset))

    probability = np.concatenate(test_probability)
    return probability

# ====================================================
# Helper functions
# ====================================================
def inference(model, states, test_loader, device, tta=['']):
    probs = 0
    for state in states:
        model = model.load_from_checkpoint(state)
        model.to(device)
        probability = do_predict(model, test_loader, tta)
        probs += probability**0.5
#         probs += probability
    probs = probs/len(states) 
    return probs

In [None]:
from sklearn.metrics import average_precision_score

def compute_oof_map(df):
    negative_map = average_precision_score(df['Negative for Pneumonia'], df['negative'])
    LOGGER.info(f'negative_map is {negative_map}')
    typical_map = average_precision_score(df['Typical Appearance'], df['typical'])
    LOGGER.info(f'typical_map is {typical_map}')
    indeterminate_map = average_precision_score(df['Indeterminate Appearance'], df['indeterminate'])
    LOGGER.info(f'indeterminate_map is {indeterminate_map}')
    atypical_map = average_precision_score(df['Atypical Appearance'], df['atypical'])
    LOGGER.info(f'atypical_map is {atypical_map}')
    none_map = average_precision_score(df['None'], df['negative'])
    LOGGER.info(f'none_map is {none_map}')
    mAP4 = (negative_map + typical_map + indeterminate_map + atypical_map) / 6
    mAP5 = (negative_map + typical_map + indeterminate_map + atypical_map + none_map) / 6
    return mAP4, mAP5

In [None]:
# ====================================================
# inference
# ====================================================
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


LOGGER = get_log(file_name=CFG.save_dir + 'valid.log')

### init pl model
pl_model = SIIMPLModel()

states = [
    f'{CFG.save_dir}/{CFG.model_name}/fold_0/epoch=17_mAP=0.387950.ckpt', 
    f'{CFG.save_dir}/{CFG.model_name}/fold_1/epoch=17_mAP=0.401342.ckpt',
    f'{CFG.save_dir}/{CFG.model_name}/fold_2/epoch=17_mAP=0.382397.ckpt',
    f'{CFG.save_dir}/{CFG.model_name}/fold_3/epoch=20_mAP=0.370457.ckpt',
    f'{CFG.save_dir}/{CFG.model_name}/fold_4/epoch=18_mAP=0.396522.ckpt',
]

fold_ttas = [
    ['flip'],
    ['flip'],
    ['flip'],
    ['flip'],
    ['flip'],
    ]

# fold_ttas = [
#     [''],
#     [''],
#     [''],
#     [''],
#     [''],
#     ]


average_map4s = []
average_map5s = []
for fold in CFG.trn_fold:
    fold_states = []
    df_valid = df_folds[df_folds.fold == fold].reset_index(drop=True)
    
    df_valid['negative'] = 0
    df_valid['typical'] = 0
    df_valid['indeterminate'] = 0
    df_valid['atypical'] = 0
    
    label_cols = df_valid.columns[13:]
    print(label_cols)
    valid_dataset = SIIMMaskDataset(
        CFG, 
        df_valid, 
        transforms=get_val_transforms(CFG),
        preprocessing=get_preprocessing()) 
    
    valid_dataloader = DataLoader(
        valid_dataset,
        CFG.batch_size*2, 
        num_workers=4, 
        shuffle=False) 
    
    fold_states.append(states[fold])
    fold_tta = fold_ttas[fold]
    LOGGER.info(f'fold{fold} tta is {fold_tta}')
    
    predictions = inference(pl_model, fold_states, valid_dataloader, device, fold_tta)
    df_valid[label_cols] = predictions
    
    mAP4,mAP5 = compute_oof_map(df_valid)
    LOGGER.info(f'fold{fold} average mAP*(4/6) is {mAP4}')
    LOGGER.info(f'fold{fold} average mAP*(5/6) is {mAP5}')
#     df_valid.to_csv(f'{CFG.save_dir}{CFG.model_name}/fold_{fold}/oof_preds_tta_flip.csv',index=False)
    average_map4s.append(mAP4)
    average_map5s.append(mAP5)

In [None]:
LOGGER.info(f'Each fold average mAP*(4/6) is {average_map4s}')
LOGGER.info(f'Each fold average mAP*(5/6) is {average_map5s}')

In [None]:
avg_mAP4 = np.array(average_map4s).mean()
avg_mAP5 = np.array(average_map5s).mean()

In [None]:
LOGGER.info(f'{len(CFG.trn_fold)} folds average mAP*(4/6) is {avg_mAP4}')
LOGGER.info(f'{len(CFG.trn_fold)} folds average mAP*(5/6) is {avg_mAP5}')