In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import pandas as pd
import numpy as np
import gc

from PIL import Image
from tqdm.auto import tqdm

# pytorch lightning
import pytorch_lightning as pl

from src.config import *
from src.dataset import *
from src.models import *

import warnings 
warnings.filterwarnings('ignore')

In [2]:
# Root folder of the competition data and the sample-submission CSV inside it.
data_dir = '/data/siim-covid19-detection/'
test_csv_path = os.path.join(data_dir, 'sample_submission.csv')

In [3]:
# Load the sample submission; this frame is only used below to inspect the id suffixes.
df_test = pd.read_csv(test_csv_path)
df_test

Unnamed: 0,id,PredictionString
0,00188a671292_study,negative 1 0 0 1 1
1,004bd59708be_study,negative 1 0 0 1 1
2,00508faccd39_study,negative 1 0 0 1 1
3,006486aa80b2_study,negative 1 0 0 1 1
4,00655178fdfc_study,negative 1 0 0 1 1
...,...,...
2472,46719b856de1_image,none 1 0 0 1 1
2473,31c07523a69a_image,none 1 0 0 1 1
2474,f77d7d1aebab_image,none 1 0 0 1 1
2475,ccc5b63ca96d_image,none 1 0 0 1 1


In [4]:
# Tag every row with the last character of its id ('y' for "*_study",
# 'e' for "*_image"). Vectorised with the .str accessor, so it no longer
# requires the frame to carry a default 0..n-1 index the way the
# label-based `.loc[i, 'id']` loop did.
id_laststr_list = df_test['id'].str[-1].tolist()
df_test['id_last_str'] = id_laststr_list
df_test

Unnamed: 0,id,PredictionString,id_last_str
0,00188a671292_study,negative 1 0 0 1 1,y
1,004bd59708be_study,negative 1 0 0 1 1,y
2,00508faccd39_study,negative 1 0 0 1 1,y
3,006486aa80b2_study,negative 1 0 0 1 1,y
4,00655178fdfc_study,negative 1 0 0 1 1,y
...,...,...,...
2472,46719b856de1_image,none 1 0 0 1 1,e
2473,31c07523a69a_image,none 1 0 0 1 1,e
2474,f77d7d1aebab_image,none 1 0 0 1 1,e
2475,ccc5b63ca96d_image,none 1 0 0 1 1,e


In [5]:
# Number of rows whose id ends in 'y', i.e. the "*_study" rows.
study_len = int(df_test['id_last_str'].eq('y').sum())
study_len

1214

In [6]:
# Re-read the sample submission into a fresh frame; this copy receives the
# per-class prediction columns below and is what the test dataset iterates over.
test_df = pd.read_csv(test_csv_path)
test_df

Unnamed: 0,id,PredictionString
0,00188a671292_study,negative 1 0 0 1 1
1,004bd59708be_study,negative 1 0 0 1 1
2,00508faccd39_study,negative 1 0 0 1 1
3,006486aa80b2_study,negative 1 0 0 1 1
4,00655178fdfc_study,negative 1 0 0 1 1
...,...,...
2472,46719b856de1_image,none 1 0 0 1 1
2473,31c07523a69a_image,none 1 0 0 1 1
2474,f77d7d1aebab_image,none 1 0 0 1 1
2475,ccc5b63ca96d_image,none 1 0 0 1 1


In [7]:
# Add one zero-initialised column per study-level class, in this exact order.
for label_name in ('negative', 'typical', 'indeterminate', 'atypical'):
    test_df[label_name] = 0
test_df

Unnamed: 0,id,PredictionString,negative,typical,indeterminate,atypical
0,00188a671292_study,negative 1 0 0 1 1,0,0,0,0
1,004bd59708be_study,negative 1 0 0 1 1,0,0,0,0
2,00508faccd39_study,negative 1 0 0 1 1,0,0,0,0
3,006486aa80b2_study,negative 1 0 0 1 1,0,0,0,0
4,00655178fdfc_study,negative 1 0 0 1 1,0,0,0,0
...,...,...,...,...,...,...
2472,46719b856de1_image,none 1 0 0 1 1,0,0,0,0
2473,31c07523a69a_image,none 1 0 0 1 1,0,0,0,0
2474,f77d7d1aebab_image,none 1 0 0 1 1,0,0,0,0
2475,ccc5b63ca96d_image,none 1 0 0 1 1,0,0,0,0


In [8]:
# Names of the per-class prediction columns, in the order they were added to
# test_df. Spelled out explicitly instead of slicing `test_df.columns[2:]`,
# so the selection does not silently change if the CSV gains or loses a
# leading column.
label_cols = pd.Index(['negative', 'typical', 'indeterminate', 'atypical'])
label_cols

Index(['negative', 'typical', 'indeterminate', 'atypical'], dtype='object')

In [9]:
# Short alias for the project configuration. `Config` is bound as-is (it is not
# called here); it is presumably provided by `from src.config import *` — TODO confirm.
CFG = Config

In [10]:
def to_tensor(x, **kwargs):
    """Turn an image array into a channel-first float32 torch tensor.

    Args:
        x: NumPy array with layout (H, W) or (H, W, C).
        **kwargs: Accepted and ignored.

    Returns:
        torch.Tensor with layout (C, H, W) and dtype float32 whose values
        are the input values divided by 255.
    """
    # A 2-D input gets a trailing channel axis so it becomes (H, W, 1).
    hwc = x[:, :, np.newaxis] if x.ndim == 2 else x
    chw = hwc.transpose(2, 0, 1).astype('float32')
    return torch.from_numpy(chw / 255.)

def get_preprocessing():
    """Build the final preprocessing step applied after the resize transform.

    Takes no arguments. The pipeline contains a single ``A.Lambda`` that
    routes both the ``image`` and the ``mask`` target through ``to_tensor``.

    Returns:
        The ``A.Compose`` object wrapping that single step (``A`` is
        presumably albumentations, pulled in by the project star imports —
        TODO confirm).
    """
    tensor_step = A.Lambda(image=to_tensor, mask=to_tensor)
    return A.Compose([tensor_step])


def get_val_transforms(CFG):
    """Build the validation/test-time transform: a square resize only.

    Args:
        CFG: Object exposing ``image_size``; used for both height and width.

    Returns:
        ``A.Compose`` holding a single ``A.Resize`` step, created with ``p=1.0``.
    """
    side = CFG.image_size
    steps = [A.Resize(side, side)]
    return A.Compose(steps, p=1.0)

class SIIMTestDataset(Dataset):
    """Image-only dataset over the rows of a submission-style DataFrame.

    Each row's ``id`` selects a PNG under ``image_root``: ids whose last
    ``_``-separated token is ``study`` are read from ``<image_root>/study``,
    every other id from ``<image_root>/image``. The file name is the full id
    plus ``.png``.

    Args:
        df: DataFrame with an ``id`` column.
        transforms: Optional callable invoked as ``transforms(image=img)``;
            its ``['image']`` entry replaces the image.
        preprocessing: Optional callable invoked as ``preprocessing(image=img)``;
            its ``['image']`` entry replaces the image.
        image_root: Directory holding the ``study/`` and ``image/`` folders.
            Defaults to the location the notebook originally hard-coded.
    """

    def __init__(self, df, transforms=None, preprocessing=None,
                 image_root='/home/chen/ai-competition/siim-covid19-detection/test'):
        super().__init__()
        self.df = df
        self.transforms = transforms
        self.preprocessing = preprocessing
        self.image_root = image_root
        self.length = len(df)

    def __len__(self):
        """Return the number of rows captured at construction time."""
        return self.length

    def _image_path(self, sample_id):
        """Map a submission id to the path of its PNG file."""
        subdir = 'study' if sample_id.split('_')[-1] == 'study' else 'image'
        return os.path.join(self.image_root, subdir, '%s.png' % sample_id)

    def __getitem__(self, index):
        """Load, colour-convert and transform the image for row ``index``.

        Raises:
            FileNotFoundError: if ``cv2.imread`` returns ``None`` for the path.
        """
        row = self.df.iloc[index]
        image_path = self._image_path(row['id'])

        image = cv2.imread(image_path)
        # The None check must come before any further OpenCV call: passing
        # None to cvtColor raises an OpenCV error, which previously made the
        # FileNotFoundError below unreachable.
        if image is None:
            raise FileNotFoundError(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # apply augmentations
        if self.transforms:
            image = self.transforms(image=image)['image']
        else:
            image = torch.from_numpy(image)

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image)
            image = sample['image']

        return image


In [11]:
# Resize first, then convert to a channel-first float tensor.
val_transforms = get_val_transforms(CFG)
tensor_preprocessing = get_preprocessing()
test_dataset = SIIMTestDataset(
    test_df,
    transforms=val_transforms,
    preprocessing=tensor_preprocessing,
)
# Keep shuffle off: the predictions are later written back into test_df row by row.
test_loader = DataLoader(test_dataset, 32, shuffle=False, num_workers=4)

In [12]:
class SIIMPLModel(pl.LightningModule):
    """Lightning wrapper exposing only the forward pass of ``SIIMMaskNet``.

    The network is built from ``CFG.model_name`` and ``CFG.num_classes`` with
    ``pretrained=False`` and stored under the attribute ``model``.
    """

    def __init__(self):
        super().__init__()
        network = SIIMMaskNet(CFG.model_name, CFG.num_classes, pretrained=False)
        self.model = network

    def forward(self, x):
        """Delegate to the wrapped network and return its output unchanged."""
        outputs = self.model(x)
        return outputs

In [13]:
def do_predict(model, test_loader, tta=None, device=None):
    """Run the model over a loader and return softmax probabilities.

    For every batch the softmax of the model's first output is computed on the
    plain input, plus on each enabled test-time augmentation; the resulting
    probability tensors are averaged.

    Args:
        model: Callable module returning a ``(logit, mask)`` pair.
        test_loader: Iterable of image batches (channel-first tensors) with
            ``__len__`` and a ``dataset`` attribute.
        tta: Collection of augmentation names. ``'flip'`` adds a pass on the
            input flipped along dim 3; ``'scale'`` adds a pass on the input
            bilinearly upscaled by 1.25. Defaults to ``['']`` (no TTA).
        device: Device the batches are moved to. When omitted it is taken
            from the model's first parameter (CPU if the model has none), so
            the function no longer depends on a global ``device`` variable.

    Returns:
        np.ndarray with one row of averaged class probabilities per sample.

    Raises:
        AssertionError: if the number of processed samples differs from
            ``len(test_loader.dataset)``.
    """
    if tta is None:
        tta = ['']
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    print(f'tta is {tta}')

    # Switching to eval mode once is enough; it does not need repeating per batch.
    model.eval()

    test_probability = []
    test_num = 0
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    for t, image in tk0:
        batch_size = image.size(0)
        image = image.to(device)
        with torch.no_grad():
            probability = []
            logit, _ = model(image)
            probability.append(F.softmax(logit, -1))

            if 'flip' in tta:
                logit, _ = model(torch.flip(image, dims=(3,)))
                probability.append(F.softmax(logit, -1))

            if 'scale' in tta:
                scaled = F.interpolate(image, scale_factor=1.25, mode='bilinear', align_corners=False)
                logit, _ = model(scaled)
                probability.append(F.softmax(logit, -1))

            # Average over the plain pass and every enabled TTA pass.
            probability = torch.stack(probability, 0).mean(0)

        test_num += batch_size
        test_probability.append(probability.detach().cpu().numpy())
    assert test_num == len(test_loader.dataset), 'number of predictions does not match the dataset size'

    return np.concatenate(test_probability)

In [14]:
# ====================================================
# Helper functions
# ====================================================
def inference(model, states, test_loader, device, ttas):
    """Ensemble several checkpoints by averaging square-rooted probabilities.

    For each ``(checkpoint, tta)`` pair a model is restored from the
    checkpoint, moved to ``device`` and run through ``do_predict``; the square
    roots of the resulting probabilities are summed and divided by the number
    of checkpoints. Because of the square root, the returned rows need not
    sum to 1.

    Args:
        model: Instance (or class) providing ``load_from_checkpoint``.
        states: Sequence of checkpoint paths, one per ensemble member.
        test_loader: Loader forwarded to ``do_predict``.
        device: Device each restored model is moved to.
        ttas: Sequence of TTA specifications, one per checkpoint.

    Returns:
        Array of ensembled scores, one row per sample.

    Raises:
        ValueError: if ``states`` is empty or ``states`` and ``ttas`` differ
            in length. ``zip`` would otherwise drop the extra entries while
            the average still divided by ``len(states)``.
    """
    states = list(states)
    ttas = list(ttas)
    if not states:
        raise ValueError('inference() needs at least one checkpoint')
    if len(states) != len(ttas):
        raise ValueError(
            f'states and ttas must have the same length, got {len(states)} and {len(ttas)}'
        )

    # load_from_checkpoint is a classmethod; call it on the class, since
    # recent Lightning releases reject calling it on an instance.
    model_cls = model if isinstance(model, type) else type(model)

    probs = 0
    for state, tta in zip(states, ttas):
        fold_model = model_cls.load_from_checkpoint(state)
        fold_model.to(device)
        probability = do_predict(fold_model, test_loader, tta)
        probs = probs + probability ** 0.5
    return probs / len(states)

In [15]:
# ====================================================
# inference
# ====================================================
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Lightning wrapper instance handed to inference(), which restores the checkpoints.
pl_model = SIIMPLModel()

# Output directory of the "dropdup" training run
# (original note: "drop duplicate images and save one 6227").
models_path = (
    '/data/output/pl-siim-covid19-study-classification/'
    f'{CFG.model_name}_folds_cutout_{CFG.image_size}_30e_Adam_GradualWarmupSchedulerV2_'
    'LabelSmoothingBinaryCrossEntropy_ls0.0_lovasz0.0_v2_dropdup'
)

# effnet-b5 best cv=0.3877336, lb=0.535 -- one checkpoint per fold.
checkpoint_names = [
    'fold_0/epoch=17_mAP=0.387950.ckpt',
    'fold_1/epoch=17_mAP=0.401342.ckpt',
    'fold_2/epoch=17_mAP=0.382397.ckpt',
    'fold_3/epoch=20_mAP=0.370457.ckpt',
    'fold_4/epoch=18_mAP=0.396522.ckpt',
]
states = [f'{models_path}/{CFG.model_name}/{name}' for name in checkpoint_names]

# Horizontal-flip TTA for every fold (one independent list per checkpoint).
fold_ttas = [['flip'] for _ in states]

predictions = inference(pl_model, states, test_loader, device, fold_ttas)

tta is ['flip']


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=78.0), HTML(value='')))




KeyboardInterrupt: 

In [None]:
# Display the ensembled scores returned by inference().
predictions

In [None]:
# Write the ensembled scores into the label columns. This assumes the column
# order of `predictions` matches `label_cols` — TODO confirm against the
# class order used during training.
test_df[label_cols] = predictions
test_df

In [None]:
# test_df.to_csv(f'./predicts/{CFG.model_name}_folds_{CFG.image_size}_study_siim_test_ricord_pseudo_v3_flip.csv',index=False)