In [1]:
import os
import torch
import random
import warnings

import numpy as np
import pandas as pd
import torch.backends.cudnn as cudnn
import segmentation_models_pytorch as smp

from tqdm import tqdm
from minetorch.metrics import confusion_matrix
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from utils import *
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
from efficientnet_pytorch.model import EfficientNet
import torchvision.models as models

warnings.filterwarnings("ignore")
seed = 99
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.set_default_tensor_type("torch.FloatTensor")
cudnn.benchmark = True

In [2]:
test_df = pd.read_csv('stage_2_sample_submission.csv')
test_dir = './stage2/test512/'

In [3]:
test_df.head()

Unnamed: 0,ID,Label
0,ID_0fbf6a978_epidural,0.5
1,ID_0fbf6a978_intraparenchymal,0.5
2,ID_0fbf6a978_intraventricular,0.5
3,ID_0fbf6a978_subarachnoid,0.5
4,ID_0fbf6a978_subdural,0.5


In [4]:
test_set = TestDataset(test_dir,test_df)
test_set_tta = TestDataset(test_dir, test_df, TTA=True)

In [5]:
test_loader = DataLoader(
    test_set,
    batch_size=2,
    num_workers=4,
    pin_memory=True,
)
test_tta_loader = DataLoader(
    test_set_tta,
    batch_size=2,
    num_workers=4,
    pin_memory=True,
)

In [6]:
val_batch_size = 2
df = pd.read_csv('train.csv')
train_dir = './data/train512/'

_, val_df = train_test_split(df, test_size=0.1)

val_set = RSNADataset(val_df,train_dir,make_transforms('val'))

val_loader = DataLoader(
    val_set,
    batch_size=val_batch_size,
    num_workers=4,
    pin_memory=True
)

In [7]:
model = EfficientNet.from_pretrained('efficientnet-b4',num_classes=6)
#num_classes = 6
#model = models.inception_v3(pretrained=True, aux_logits=False)
#model.fc = torch.nn.Linear(2048, num_classes)
model = model.cuda()
model.eval()
ckpt_path = "/home/pengbo/project/RSNA/efficientnet-b4/fold-1/models/epoch_5.pth.tar"
device = torch.device("cuda:0")
state = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
model.load_state_dict(state["state_dict"])

Loaded pretrained weights for efficientnet-b4


<All keys matched successfully>

In [8]:
# threshold search

results = []
c_matrix = {}
for idx, (images,targets) in tqdm(enumerate(val_loader), total=len(val_loader)):
    logits = torch.sigmoid(model(images.cuda()))
    for i, logit in enumerate(logits):
        for threshold in range(61,65,1):
            threshold /= 100
            if '%s'%(threshold) not in c_matrix.keys():
                c_matrix['%s'%(threshold)] = 0
            c_matrix['%s'%(threshold)] += confusion_matrix(logit, targets[i].cuda(),threshold=threshold)[0]

for threshold, matrix in c_matrix.items():
    precision = matrix[0] / (matrix[0] + matrix[1])
    recall = matrix[0] / (matrix[0] + matrix[2])
    precision = precision.detach().cpu().numpy()
    recall = recall.detach().cpu().numpy()
    results.append([
        threshold,
        (1.3*precision[0] + recall[0]),
        (1.3*precision[1] + recall[1]),
        (1.3*precision[2] + recall[2]),
        (1.3*precision[3] + recall[3]),
        (1.3*precision[4] + recall[4]),
        (1.3*precision[5] + recall[5])
    ]
    )

output = pd.DataFrame(
    results,
    columns=[
        'threshold',
        'epidural',
        'intraparenchymal',
        'intraventricular',
        'subarachnoid',
        'subdural',
        'any'
    ]
)


100%|██████████| 33713/33713 [15:44<00:00, 35.68it/s]


In [9]:
output.head()

Unnamed: 0,threshold,epidural,intraparenchymal,intraventricular,subarachnoid,subdural,any
0,0.61,1.171239,1.850551,1.86911,1.512974,1.616342,1.922814
1,0.62,1.183247,1.857687,1.876031,1.523588,1.606556,1.931487
2,0.63,1.210619,1.861192,1.887697,1.535232,1.60953,1.940677
3,0.64,1.226063,1.865168,1.901492,1.547648,1.599741,1.945747


In [10]:
thresholds = []
for cat in output.columns[1:]:
    print('best thresholds for %s: '%(cat), float(output.iloc[np.argmax(output[cat])]['threshold']))
    thresholds.append(float(output.iloc[np.argmax(output[cat])]['threshold']))

best thresholds for epidural:  0.64
best thresholds for intraparenchymal:  0.64
best thresholds for intraventricular:  0.64
best thresholds for subarachnoid:  0.64
best thresholds for subdural:  0.61
best thresholds for any:  0.64


In [11]:
print(thresholds)

[0.64, 0.64, 0.64, 0.64, 0.61, 0.64]


In [8]:
#thresholds = [0.64, 0.64, 0.64, 0.64, 0.61, 0.64]

In [12]:
def threshold_map(logits, threshold):
    # map logtis from threshold to 0.5
    # ------------ 0.5 --------------------
    # |               \                   |
    # ----------------threshold------------
    x = (logits <= threshold).float() * (0.5 / threshold)
    y = (logits > threshold).float() * (0.5 / (1 - threshold))
    y_ = (logits > threshold).float() * (1 - 0.5 / (1 - threshold))
    logits = (x + y) * logits + y_
    return logits.detach().cpu().numpy()

In [None]:
predictions = []
for i, (batch, tta_batch) in enumerate(tqdm(zip(test_loader, test_tta_loader), total=len(test_loader))):
    fnames, imgs = batch
    fnames_tta, imgs_tta = tta_batch
    preds = torch.sigmoid(model(imgs.cuda()))*0.65 + torch.sigmoid(model(imgs_tta.cuda()))*0.35
    preds = threshold_map(preds, torch.Tensor(thresholds).cuda())
    #preds = preds.detach().cpu().numpy()
    for fname, pred in zip(fnames, preds):
        predictions.append([fname, pred[0], pred[1], pred[2], pred[3], pred[4], pred[5]])

 44%|████▎     | 26488/60616 [23:11<29:53, 19.03it/s]

In [21]:
df = pd.DataFrame(predictions, columns=['name', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural','any'])

In [None]:
values = []
for i,j in df.iterrows():
    fname = j['name'].split('.')[0]
    values.append([fname + '_epidural',j['epidural']])
    values.append([fname + '_intraparenchymal',j['intraparenchymal']])
    values.append([fname + '_intraventricular', j['intraventricular']])
    values.append([fname + '_subarachnoid', j['subarachnoid']])
    values.append([fname + '_subdural', j['subdural']])
    values.append([fname + '_any', j['any']])

In [None]:
res = pd.DataFrame(values, columns=['ID', 'Label'])

In [None]:
len(res)

In [None]:
res[res['Label'] > 0.5]

In [None]:
res.to_csv('./submission/submission__stage2_b4_5_4thresh_tta2.csv',index=False)

In [None]:
'''import pandas as pd
a = pd.read_csv('submission.csv')
a.head()
b = np.array(a['Label'])
b = b.astype(np.float)
b[b>0.8] = 0.99
b[b<0.2] = 0.000001
c = pd.DataFrame(b,columns=['Label_'])
d = pd.concat([a,c],axis=1)
d = d.drop(['Label'],axis=1)
d.columns = ['ID', 'Label']
d.head()
d.to_csv('submission.csv',index=False)'''

In [8]:
# Average Ensemble
a = pd.read_csv('./submission/submission__stage2_b0_5_4thresh_tta2.csv')
b = pd.read_csv('./submission/submission__stage2_b1_5_4thresh_tta2.csv')
#c = pd.read_csv('./submission/submission__stage2_b2_5_4thresh_tta2.csv')
d = pd.read_csv('./submission/submission__stage2_b3_5_4thresh_tta2.csv')
#e = pd.read_csv('./submission/submission__stage2_b4_5_4thresh_tta2.csv')
f = pd.read_csv('./submission/submission__stage2_inv3_6_tta2.csv')
#g = pd.read_csv('./submission/submission__stage2_rn34_5_4thresh_tta2.csv')

result = [] 
for i in tqdm(range(len(a))):
    res = (a.iloc[i]['Label'] + b.iloc[i]['Label'] + 1.5*d.iloc[i]['Label'] + 0.5*f.iloc[i]['Label']) / 4
    result.append(res)
    
res = pd.concat([a,pd.DataFrame(result, columns=['Label_'])],1)
res = res.drop('Label',axis=1)
res.rename(columns={"Label_": "Label"}).to_csv('submission/submission_stage2_ensemble_final2.csv',index=False)


100%|██████████| 727392/727392 [05:36<00:00, 2163.33it/s]


In [3]:
print(1)

1


In [9]:
!kaggle competitions submit -c rsna-intracranial-hemorrhage-detection -f ./submission/submission_stage2_ensemble_final2.csv -m "new final"

100%|██████████████████████████████████████| 32.1M/32.1M [00:13<00:00, 2.50MB/s]
Successfully submitted to RSNA Intracranial Hemorrhage Detection