In [20]:
import os
import glob
from tqdm import tqdm
import pandas as pd

import torch
from torch import nn

# Expose only GPU index 1 to this process. The variable has to be set before
# the first CUDA call below for the restriction to take effect.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

In [28]:
from libs.dali_helper import J2kIterator, j2k_decode_pipeline, JllIterator, jll_process_pipeline, CustomDALIGenericIterator
from libs.image_processing import get_yolo
from libs.efficientnet import EffNetModel, predict

In [29]:
# Root directory of the test DICOM files. Per-image paths are built from it as
# <IMG_DIR>/<patient_id>/<image_id>.dcm when the test table is loaded.
IMG_DIR = "/home/data4/share/rsna-breast-cancer-detection/test_images"

In [30]:
# Load the test metadata and attach the full path of every DICOM file.
df = pd.read_csv("/home/data4/share/rsna-breast-cancer-detection/test.csv")
df['dcm'] = IMG_DIR + '/' + df.patient_id.astype(str) + '/' + df.image_id.astype(str) + '.dcm'

# Upper bound on the number of rows kept per site.
# NOTE(review): rows beyond this cap receive no prediction -- confirm the cap
# is intended for the final submission run and not only for debugging.
MAX_IMAGES_PER_SITE = 320

# Split by acquisition site: site_id 2 feeds the J2K pipeline, site_id 1 the
# JLL pipeline. `.copy()` makes each subset an independent frame so that the
# 'cancer' column assigned to it later does not raise SettingWithCopyWarning.
df_j2k = df[df["site_id"] == 2].reset_index(drop=True).head(MAX_IMAGES_PER_SITE).copy()
df_jll = df[df["site_id"] == 1].reset_index(drop=True).head(MAX_IMAGES_PER_SITE).copy()
print('j2k:', len(df_j2k), 'jll:', len(df_jll))

j2k: 4 jll: 0


In [5]:
# Load the YOLO model from libs.image_processing; it is handed to the DALI
# iterators as `yolo_model`.
# NOTE(review): presumably used to crop the region of interest -- confirm in libs.
roi_model = get_yolo()

YOLOv5 🚀 v7.0-312-g1bcd17ee Python-3.8.19 torch-1.13.1+cu116 CUDA:0 (Quadro RTX 8000, 48593MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


In [31]:
# Data loading chain for the site-2 (J2K) images:
# external source iterator -> DALI decode pipeline -> iterator wrapping the pipeline.
# The image directory comes from IMG_DIR so the dataset location is defined
# in exactly one place instead of being repeated as a literal.
j2k_it = J2kIterator(df_j2k, batch_size=32, img_dir=IMG_DIR)
# The pipeline batch size must agree with the batch size of the source iterator.
# NOTE(review): width/height are presumably the output image size -- confirm in libs.dali_helper.
j2k_pipe = j2k_decode_pipeline(j2k_it, width=768, height=768, batch_size=32, num_threads=2, device_id=0, 
                               py_num_workers=4, exec_async=False, exec_pipelined=False)
# `length` tells the loader how many samples the frame holds.
j2k_loader = CustomDALIGenericIterator(yolo_model=roi_model, length=len(df_j2k), save_img=False, pipelines=[j2k_pipe])

In [32]:
# Data loading chain for the site-1 (JLL) images, built only when such images
# exist; `jll_loader` stays undefined otherwise and later cells apply the same guard.
if len(df_jll) != 0:
    # The image directory comes from IMG_DIR so the dataset location is defined
    # in exactly one place instead of being repeated as a literal.
    jll_it = JllIterator(df_jll, batch_size=32, img_dir=IMG_DIR)
    # The pipeline batch size must agree with the batch size of the source iterator.
    jll_pipe = jll_process_pipeline(jll_it, width=768, height=768, batch_size=32, num_threads=2, device_id=0, 
                                    py_num_workers=4, exec_async=False, exec_pipelined=False)
    # `length` tells the loader how many samples the frame holds.
    jll_loader = CustomDALIGenericIterator(yolo_model=roi_model, length=len(df_jll), save_img=False, pipelines=[jll_pipe])

In [8]:
# For each of the four folds, pick two checkpoints from ./ckpt/f<fold>_noex/:
# the one with the lowest validation loss and the one with the highest ROC-AUC.
# Both metrics are read from the file name, which is expected to end in
# "..._roc_<auc>_loss_<loss>.pth".
best_weights_vloss, best_weights_auc = [], []
for fold in range(4):
    # Starting values double as placeholders: when no checkpoint beats them,
    # the strings "vloss" / "auc" are stored instead of a path.
    lowest_loss, lowest_loss_path = 10000.0, "vloss"
    highest_auc, highest_auc_path = 0.5, "auc"

    for ckpt_path in glob.glob(f"./ckpt/f{fold}_noex/*.pth"):
        fields = ckpt_path.split("_")

        # Last field is "<loss>.pth"; drop the 4-character extension.
        loss = float(fields[-1][:-4])
        if loss < lowest_loss:
            lowest_loss, lowest_loss_path = loss, ckpt_path

        # Third field from the end is the AUC value.
        auc = float(fields[-3])
        if auc > highest_auc:
            highest_auc, highest_auc_path = auc, ckpt_path

    best_weights_vloss.append(lowest_loss_path)
    best_weights_auc.append(highest_auc_path)

In [9]:
# Show the best-AUC checkpoint chosen for each fold; a placeholder string "auc"
# in this list means no checkpoint was found for that fold.
best_weights_auc

['./ckpt/f0_noex/f0_ep42_roc_0.740_loss_0.262.pth',
 './ckpt/f1_noex/f1_ep46_roc_0.754_loss_0.307.pth',
 './ckpt/f2_noex/f2_ep50_roc_0.765_loss_0.318.pth',
 './ckpt/f3_noex/f3_ep38_roc_0.715_loss_0.324.pth']

In [33]:
# Build one EffNetModel per fold from the best-AUC checkpoints; the resulting
# list is passed to `predict` as the ensemble.
EffNets = []
for weight in tqdm(best_weights_auc):
    model = EffNetModel()
    model.to(DEVICE)
    # Map the checkpoint tensors onto DEVICE rather than a hard-coded 'cuda:0',
    # so loading also works when DEVICE fell back to the CPU.
    checkpoint = torch.load(weight, map_location=DEVICE)
    # NOTE(review): 'state_dict_ema' presumably holds the EMA-averaged weights -- confirm against the training code.
    model.load_state_dict(checkpoint['state_dict_ema'])
    # Inference only: switch off training-time layer behaviour.
    model.eval()
    EffNets.append(model)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.47it/s]


In [34]:
# Run the ensemble over the J2K loader and store the scores next to the metadata.
j2k_pred = predict(EffNets, j2k_loader)
df_j2k['cancer'] = j2k_pred

# The JLL loader only exists when there are JLL rows, so guard on that.
if len(df_jll) > 0:
    jll_pred = predict(EffNets, jll_loader)
    df_jll['cancer'] = jll_pred

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s]


In [35]:
# Cut-off applied to the averaged score: values above it are labelled 1.
THRESHOLD = 0.1

# Gather the per-image predictions of both sites into one table. When only the
# J2K frame exists, work on a copy so that the column added below does not
# silently modify df_j2k.
if len(df_jll)!=0:
    df = pd.concat([df_j2k, df_jll]).reset_index(drop=True)
else:
    df = df_j2k.copy()

# One prediction per patient and breast side.
df['prediction_id'] = df['patient_id'].astype(str) + '_' + df['laterality']
# Average the image-level scores within each prediction_id.
sub = df[['prediction_id', "cancer"]].groupby("prediction_id").mean().reset_index()
# Binarise the averaged score directly. It must NOT be cast to int beforehand:
# that truncates every score below 1 to 0, which makes the threshold
# comparison meaningless and yields an all-zero submission.
sub["cancer"] = (sub["cancer"] > THRESHOLD).astype(int)
#sub.to_csv('/kaggle/working/submission.csv', index = False)
sub.tail()

Unnamed: 0,prediction_id,cancer
0,10008_L,0
1,10008_R,0
