## Initialization

In [None]:
# Install the DICOM reading stack (pydicom, pylibjpeg, python-gdcm) from wheel
# files stored under /kaggle/input; the gdcm wheel is tagged cp37.
!pip install /kaggle/input/rsna-2022-whl/{pydicom-2.3.0-py3-none-any.whl,pylibjpeg-1.4.0-py3-none-any.whl,python_gdcm-3.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl}
# The NVIDIA DALI wheel install is disabled (kept commented out).
#!pip install /kaggle/input/nvidia-dali-wheel/nvidia_dali_nightly_cuda110-1.22.0.dev20221213-6757685-py3-none-manylinux2014_x86_64.whl
# dicomsdl is the decoder imported by the notebook for reading the DICOM pixel data.
!pip install /kaggle/input/nvidia-dali-wheel/dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl

In [2]:
import argparse
import gc
import importlib
import os
import sys
from numba import cuda 

sys.path.append('../input/monai-v101/')
sys.path.append('../input/timm-0-6-9/pytorch-image-models-master/')

import shutil
import glob
import multiprocessing
import pydicom as dcm
from pydicom.pixel_data_handlers.util import apply_voi_lut
import dicomsdl
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import albumentations as A
from albumentations import *
from albumentations.pytorch.transforms import ToTensorV2
import json
import cv2
from sklearn.metrics import roc_auc_score
import timm
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedGroupKFold
from types import SimpleNamespace
import seaborn as sns
from prettytable import PrettyTable
from PIL import Image
from joblib import Parallel, delayed
import tensorflow as tf

sys.path.append("/kaggle/input/solution")
from Dataset import *
from Lookahead import *
from Model import *
from config import *
from utils import *
from trainer import *

In [3]:
# When True, the data-preparation cell swaps the test set for a single patient
# taken from the training data.
DEBUG = False

# patient_id that is selected when DEBUG is enabled.
debug_item = 10042

# Value handed to joblib.Parallel(n_jobs=...) for the DICOM -> PNG conversion.
NUM_OF_CORES = 56

In [4]:
# Inference-time overrides applied on top of the imported `cfg` object.
# They are written in the listed order, one attribute at a time.
_inference_overrides = {
    "img_size": (512, 256),        # used as (height, width) when the PNGs are resized
    "batch_size": 32,
    "val_batch_size": 64,
    "clip_thr": 0.6735,            # decision threshold applied to the aggregated score
    "selectedP": 4,                # exponent used by gem()
    "weights": None,               # Trained Model Weight
    "tta": False,
    "out_classes": ["cancer", "invasive"],
    "aux_input": ["age", "implant", "site"],
}
for _name, _value in _inference_overrides.items():
    setattr(cfg, _name, _value)

## Data Preparation

In [6]:
# data locations
cfg.data_dir = "/kaggle/input/rsna-breast-cancer-detection"
cfg.test_img_dir = "/kaggle/input/rsna-breast-cancer-detection/test_images/"
cfg.root_dir = "/kaggle/tmp/output/"
os.makedirs(cfg.root_dir, exist_ok=True)

# df
df_train = pd.read_csv(f"{cfg.data_dir}/train.csv")
df_train["prediction_id"] = df_train.patient_id.apply(str) + "_" + df_train.laterality
df_train['fold'] = 100
df_test = pd.read_csv(f"{cfg.data_dir}/test.csv")
df_sub = pd.read_csv(f"{cfg.data_dir}/sample_submission.csv")

# DEBUG: run the pipeline on one training patient instead of the test set
test_images = glob.glob(f"{cfg.test_img_dir}*/*.dcm")
if DEBUG:
    df_test = pd.read_csv(f"{cfg.data_dir}/train.csv")
    cfg.test_img_dir = "/kaggle/input/rsna-breast-cancer-detection/train_images/"
    test_images = glob.glob(f"{cfg.test_img_dir}{debug_item}/*.dcm")
    # .copy() makes the filtered rows an independent frame, so the column
    # assignment below does not write into a slice of the full table.
    df_test = df_test[df_test["patient_id"] == debug_item].copy()
    df_test["prediction_id"] = df_test.patient_id.apply(str) + "_" + df_test.laterality
print("Number of images :", len(test_images))


def _prepare_metadata(frame, view_to_idx):
    """Return a cleaned copy of a metadata frame with a fresh 0..n-1 index.

    * ``view`` is replaced by its integer code from ``view_to_idx``; a view
      that is missing from the table gets -1 so the column stays integer.
    * ``site_id`` is shifted to start at 0.
    * missing ``age`` values are filled with this frame's own mean age, then
      the column is divided by 100.
    """
    return frame.assign(
        view=frame['view'].map(view_to_idx).fillna(-1).astype(int),
        site_id=frame['site_id'] - 1,
        age=frame['age'].fillna(frame['age'].mean()) / 100,
    ).reset_index(drop=True)


# process df
# One code table, derived from the training views, is shared by both frames so
# that a given view always receives the same integer code.
view_to_idx = {view: idx for idx, view in enumerate(sorted(df_train['view'].unique()))}
df_train = _prepare_metadata(df_train, view_to_idx)
df_test = _prepare_metadata(df_test, view_to_idx)

Number of images : 4


In [7]:
def _apply_voi_window(dataset, img):
    """Apply the VOI window described in the DICOM header to ``img``.

    Reads WindowCenter, WindowWidth, BitsStored and VOILUTFunction from
    ``dataset``. For "SIGMOID" a new array is returned; for any other value
    the linear window is written into ``img`` itself, which is then returned.
    Any error (for example a missing header value) propagates to the caller.
    """
    # Load only the variables we need
    center = dataset["WindowCenter"]
    width = dataset["WindowWidth"]
    bits_stored = dataset["BitsStored"]
    voi_lut_function = dataset["VOILUTFunction"]

    # For sigmoid it's a list, otherwise a single value
    if isinstance(center, list):
        center = center[0]
    if isinstance(width, list):
        width = width[0]

    # Output range of the window: [0, 2**BitsStored - 1]
    y_min = 0
    y_max = float(2**bits_stored - 1)
    y_range = y_max

    # Anything that is not SIGMOID (including a missing value) is treated as LINEAR
    if voi_lut_function == "SIGMOID":
        return y_range / (1 + np.exp(-4 * (img - center) / width)) + y_min

    center -= 0.5
    width -= 1

    below = img <= (center - width / 2)
    above = img > (center + width / 2)
    between = np.logical_and(~below, ~above)

    img[below] = y_min
    img[above] = y_max
    if between.any():
        img[between] = ((img[between] - center) / width + 0.5) * y_range + y_min
    return img


def _crop_to_largest_component(img_u8):
    """Crop a uint8 image to the bounding box of its largest bright region.

    The foreground mask is ``pixel > 10`` on a copy with a 5-pixel border
    removed; regions are 8-connected. When the image is constant or the mask
    has no foreground at all, the image is returned uncropped instead of
    raising.
    """
    xmin = img_u8.min()
    xmax = img_u8.max()
    if xmax <= xmin:
        # Constant image: there is nothing to localise.
        return img_u8

    norm = np.empty_like(img_u8, dtype=np.uint8)
    dicomsdl.util.convert_to_uint8(img_u8, norm, xmin, xmax)

    mask = (norm[5:-5, 5:-5] > 10).astype(np.uint8)
    stats = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)[2]
    if stats.shape[0] < 2:
        # Only the background label exists, so keep the whole frame.
        return img_u8

    # Row 0 is the background; column 4 holds the component area.
    idx = stats[1:, 4].argmax() + 1
    x1, y1, w, h = stats[idx][:4]
    # NOTE(review): the box is measured on the border-trimmed mask but applied
    # to the untrimmed image, so the crop sits 5 px up/left of the component.
    # Left as is because the trained weights presumably saw the same
    # preprocessing - confirm before correcting.
    return img_u8[y1: y1 + h, x1: x1 + w]


def load_image_dicomsdl(i):
    """Convert row ``i`` of ``df_test`` from DICOM to a cropped, resized PNG.

    Steps: read ``<cfg.test_img_dir><patient_id>/<image_id>.dcm`` with
    dicomsdl, apply the VOI window (falling back to the raw pixels if that
    fails), min-max scale to uint8, invert MONOCHROME1 images, crop to the
    largest bright region, resize to ``cfg.img_size`` (height, width) with
    Lanczos resampling and save to
    ``<cfg.root_dir><patient_id>_<image_id>.png``.

    Args:
        i: positional row index into ``df_test``.

    Returns:
        None. The PNG file is the only output.
    """
    patient_id = str(df_test.patient_id.iloc[i])
    image_id = str(df_test.image_id.iloc[i])

    dcm_file = cfg.test_img_dir + patient_id + "/" + image_id + '.dcm'
    dataset = dicomsdl.open(dcm_file)
    img = dataset.pixelData()

    try:
        img = _apply_voi_window(dataset, img)
    except Exception:
        # Deliberate best-effort: if windowing fails for any reason, start
        # again from freshly decoded pixels (the linear branch may already
        # have modified the first array in place).
        img = dataset.pixelData()

    lowest = img.min()
    highest = img.max()
    if highest > lowest:
        img = (img - lowest) / (highest - lowest)
    else:
        # Constant image: avoid a 0/0 division that would produce NaNs.
        img = np.zeros(img.shape, dtype=np.float64)
    if dataset["PhotometricInterpretation"] == "MONOCHROME1":
        img = 1 - img

    img = (img * 255).astype(np.uint8)

    img = _crop_to_largest_component(img)
    img = Image.fromarray(img)
    # PIL expects (width, height); cfg.img_size is stored as (height, width).
    img = img.resize([cfg.img_size[1], cfg.img_size[0]], resample=Image.Resampling.LANCZOS)
    img.save(cfg.root_dir + patient_id + "_" + image_id + ".png", "PNG")

In [None]:
# Run load_image_dicomsdl once per row position of df_test, spread over
# NUM_OF_CORES joblib workers; tqdm reports how many rows have been dispatched.
row_positions = tqdm(range(len(df_test.patient_id)))
Parallel(n_jobs=NUM_OF_CORES)(
    delayed(load_image_dicomsdl)(row_position) for row_position in row_positions
)

## Evaluation

In [10]:
def gem(x, p=None):
    """Generalised (power) mean: ``mean(x ** p) ** (1 / p)``.

    Args:
        x: values to aggregate. Objects exposing ``.pow`` (e.g. a pandas
            Series, as passed by ``groupby().agg``) are used as they are;
            any other array-like (list, tuple, NumPy array) is flattened
            into a float Series first.
        p: exponent of the mean. Defaults to ``cfg.selectedP`` when omitted,
            which is the behaviour existing callers rely on.

    Returns:
        The scalar power mean of ``x``.
    """
    if p is None:
        p = cfg.selectedP
    if not hasattr(x, "pow"):
        x = pd.Series(np.asarray(x, dtype=float).ravel())
    powered = x.pow(p)
    return np.power(np.mean(powered), 1.0 / p)

In [11]:
# Fail fast with a readable message: cfg.weights is a placeholder (None) until
# it is pointed at a trained checkpoint, and torch.load(None) would otherwise
# fail with an unrelated-looking error.
if cfg.weights is None:
    raise ValueError(
        "cfg.weights is not set: assign the path of the trained checkpoint before running inference."
    )

set_seed(cfg.seed)
model = Model(cfg)
# map_location="cpu" lets the checkpoint load regardless of the device it was
# saved from; load_state_dict then copies the values into the model's tensors.
model.load_state_dict(torch.load(cfg.weights, map_location="cpu")["model"])
ter = trainer(cfg,
     df = df_train,
     model = model,
     test = True,
     test_df = df_test
   )
output_df = ter.predict("Test")

100%|██████████| 1/1 [00:08<00:00,  8.13s/it]


In [12]:
# Reduce the per-row 'cancer_outputs' scores to a single value per
# prediction_id with gem(), then turn that value into 0/1 using cfg.clip_thr.
df_pred = (
    output_df[['prediction_id', 'cancer_outputs']]
    .reset_index(drop=True)
    .rename(columns={"cancer_outputs": "cancer"})
    .groupby("prediction_id")
    .agg(func=gem)
    .reset_index()
)
df_pred["cancer"] = df_pred["cancer"].gt(cfg.clip_thr).astype(int)
df_pred

Unnamed: 0,prediction_id,cancer
0,10008_L,0
1,10008_R,0


In [13]:
# Attach the predictions to the ids listed in the sample submission; ids that
# received no prediction are written as 0.0.
df_sub = (
    df_sub[["prediction_id"]]
    .merge(df_pred, on="prediction_id", how="left")
    .fillna(0.0)
)
df_sub.to_csv('/kaggle/working/submission.csv', index=False)