In [1]:
# !pip install /kaggle/input/rsna-2022-whl/pydicom-2.3.0-py3-none-any.whl
!pip install /kaggle/input/rsna-2022-whl/pylibjpeg-1.4.0-py3-none-any.whl
!pip install /kaggle/input/rsna-2022-whl/python_gdcm-3.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install /kaggle/input/opencvheadless/opencv_python_headless-4.7.0.68-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install /kaggle/input/rsnamodules/dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl 

Processing /kaggle/input/rsna-2022-whl/pylibjpeg-1.4.0-py3-none-any.whl
Installing collected packages: pylibjpeg
Successfully installed pylibjpeg-1.4.0
[0mProcessing /kaggle/input/rsna-2022-whl/python_gdcm-3.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Installing collected packages: python-gdcm
Successfully installed python-gdcm-3.0.15
[0mProcessing /kaggle/input/opencvheadless/opencv_python_headless-4.7.0.68-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Installing collected packages: opencv-python-headless
  Attempting uninstall: opencv-python-headless
    Found existing installation: opencv-python-headless 4.5.4.60
    Uninstalling opencv-python-headless-4.5.4.60:
      Successfully uninstalled opencv-python-headless-4.5.4.60
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
easyocr 1.6.2 requires opencv-python-h

In [2]:
!pip install --no-deps /kaggle/input/keras-cv-attention-models/keras_cv_attention_models-1.3.9-py3-none-any.whl

Processing /kaggle/input/keras-cv-attention-models/keras_cv_attention_models-1.3.9-py3-none-any.whl
Installing collected packages: keras-cv-attention-models
Successfully installed keras-cv-attention-models-1.3.9
[0m

In [3]:
import os
import cv2
import glob
import numpy as np
import pandas as pd
from tqdm import tqdm
import pydicom
import tensorflow as tf
import tensorflow_addons as tfa

import warnings
warnings.filterwarnings('ignore')

In [4]:
# Directory that holds the test DICOM files. The trailing slash is significant:
# image paths are later built from this value by plain string concatenation.
test_dir = '/kaggle/input/rsna-breast-cancer-detection/test_images/'

# Competition metadata tables (one row per image).
test_df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/test.csv")
train_df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/train.csv")

In [5]:
# Create a mirrored distribution strategy and report how many replicas it sees.
strategy = tf.distribute.MirroredStrategy()
replica_count = strategy.num_replicas_in_sync
print(f"Number of devices : {replica_count}")

Number of devices : 1


In [6]:
from keras_cv_attention_models import convnext, caformer

In [7]:
from tensorflow import keras
class pFBeta(keras.metrics.Metric):
    """Streaming probabilistic F-beta score (pF1 when ``beta == 1``).

    Three running sums are kept across batches:

    * ``pos`` -- sum of the labels,
    * ``ctp`` -- sum of the clipped predictions on samples labelled 1,
    * ``cfp`` -- sum of the clipped predictions on samples labelled 0.

    ``result()`` turns them into a precision / recall pair and combines the two
    into an F-beta value.

    Args:
        beta: Weight of recall relative to precision.
        epsilon: Small constant added to the precision and recall denominators.
        name: Metric name reported by Keras.
        **kwargs: Forwarded to ``keras.metrics.Metric``.
    """

    def __init__(self, beta=1, epsilon=1e-5, name="pF1", **kwargs):
        super().__init__(name=name, **kwargs)
        self.beta = beta  # when beta=1, metric is pF1
        self.epsilon = epsilon
        self.pos = self.add_weight(name="pos", initializer="zeros")
        self.ctp = self.add_weight(name="ctp", initializer="zeros")
        self.cfp = self.add_weight(name="cfp", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch. ``sample_weight`` is accepted but not used."""
        y_true = tf.cast(y_true, tf.float32)
        # Cast before clipping so non-float32 predictions (e.g. float16 under
        # mixed precision) can be added to the float32 accumulators.
        y_pred = tf.clip_by_value(tf.cast(y_pred, tf.float32), 0.0, 1.0)
        self.pos.assign_add(tf.reduce_sum(y_true))
        self.ctp.assign_add(tf.reduce_sum(y_pred[y_true == 1]))
        self.cfp.assign_add(tf.reduce_sum(y_pred[y_true == 0]))

    def result(self):
        """Return the F-beta value, or 0 when precision/recall are zero."""
        beta_squared = self.beta * self.beta
        c_precision = tf.math.divide_no_nan(
            self.ctp, self.ctp + self.cfp + self.epsilon
        )
        c_recall = tf.math.divide_no_nan(self.ctp, self.pos + self.epsilon)
        numerator = (1 + beta_squared) * c_precision * c_recall
        denominator = beta_squared * c_precision + c_recall
        # Precision and recall share the numerator `ctp`, so they are either both
        # zero or both positive; divide_no_nan yields 0 in the former case, which
        # replaces the previous tf.cond built on a Python `and` of two tensors.
        return tf.math.divide_no_nan(numerator, denominator)

    def reset_state(self):
        """Zero all accumulators."""
        self.pos.assign(0.0)
        self.ctp.assign(0.0)
        self.cfp.assign(0.0)

    def get_config(self):
        """Include ``beta`` and ``epsilon`` so the metric round-trips on save/load."""
        config = super().get_config()
        config.update({"beta": self.beta, "epsilon": self.epsilon})
        return config
        
# Metric objects: probabilistic F1, thresholded F1 (cut-off 0.5) and ROC AUC.
pf1 = pFBeta(beta=1, name="pF1")
f1 = tfa.metrics.F1Score(threshold=0.5, num_classes=1)
auc = keras.metrics.AUC(name="auc")

In [8]:
# Names Keras must be able to resolve while deserialising the saved model.
custom_objects = dict(pFBeta=pFBeta, auc=auc)
model = tf.keras.models.load_model(
    '/kaggle/input/rnsa-breast-weight2/rsna_with_roi_model_ConvNeXtTiny_imagenet_fold1.h5',
    custom_objects=custom_objects,
)

In [9]:
# model.summary()

In [10]:
# Preview the first rows of the test metadata.
test_df.head(n=5)

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,implant,machine_id,prediction_id
0,2,10008,736471439,L,MLO,81,0,21,10008_L
1,2,10008,1591370361,L,CC,81,0,21,10008_L
2,2,10008,68070693,R,MLO,81,0,21,10008_R
3,2,10008,361203119,R,CC,81,0,21,10008_R


In [11]:
# Build the DICOM path of every test image: <test_dir><patient_id>/<image_id>.dcm
# Vectorised string concatenation replaces a row-wise `apply(axis=1)`: it is
# faster and converts each id column on its own, so integer ids can never be
# upcast to floats (e.g. "10008.0") the way they can inside a mixed-dtype row.
test_df['image_path'] = (
    test_dir
    + test_df['patient_id'].astype(str)
    + '/'
    + test_df['image_id'].astype(str)
    + '.dcm'
)
test_df.head()

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,implant,machine_id,prediction_id,image_path
0,2,10008,736471439,L,MLO,81,0,21,10008_L,/kaggle/input/rsna-breast-cancer-detection/tes...
1,2,10008,1591370361,L,CC,81,0,21,10008_L,/kaggle/input/rsna-breast-cancer-detection/tes...
2,2,10008,68070693,R,MLO,81,0,21,10008_R,/kaggle/input/rsna-breast-cancer-detection/tes...
3,2,10008,361203119,R,CC,81,0,21,10008_R,/kaggle/input/rsna-breast-cancer-detection/tes...


In [12]:
import dicomsdl as dicoml
from tqdm import tqdm

In [13]:
# # import time
# # start_time = time.time()

# size = 512
# batch_size = 4  # adjust this based on your system's capabilities
# preds = []

# for i in range(0, len(test_df), batch_size):
#     batch_paths = test_df.image_path[i:i+batch_size]
#     batch_images = []

#     for f in batch_paths:
#         dicom = dicoml.open(f)
#         img = dicom.pixelData()
#         img = (img - img.min()) / (img.max() - img.min())
#         if dicom.getPixelDataInfo()['PhotometricInterpretation'] == "MONOCHROME1":
#             img = 1 - img

#         image = (img * 255).astype(np.uint8)
#         img = cv2.resize(image, (size, size))
#         img = np.stack((img,)*3, axis=-1)
#         batch_images.append(img)

#     batch_preds = model.predict(np.array(batch_images)).flatten()
#     preds.extend(batch_preds)
# print(batch_preds)
# # print(time.time() - start_time)

In [14]:
def extract_roi(image):
    """Crop ``image`` to the bounding box of its largest non-zero region.

    Args:
        image: 2-D array, or a 3-D array that ``cv2.cvtColor`` can convert with
            ``COLOR_BGR2GRAY``.

    Returns:
        The slice ``image[y:y+h, x:x+w]`` for the bounding rectangle of the
        contour with the largest area, or ``image`` unchanged when no contour
        is found (e.g. an all-zero image).
    """
    if len(image.shape) > 2:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Binarise *before* narrowing to uint8. A direct astype(np.uint8) on wider
    # integer data wraps modulo 256, turning values such as 256 or 512 into 0
    # and punching background holes into the foreground mask.
    mask = (gray != 0).astype(np.uint8)

    contours, _hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing to crop to; max() on an empty sequence would raise ValueError.
        return image

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return image[y: y + h, x: x + w]


def resize_image(image, width=None, height=None, inter=cv2.INTER_LINEAR):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (rows, columns).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; only used when ``width`` is None.
        inter: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    if width is None and height is None:
        return image

    src_h, src_w = image.shape[:2]

    if width is None:
        scale = height / float(src_h)
        # Clamp to 1: int() truncation can yield 0 for extreme aspect ratios,
        # which cv2.resize rejects.
        dim = (max(1, int(src_w * scale)), height)
    else:
        scale = width / float(src_w)
        dim = (width, max(1, int(src_h * scale)))

    return cv2.resize(image, dim, interpolation=inter)


def read_dicom(path, image_size=None, voi_lut=True, fix_monochrome=True, keep_aspect_ratio=True, crop_roi=True):
    """Load a DICOM file and return it as an 8-bit, 3-channel square-resized image.

    Steps: read pixel data with dicomsdl, optionally invert MONOCHROME1 images,
    optionally crop with ``extract_roi``, min-max scale to 0..255 (uint8),
    resize, and replicate the single channel three times on the last axis.

    Args:
        path: Path of the DICOM file (converted with ``str``).
        image_size: Output size. An int gives a square image; a 2-item
            tuple/list is passed to ``cv2.resize`` as (width, height).
            ``None`` keeps the previous fixed size of 224 x 224.
        voi_lut: Currently unused; kept for interface compatibility.
        fix_monochrome: Invert pixel values when the file's
            PhotometricInterpretation is ``'MONOCHROME1'``.
        keep_aspect_ratio: Currently unused; kept for interface compatibility.
        crop_roi: Crop to the region returned by ``extract_roi`` before scaling.

    Returns:
        ``np.uint8`` array with the single channel stacked three times.
    """
    dicom = dicoml.open(str(path))
    data = dicom.pixelData()

    if fix_monochrome and dicom.PhotometricInterpretation == 'MONOCHROME1':
        data = np.amax(data) - data

    if crop_roi:
        data = extract_roi(data)

    # Work in float64 so `max - min` cannot overflow a narrow integer dtype.
    data = data.astype(np.float64)
    lo, hi = data.min(), data.max()
    if hi > lo:
        data = (data - lo) / (hi - lo)
    else:
        # Constant image: avoid 0/0 (NaN) and emit an all-black frame instead.
        data = np.zeros_like(data)
    data = (data * 255).astype(np.uint8)

    # Honour the requested size; previously it was ignored in favour of 224.
    if image_size is None:
        target = (224, 224)
    elif isinstance(image_size, (tuple, list)):
        target = (int(image_size[0]), int(image_size[1]))
    else:
        target = (int(image_size), int(image_size))

    data = cv2.resize(data, target)
    data = np.stack((data,)*3, axis=-1)

    return data

In [15]:
# Run the model over all test images in small batches and collect one score
# per image in `preds` (same order as `test_df`).
batch_size = 4  # adjust this based on your system's capabilities
preds = []

# Arguments forwarded positionally to read_dicom.
image_size = 224
voi_lut = False
fix_monochrome = True
keep_aspect_ratio = False
crop_roi = True

# Defined up front so the final print cannot raise NameError on an empty test_df.
batch_preds = np.array([], dtype=np.float32)

for i in tqdm(range(0, len(test_df), batch_size), desc="Predicting"):
    # .iloc makes the positional slicing explicit.
    batch_paths = test_df.image_path.iloc[i:i+batch_size]
    batch_images = [
        read_dicom(path, image_size, voi_lut, fix_monochrome, keep_aspect_ratio, crop_roi)
        for path in batch_paths
    ]

    # verbose=0: the tqdm bar above replaces Keras' per-call progress output.
    batch_preds = model.predict(np.array(batch_images), verbose=0).flatten()
    preds.extend(batch_preds)

# Scores of the last processed batch, as a quick sanity check.
print(batch_preds)

[0.03318155 0.05406421 0.02383739 0.01932296]


In [16]:
# Average the per-image scores of each prediction_id, binarise the mean at 0.4
# and write the result to submission.csv.
sub = (
    test_df[['prediction_id']]
    .assign(cancer=preds)
    .groupby("prediction_id")
    .mean()
    .reset_index()
)
sub["cancer"] = sub["cancer"].gt(0.4).astype(int)
sub.to_csv('submission.csv', index=False)
sub.head()

Unnamed: 0,prediction_id,cancer
0,10008_L,0
1,10008_R,0
