In [1]:
!cp /kaggle/input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
!rm -rf ./gdcm.tar

gdcm/
gdcm/conda-4.8.4-py37hc8dfbb8_2.tar.bz2
gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
gdcm/libjpeg-turbo-2.0.3-h516909a_1.tar.bz2

Downloading and Extracting Packages
######################################################################## | 100% 
Preparing transaction: - done
Verifying transaction: | / done
Executing transaction: \ done


In [2]:
import sys
sys.path.append('/kaggle/input/efficientnet-keras-dataset/efficientnet_kaggle')
import efficientnet.tfkeras as efn

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from glob import glob
import gc
from tqdm import tqdm
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tensorflow.keras import Model, Sequential
import cv2
import math
from PIL import Image
from shutil import copytree
from tensorflow.keras import Model 
from tensorflow.keras import layers

In [4]:
def dicom2array(path, voi_lut=True, fix_monochrome=True, yolo=False):
    dicom = pydicom.read_file(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    if yolo:
        return data
    image = cv2.cvtColor(data, cv2.COLOR_GRAY2RGB)
    return image

In [5]:
filepaths = glob('/kaggle/input/siim-covid19-detection/test/**/*dcm',recursive=True)
test_df = pd.DataFrame(data={'filepath': sorted(filepaths)})
test_df['image_id'] = test_df.filepath.map(lambda x: x.split('/')[-1].replace('.dcm', '')+'_image')
test_df['study_id'] = test_df.filepath.map(lambda x: x.split('/')[-3].replace('.dcm', '')+'_study')
test_df.head()

Unnamed: 0,filepath,image_id,study_id
0,/kaggle/input/siim-covid19-detection/test/0018...,3dcdfc352a06_image,00188a671292_study
1,/kaggle/input/siim-covid19-detection/test/004b...,c39146cbda47_image,004bd59708be_study
2,/kaggle/input/siim-covid19-detection/test/0050...,951211f8e1bb_image,00508faccd39_study
3,/kaggle/input/siim-covid19-detection/test/0064...,5e0e7acd9c7d_image,006486aa80b2_study
4,/kaggle/input/siim-covid19-detection/test/0065...,5b8ee5baa1d5_image,00655178fdfc_study


In [6]:
def image_process(image_array, image_size=(600, 600)):
    image = tf.cast(image_array, tf.float32) / 255.
    image = tf.image.resize(image, image_size)
    return image

In [7]:
def apply_augmentation(image):
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_flip_left_right(image)
    return image

In [8]:
BATCH_SIZE = 32

In [9]:
class TestDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, image_paths, TTA=None, batch_size=BATCH_SIZE):
        self.image_paths = image_paths
        self.batch_size = BATCH_SIZE
        self.TTA = TTA
        
    def __len__(self):
        return math.ceil(len(self.image_paths) / self.batch_size)
    
    def __getitem__(self, indx):
        images = self.image_paths[indx*self.batch_size:(indx+1)*self.batch_size]
        images_batch = np.array(list(map(dicom2array, images)))
        images_batch = np.array(list(map(image_process, images_batch)))
        if self.TTA:
            images_batch = np.array(list(map(apply_augmentation, images_batch)))
        return images_batch

In [10]:
model = tf.keras.Sequential([
    efn.EfficientNetB7(
        input_shape=(600, 600, 3),
        weights='imagenet',
        include_top=False),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(4, activation='softmax')
])

In [11]:
test_dataset = TestDataGenerator(test_df.filepath.tolist())

In [12]:
num_folds = 5
predict = []
for i in tqdm(range(num_folds)):
    model_path = f'../input/slim-weights-effnet/model{i}.h5'
    model.load_weights(model_path)
    pred = model.predict(test_dataset)
    predict.append(pred)
    del pred 
    gc.collect()
predictions = np.mean(predict, axis=0)

  if sys.path[0] == '':
100%|██████████| 5/5 [36:34<00:00, 438.82s/it]


In [13]:
del test_dataset, predict, model
gc.collect()

76587

In [14]:
class CovidNet(Model):
    def __init__(self):
        super(CovidNet, self).__init__()
        self.base = efn.EfficientNetB1(input_shape=(512, 512, 3),
                                                  include_top=False,
                                                  weights='imagenet')

        self.base = Model(
                [self.base.inputs], 
                [self.base.get_layer('top_activation').output, self.base.output]
            )
        

        self.tail = Sequential(
            [
                layers.GlobalAveragePooling2D(),
                layers.Dropout(0.2),
                layers.BatchNormalization(),
                layers.Dense(4),
                layers.Softmax()
            ]
        )
        

        self.msk = Sequential(
            [
                layers.Conv2D(filters=512, kernel_size=(1, 1), 
                              strides=(1, 1), padding="same"),
                layers.ReLU(),
                layers.BatchNormalization(),
                layers.Conv2D(filters=1, kernel_size=(1,1), padding="same")
            ]
        )

    def call(self, inputs, training=None, **kwargs):
        segg, clss = self.base(inputs['input'])

        return {
            'clss': self.tail(clss), 
            'segg': self.msk(segg)
        }

In [15]:
class TestDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, image_paths, TTA=None, batch_size=BATCH_SIZE):
        self.image_paths = image_paths
        self.batch_size = BATCH_SIZE
        self.TTA = TTA
        
    def __len__(self):
        return math.ceil(len(self.image_paths) / self.batch_size)
    
    def __getitem__(self, indx):
        images = self.image_paths[indx*self.batch_size:(indx+1)*self.batch_size]
        images_batch = np.array(list(map(dicom2array, images)))
        
        def image_process(image_array, image_size=(512, 512)):
            image = tf.cast(image_array, tf.float32) / 255.
            image = tf.image.resize(image, image_size)
            return image
        
        images_batch = np.array(list(map(image_process, images_batch)))
        if self.TTA:
            images_batch = np.array(list(map(apply_augmentation, images_batch)))
        return {'input': images_batch}

In [16]:
test_dataset = TestDataGenerator(test_df.filepath.tolist())

In [17]:
model = CovidNet()
model.build(input_shape={'input': (None, 512, 512, 3)})
model.summary()

Model: "covid_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Functional)           [(None, 16, 16, 1280), (N 6575232   
_________________________________________________________________
sequential_1 (Sequential)    (None, 4)                 10244     
_________________________________________________________________
sequential_2 (Sequential)    (None, 16, 16, 1)         658433    
Total params: 7,243,909
Trainable params: 7,178,277
Non-trainable params: 65,632
_________________________________________________________________


In [18]:
num_folds = 3
predict = []
for i in tqdm(range(num_folds)):
    model_path = f'../input/mt-weights/auxloss{i}.h5'
    model.load_weights(model_path)
    pred = model.predict(test_dataset)['clss']
    predict.append(pred)
    del pred 
    gc.collect()
mt_predictions = np.mean(predict, axis=0)

  if sys.path[0] == '':
100%|██████████| 3/3 [21:37<00:00, 432.43s/it]


In [19]:
final_pred = np.mean([predictions,mt_predictions], axis=0)

In [20]:
data = {'id': test_df.study_id.tolist()}
study_df = pd.DataFrame(data=data)
study_df['PredictionString'] = None
for i in tqdm(range(final_pred.shape[0])):
    study_df.loc[i, 'PredictionString'] = f'negative {final_pred[i][0]:0.3f} 0 0 1 1 \
    typical {final_pred[i][1]:0.3f} 0 0 1 1 \
    indeterminate {final_pred[i][2]:0.3f} 0 0 1 1 \
    atypical {final_pred[i][3]:0.3f} 0 0 1 1'

study_df.sort_values(by=['id'])
image_df = pd.DataFrame({'id':test_df.image_id.tolist(),
                         'PredictionString':["none 1 0 0 1 1"]*len(test_df.image_id.tolist())})
study_df = study_df.drop_duplicates(subset=['id'])
final_df = pd.concat([study_df.sort_values(by=['id']), image_df])
final_df.to_csv('submission.csv', index=False)

100%|██████████| 1263/1263 [00:00<00:00, 3265.90it/s]
