In [1]:
# Copy the offline package bundle into the working directory.
!cp -r ../input/siim-covid-packages .
# Four archives in the bundle carry a placeholder `.xyz` suffix; rename them to
# `.tar.bz2` / `.tar.gz` before handing them to conda / pip below.
!mv ./siim-covid-packages/efficientnet-pytorch-0.7.0-pyhd8ed1ab_0.tar.xyz ./siim-covid-packages/efficientnet-pytorch-0.7.0-pyhd8ed1ab_0.tar.bz2
!mv ./siim-covid-packages/pretrainedmodels-0.7.4-py37hc8dfbb8_0.tar.xyz ./siim-covid-packages/pretrainedmodels-0.7.4-py37hc8dfbb8_0.tar.bz2
!mv ./siim-covid-packages/antlr4-python3-runtime-4.8.tar.xyz ./siim-covid-packages/antlr4-python3-runtime-4.8.tar.gz
!mv ./siim-covid-packages/pycocotools-2.0.2.tar.xyz ./siim-covid-packages/pycocotools-2.0.2.tar.gz

# Install every package from its local file (wheels and sdists via pip, .tar.bz2 via conda).
!pip install ./siim-covid-packages/python_gdcm-3.0.9.0-cp37-cp37m-manylinux2014_x86_64.whl
!conda install ./siim-covid-packages/efficientnet-pytorch-0.7.0-pyhd8ed1ab_0.tar.bz2
!conda install ./siim-covid-packages/pretrainedmodels-0.7.4-py37hc8dfbb8_0.tar.bz2
!pip install ./siim-covid-packages/timm-0.4.5-py3-none-any.whl
!pip install ./siim-covid-packages/antlr4-python3-runtime-4.8.tar.gz
!pip install ./siim-covid-packages/pycocotools-2.0.2.tar.gz
!pip install ./siim-covid-packages/omegaconf-2.0.6-py3-none-any.whl
!pip install ./siim-covid-packages/ensemble_boxes-1.0.6-py3-none-any.whl
# The local copy of the bundle is no longer needed once everything is installed.
!rm -rf ./siim-covid-packages

Processing ./siim-covid-packages/python_gdcm-3.0.9.0-cp37-cp37m-manylinux2014_x86_64.whl
Installing collected packages: python-gdcm
Successfully installed python-gdcm-3.0.9.0

Downloading and Extracting Packages
######################################################################## | 100% 
Preparing transaction: - done
Verifying transaction: | done
Executing transaction: - \ | / - \ | done

Downloading and Extracting Packages
######################################################################## | 100% 
Preparing transaction: - done
Verifying transaction: | done
Executing transaction: - done
Processing ./siim-covid-packages/timm-0.4.5-py3-none-any.whl
Installing collected packages: timm
Successfully installed timm-0.4.5
Processing ./siim-covid-packages/antlr4-python3-runtime-4.8.tar.gz
Building wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py) ... [?25l- \ done
[?25h  Creat

In [2]:
import pandas as pd
import os
import numpy as np
import pydicom
import cv2
import torch
import gc
import pickle
from tqdm import tqdm
import random
from ensemble_boxes import weighted_boxes_fusion
from multiprocessing import Pool
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [3]:
def load_dict(name):
    """Unpickle and return the object stored in the file at path ``name``."""
    stream = open(name, 'rb')
    try:
        payload = pickle.load(stream)
    finally:
        # Always release the file handle, even if unpickling fails.
        stream.close()
    return payload

In [4]:
# Study-level class names paired with the short label used in the submission
# PredictionString. The order here fixes the order of `classes`.
_STUDY_CLASS_LABEL_PAIRS = (
    ('Negative for Pneumonia', 'negative'),
    ('Typical Appearance', 'typical'),
    ('Indeterminate Appearance', 'indeterminate'),
    ('Atypical Appearance', 'atypical'),
)

# Ordered list of the full class names.
classes = [full_name for full_name, _ in _STUDY_CLASS_LABEL_PAIRS]

# Full class name -> submission label.
study_submission_classes = dict(_STUDY_CLASS_LABEL_PAIRS)

In [5]:
#### Only 14 studies of the public test set are re-run here instead of all 1214, to save time.
#### The kernel therefore runs inference on the private test studies + those 14 public studies,
#### and copies the predictions for the other public studies from an earlier submission file.

public_test_meta_df = pd.read_csv('../input/siim-covid-public-test/test_meta.csv')
public_test_14_study = list(np.unique(public_test_meta_df.studyid.values))[0:14]
public_test_1200_study = []

public_test_submisison_df = pd.read_csv('../input/siim-covid-public-test/submission_0.656_20210806.csv')

# Build the lookups once instead of scanning the whole frame for every id.
rerun_study_ids = set(public_test_14_study)
submission_id_counts = public_test_submisison_df['id'].value_counts()
cached_prediction = public_test_submisison_df.drop_duplicates('id').set_index('id')['PredictionString']

public_test_1200_study_level_output = []
public_test_1200_image_level_output = []
for studyid, grp in tqdm(public_test_meta_df.groupby('studyid')):
    if studyid in rerun_study_ids:
        continue

    public_test_1200_study.append(studyid)

    study_key = '{}_study'.format(studyid)
    # Every copied id must occur exactly once in the cached submission.
    assert submission_id_counts.get(study_key, 0) == 1
    public_test_1200_study_level_output.append([study_key, cached_prediction.loc[study_key]])

    for imageid in grp['imageid']:
        image_key = '{}_image'.format(imageid)
        assert submission_id_counts.get(image_key, 0) == 1
        public_test_1200_image_level_output.append([image_key, cached_prediction.loc[image_key]])

# Study rows first, then image rows; saved to disk so the frames can be freed until the final merge.
public_test_1200_output = public_test_1200_study_level_output + public_test_1200_image_level_output
public_test_1200_submission_df = pd.DataFrame(data=np.array(public_test_1200_output), columns=['id','PredictionString'])
public_test_1200_submission_df.to_csv('./submission_1200_study.csv', index=False)
print(public_test_1200_submission_df.shape, len(public_test_14_study), len(public_test_1200_study))

del public_test_1200_output
del public_test_1200_study_level_output
del public_test_1200_image_level_output
del public_test_submisison_df
del public_test_1200_submission_df
del submission_id_counts
del cached_prediction

gc.collect()

100%|██████████| 1214/1214 [00:02<00:00, 565.16it/s]


(2449, 2) 14 1200


2571

In [6]:
### extract dicom to image
# Create the output folders for the converted PNGs and the metadata CSV (no error if they already exist).
for _out_dir in ('./images', './csv'):
    os.makedirs(_out_dir, exist_ok=True)

class ME:
    """Work item for the DICOM conversion: a study UID plus the path of one DICOM file."""

    def __init__(self, StudyInstanceUID, file_path):
        # StudyInstanceUID: study the file is expected to belong to.
        # file_path: location of the DICOM file on disk.
        self.StudyInstanceUID, self.file_path = StudyInstanceUID, file_path

def dicom2image(ele):
    """Convert one DICOM file to an 8-bit PNG under ``./images`` and return its metadata.

    Args:
        ele: object with ``file_path`` (path of the DICOM file) and
            ``StudyInstanceUID`` (the study the file is expected to belong to).

    Returns:
        ``[PatientID, StudyInstanceUID, SeriesInstanceUID, image_id, SeriesNumber,
        InstanceNumber]`` where ``image_id`` is the file name up to its first dot.

    Raises:
        AssertionError: if the file name does not match the dataset's
            ``SOPInstanceUID`` or the study UID does not match ``ele``.
        IOError: if the PNG could not be written.
    """
    image_id = ele.file_path.split('/')[-1].split('.')[0]
    # `dcmread` is the current name of the reader; `read_file` is its deprecated alias.
    dcm_file = pydicom.dcmread(ele.file_path)

    PatientID = dcm_file.PatientID
    series_id = dcm_file.SeriesInstanceUID
    assert image_id == dcm_file.SOPInstanceUID
    assert ele.StudyInstanceUID == dcm_file.StudyInstanceUID

    data = apply_voi_lut(dcm_file.pixel_array, dcm_file)

    # Invert MONOCHROME1 images (max - value).
    if dcm_file.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max normalise to [0, 1], then scale to 8 bit.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        # A constant image has peak == 0 after the shift; skipping the division
        # avoids 0/0 -> NaN and leaves an all-zero image.
        data = data / peak
    data = (data * 255).astype(np.uint8)

    image_path = './images/{}.png'.format(image_id)
    # cv2.imwrite signals failure through its return value instead of raising.
    if not cv2.imwrite(image_path, data):
        raise IOError('failed to write {}'.format(image_path))
    return [PatientID, ele.StudyInstanceUID, series_id, image_id, dcm_file.SeriesNumber, dcm_file.InstanceNumber]

sample_submission_df = pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')

# Studies whose predictions were already copied from the cached submission are skipped;
# a set makes the membership test constant-time.
cached_study_ids = set(public_test_1200_study)

# Collect one work item per DICOM file of every study that still needs inference.
meles = []
for submission_id in np.unique(sample_submission_df.id.values):
    if '_study' not in submission_id:
        continue
    StudyInstanceUID = submission_id.replace('_study', '')

    if StudyInstanceUID in cached_study_ids:
        continue

    for rdir, _, files in os.walk('../input/siim-covid19-detection/test/{}'.format(StudyInstanceUID)):
        for dicom_name in files:
            file_path = os.path.join(rdir, dicom_name)
            filename, file_extension = os.path.splitext(file_path)
            if file_extension in ['.dcm', '.dicom']:
                meles.append(ME(StudyInstanceUID, file_path))

# Convert in parallel. The with-block shuts the worker processes down once map()
# has returned, so no workers are left behind.
with Pool(4) as p:
    results = p.map(func=dicom2image, iterable=meles)

test_df_columns = ['patientid', 'studyid', 'series_id', 'imageid', 'SeriesNumber', 'InstanceNumber']
# reshape keeps the frame constructible (zero rows, six columns) when nothing was converted.
test_df = pd.DataFrame(data=np.array(results).reshape(-1, len(test_df_columns)), columns=test_df_columns)
test_df.to_csv('./csv/test_df.csv', index=False)

In [7]:
############################################## crop lung area ##############################################
### yolov5
# Copy the YOLOv5 detection sources into the working directory so predict_lung.py can be run from here.
!cp -r ../input/siim-covid-src/detection_yolov5/* .
# Run predict_lung.py (fold 3 checkpoint dir, image size 512) on ./images;
# the result is written to ./det_predictions/yolov5_lung_test_pred.pth.
!python predict_lung.py --ckpt_dir ../input/siim-covid-checkpoints/detection_yolov5_lung \
                        --output_dir ./det_predictions \
                        --output_file_name yolov5_lung_test_pred.pth \
                        --fold 3 \
                        --source ./images \
                        --img-size 512 \
                        --conf-thres 0.05 \
                        --iou-thres 0.5 \
                        --device 0
# Remove the copied sources again so the next stage can copy its own *.py files.
!rm -rf ./utils ./models ./data *.py ./__pycache__

Namespace(agnostic_nms=False, ckpt_dir='../input/siim-covid-checkpoints/detection_yolov5_lung', conf_thres=0.05, device='0', exist_ok=False, fold=3, hide_conf=False, hide_labels=False, img_size=512, iou_thres=0.5, line_thickness=3, name='exp', nosave=False, output_dir='./det_predictions', output_file_name='yolov5_lung_test_pred.pth', project='runs/detect', save_conf=False, save_crop=False, save_txt=False, source='./images', update=False, view_img=False, workers=16)
100%|███████████████████████████████████████████| 14/14 [00:02<00:00,  5.83it/s]


In [8]:
############################################## study level prediction ##############################################
# Copy the classification sources into the working directory.
!cp -r ../input/siim-covid-src/classification_aux/* .
# Four runs of predict_test.py follow. They share the test CSV, checkpoint dir, image dir,
# lung prediction file, folds 0-4 and --num_tta 8; only --cfg differs, plus --batch-size
# (64 instead of 96) for the last run. All outputs go to ./cls_predictions.

# Run 1: eb5_512_deeplabv3plus config.
!python predict_test.py --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/classification_aux_v4 \
                        --image_dir ./images \
                        --lung_pred_path ./det_predictions/yolov5_lung_test_pred.pth \
                        --output_dir ./cls_predictions \
                        --cfg ./configs/eb5_512_deeplabv3plus.yaml \
                        --folds 0 1 2 3 4 \
                        --num_tta 8 \
                        --batch-size 96 \
                        --workers 2
                        
# Run 2: seresnet152d_512_unet config.
!python predict_test.py --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/classification_aux_v4 \
                        --image_dir ./images \
                        --lung_pred_path ./det_predictions/yolov5_lung_test_pred.pth \
                        --output_dir ./cls_predictions \
                        --cfg ./configs/seresnet152d_512_unet.yaml \
                        --folds 0 1 2 3 4 \
                        --num_tta 8 \
                        --batch-size 96 \
                        --workers 2

# Run 3: eb6_448_linknet config.
!python predict_test.py --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/classification_aux_v4 \
                        --image_dir ./images \
                        --lung_pred_path ./det_predictions/yolov5_lung_test_pred.pth \
                        --output_dir ./cls_predictions \
                        --cfg ./configs/eb6_448_linknet.yaml \
                        --folds 0 1 2 3 4 \
                        --num_tta 8 \
                        --batch-size 96 \
                        --workers 2

# Run 4: eb7_512_unetplusplus config (batch size 64).
!python predict_test.py --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/classification_aux_v4 \
                        --image_dir ./images \
                        --lung_pred_path ./det_predictions/yolov5_lung_test_pred.pth \
                        --output_dir ./cls_predictions \
                        --cfg ./configs/eb7_512_unetplusplus.yaml \
                        --folds 0 1 2 3 4 \
                        --num_tta 8 \
                        --batch-size 64 \
                        --workers 2

# Remove the copied classification sources and configs.
!rm -rf *.py ./segmentation_models_pytorch ./configs ./__pycache__

Namespace(batch_size=96, cfg='./configs/eb5_512_deeplabv3plus.yaml', ckpt_dir='../input/siim-covid-checkpoints/classification_aux_v4', folds=[0, 1, 2, 3, 4], image_dir='./images', lung_pred_path='./det_predictions/yolov5_lung_test_pred.pth', num_tta=8, output_dir='./cls_predictions', test_df='./csv/test_df.csv', workers=2)
{'encoder_name': 'timm-efficientnet-b5', 'encoder_weights': 'noisy-student', 'in_features': 2048, 'aux_image_size': 512, 'decoder': 'deeplabv3plus', 'decoder_channels': 256}
Test size: 14
100%|█████████████████████████████████████████████| 1/1 [00:16<00:00, 16.64s/it]
Namespace(batch_size=96, cfg='./configs/seresnet152d_512_unet.yaml', ckpt_dir='../input/siim-covid-checkpoints/classification_aux_v4', folds=[0, 1, 2, 3, 4], image_dir='./images', lung_pred_path='./det_predictions/yolov5_lung_test_pred.pth', num_tta=8, output_dir='./cls_predictions', test_df='./csv/test_df.csv', workers=2)
{'encoder_name': 'timm-seresnet152d_320', 'encoder_weights': 'imagenet', 'in

In [9]:
############################################## image level prediction ##############################################
# Three detector families are run in turn; each one copies its sources in, writes its
# predictions under ./det_predictions, and removes its sources again.

### yolov5
!cp -r ../input/siim-covid-src/detection_yolov5/* .
# Folds 0-4 at image size 768 with a very low confidence threshold (0.0005).
!python predict.py  --ckpt_dir ../input/siim-covid-checkpoints/detection_yolov5_v3 \
                    --output_dir ./det_predictions \
                    --folds 0 1 2 3 4 \
                    --source ./images \
                    --img-size 768 \
                    --conf-thres 0.0005 \
                    --iou-thres 0.5 \
                    --device 0
!rm -rf ./utils ./models ./data *.py ./__pycache__

### faster rcnn
!cp -r ../input/siim-covid-src/detection_fasterrcnn/* .
# Two runs that differ only in --cfg (resnet200d / resnet101d) and --batch-size (32 / 24).
!python predict_test.py --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/detection_fasterrcnn_v3 \
                        --image_dir ./images \
                        --output_dir ./det_predictions \
                        --cfg ./configs/resnet200d.yaml \
                        --folds 0 1 2 3 4 \
                        --batch-size 32 \
                        --workers 2
!python predict_test.py --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/detection_fasterrcnn_v3 \
                        --image_dir ./images \
                        --output_dir ./det_predictions \
                        --cfg ./configs/resnet101d.yaml \
                        --folds 0 1 2 3 4 \
                        --batch-size 24 \
                        --workers 2
!rm -rf *.py ./configs ./__pycache__

### efficient det
!cp -r ../input/siim-covid-src/detection_efficientdet/* .
# tf_efficientdet_d7, folds 0-4, image size 768, batch size 24.
!python predict_test.py --model tf_efficientdet_d7 \
                        --amp --use-ema --num-classes 1 --native-amp -b 24 \
                        --output_dir ./det_predictions \
                        --test_df ./csv/test_df.csv \
                        --ckpt_dir ../input/siim-covid-checkpoints/detection_efficientdet_v3 \
                        --image_dir ./images \
                        --folds 0 1 2 3 4 \
                        --image-size 768
!rm -rf ./effdet *.py ./__pycache__

Namespace(agnostic_nms=False, ckpt_dir='../input/siim-covid-checkpoints/detection_yolov5_v3', conf_thres=0.0005, device='0', exist_ok=False, folds=[0, 1, 2, 3, 4], hide_conf=False, hide_labels=False, img_size=768, iou_thres=0.5, line_thickness=3, name='exp', nosave=False, output_dir='./det_predictions', project='runs/detect', save_conf=False, save_crop=False, save_txt=False, source='./images', update=False, view_img=False, workers=16)
******************** Fold 0 ********************
******************** Fold 1 ********************
******************** Fold 2 ********************
******************** Fold 3 ********************
******************** Fold 4 ********************
100%|███████████████████████████████████████████| 14/14 [00:17<00:00,  1.23s/it]
Namespace(batch_size=32, cfg='./configs/resnet200d.yaml', ckpt_dir='../input/siim-covid-checkpoints/detection_fasterrcnn_v3', folds=[0, 1, 2, 3, 4], image_dir='./images', output_dir='./det_predictions', test_df='./csv/test_df.cs

In [10]:
### Remove the temporary PNG directory; the remaining cells only read the saved
### predictions and ./csv/test_df.csv.
!rm -rf ./images

In [11]:
############################################## combine study + image level prediction ##############################################
# Faster R-CNN scores are multiplied by this factor to scale them to the
# EfficientDet / YOLO score range before the boxes are fused.
FASTERRCNN_SCORE_SCALE = 0.78
# IoU threshold passed to weighted_boxes_fusion.
WBF_IOU_THR = 0.6


def _scale_scores(nested_scores, factor):
    """Return a new nested list with every score in ``nested_scores`` multiplied by ``factor``."""
    return [[score * factor for score in score_list] for score_list in nested_scores]


submission_study_output = []
submission_image_output = []
test_df = pd.read_csv('./csv/test_df.csv')

# Per-image study-level predictions of the four classification models, keyed by image id.
eb5_study_pred = torch.load('./cls_predictions/timm-efficientnet-b5_512_deeplabv3plus_aux_fold0_1_2_3_4_test_pred.pth')['pred_dict']
eb6_study_pred = torch.load('./cls_predictions/timm-efficientnet-b6_448_linknet_aux_fold0_1_2_3_4_test_pred.pth')['pred_dict']
eb7_study_pred = torch.load('./cls_predictions/timm-efficientnet-b7_512_unetplusplus_aux_fold0_1_2_3_4_test_pred.pth')['pred_dict']
sr152_study_pred = torch.load('./cls_predictions/timm-seresnet152d_320_512_unet_aux_fold0_1_2_3_4_test_pred.pth')['pred_dict']

for studyid, grp in test_df.groupby('studyid'):
    preds = []
    for imageid in grp['imageid']:
        # Weighted blend of the four classifiers for this image.
        pred =  0.3*eb5_study_pred[imageid] + \
                0.2*eb6_study_pred[imageid] + \
                0.2*eb7_study_pred[imageid] + \
                0.3*sr152_study_pred[imageid]

        preds.append(pred)

        # Each detector stored (boxes, scores, labels, image width, image height) per image;
        # all detectors must agree on the image size.
        boxes1, scores1, labels1, img_width, img_height = load_dict('./det_predictions/tf_efficientdet_d7_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))

        boxes2, scores2, labels2, img_width2, img_height2 = load_dict('./det_predictions/yolov5x6_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
        assert img_width2 == img_width and img_height2 == img_height

        boxes3, scores3, labels3, img_width3, img_height3 = load_dict('./det_predictions/resnet200d_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
        assert img_width3 == img_width and img_height3 == img_height

        boxes4, scores4, labels4, img_width4, img_height4 = load_dict('./det_predictions/resnet101d_1024_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
        assert img_width4 == img_width and img_height4 == img_height

        boxes = boxes1 + boxes2 + boxes3 + boxes4
        labels = labels1 + labels2 + labels3 + labels4

        ### scale score of fasterrcnn (resnet200d / resnet101d) to effdet and yolo score
        scores3 = _scale_scores(scores3, FASTERRCNN_SCORE_SCALE)
        scores4 = _scale_scores(scores4, FASTERRCNN_SCORE_SCALE)

        scores = scores1 + scores2 + scores3 + scores4

        boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=None, iou_thr=WBF_IOU_THR)
        # Only label 0 is expected. Checking element-wise (rather than mean == 0) also
        # passes when the fusion returns no boxes, where the mean would be NaN.
        assert np.all(np.asarray(labels) == 0)
        boxes = boxes.clip(0,1)

        # Scale the [0, 1]-clipped box coordinates to pixels.
        boxes[:,[0,2]] = boxes[:,[0,2]]*float(img_width)
        boxes[:,[1,3]] = boxes[:,[1,3]]*float(img_height)

        # pred[0] is the first entry of `classes` ('Negative for Pneumonia'); it is the 'none' score.
        neg_image_pred = 'none {} 0 0 1 1'.format(pred[0])
        opacity_image_pred = []
        for box, score in zip(boxes, scores):
            opacity_image_pred.append('opacity {} {} {} {} {}'.format(score, box[0], box[1], box[2],box[3]))
        image_pred = ' '.join([neg_image_pred] + opacity_image_pred)
        submission_image_output.append(['{}_image'.format(imageid), image_pred])

    # Study-level prediction = mean of the blended predictions of the study's images.
    preds = np.array(preds)
    preds = np.mean(preds, axis=0)

    study_preds = []
    for clsidx, clsname in enumerate(classes):
        study_preds.append('{} {} 0 0 1 1'.format(study_submission_classes[clsname], preds[clsidx]))
    study_preds = ' '.join(study_preds)
    submission_study_output.append(['{}_study'.format(studyid), study_preds])

del eb5_study_pred
del eb6_study_pred
del eb7_study_pred
del sr152_study_pred

submission_output = submission_study_output + submission_image_output
sub_df = pd.DataFrame(data=np.array(submission_output), columns=['id','PredictionString'])

# Prepend the cached predictions of the public-test studies that were not re-run.
public_test_1200_submission_df = pd.read_csv('./submission_1200_study.csv')

sub_df = pd.concat([public_test_1200_submission_df, sub_df], ignore_index=True)
sub_df.to_csv('submission.csv', index=False)
print(sub_df.shape)

del submission_output
del submission_study_output
del submission_image_output

gc.collect()

(2477, 2)


0

In [12]:
### Remove the intermediate CSV / prediction directories and the cached public-test
### submission file; submission.csv is kept.
!rm -rf ./csv ./det_predictions ./cls_predictions ./submission_1200_study.csv

In [13]:
# Preview the first rows of the final submission table.
sub_df.head()

Unnamed: 0,id,PredictionString
0,028abd3504b6_study,negative 0.1678466796875 0 0 1 1 typical 0.645...
1,02ee3a9820eb_study,negative 0.01250457763671875 0 0 1 1 typical 0...
2,0321bb7f84b5_study,negative 0.9228515625 0 0 1 1 typical 0.033172...
3,03e0a59d9b8a_study,negative 0.1072998046875 0 0 1 1 typical 0.583...
4,03fc9ec0dba8_study,negative 0.9326171875 0 0 1 1 typical 0.021499...
