# SIIM-FISABIO-RSNA COVID-19 Detection using EfficientDet (Inference)
  
### Performance Comparison of Similar Models

<figure>
<img src="https://blog.roboflow.com/content/images/2020/06/yolov5-performance.png" style="width:700px">
</figure>

Check out the [EfficientDet research paper](https://arxiv.org/pdf/1911.09070.pdf).  


### EfficientDet Structure
![image](https://aihub-storage.s3.ap-northeast-2.amazonaws.com/file/efficientdet.png)

### Install Dependencies

In [1]:
!conda install gdcm -c conda-forge -y
!pip install pycocotools numpy opencv-python tqdm tensorboard tensorboardX pyyaml webcolors matplotlib

Collecting package metadata (current_repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | done
Solving environment: - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | 

### Install EfficientDet

In [2]:
!git clone https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch

import os
os.chdir("Yet-Another-EfficientDet-Pytorch")

Cloning into 'Yet-Another-EfficientDet-Pytorch'...
remote: Enumerating objects: 765, done.[K
remote: Total 765 (delta 0), reused 0 (delta 0), pack-reused 765[K
Receiving objects: 100% (765/765), 8.84 MiB | 39.38 MiB/s, done.
Resolving deltas: 100% (437/437), done.


In [3]:
# load checkpoint
! mkdir weights
! wget https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/releases/download/1.0/efficientdet-d0.pth -O weights/efficientdet-d0.pth

--2021-11-07 07:22:38--  https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/releases/download/1.0/efficientdet-d0.pth
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github-releases.githubusercontent.com/253385242/9b9d2100-791d-11ea-80b2-d35899cf95fe?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20211107%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20211107T072239Z&X-Amz-Expires=300&X-Amz-Signature=5fe5a18f22b355dc72231ba06510f9433b47e5feaf7124c7134749e0408affa0&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=253385242&response-content-disposition=attachment%3B%20filename%3Defficientdet-d0.pth&response-content-type=application%2Foctet-stream [following]
--2021-11-07 07:22:39--  https://github-releases.githubusercontent.com/253385242/9b9d2100-791d-11ea-80b2-d35899cf95fe?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-A

In [4]:
# Project configuration for the SIIM dataset; it is written verbatim to
# projects/siim.yml (relative to the current working directory).
siim_yml = '''
project_name: siim  # also the folder name of the dataset that under data_path folder
train_set: train
val_set: val
num_gpus: 1

# mean and std in RGB order, actually this part should remain unchanged as long as your dataset is similar to coco.
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]

# this anchor is adapted to the dataset
anchors_scales: '[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]'
anchors_ratios: '[(1.0, 1.0), (1.3, 0.8), (1.9, 0.5)]'

obj_list: ['typical', 'indeterminate', 'atypical']
'''
with open('projects/siim.yml', 'w') as config_file:
    config_file.write(siim_yml)

### Convert DICOM Files to 256x256 px JPEG Images

In [5]:
import os
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import torch

def read_xray(path, voi_lut=False, fix_monochrome=True):
    """Read a DICOM file and return its pixels as an 8-bit array.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, apply the VOI LUT stored in the file (if the
            device provided one) to transform the raw data into a
            "human-friendly" view.
        fix_monochrome: When True, invert images whose
            PhotometricInterpretation is "MONOCHROME1" so they do not look
            inverted.

    Returns:
        numpy.ndarray of dtype uint8 with the pixel values min-max scaled to
        the range 0..255. A constant image yields an all-zero array.
    """
    # dcmread is the current name of the deprecated pydicom.read_file.
    dicom = pydicom.dcmread(path)
    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by zero would produce NaNs whose cast to
        # uint8 is undefined, so return a black image instead.
        return np.zeros(data.shape, dtype=np.uint8)

    data = data / peak
    return (data * 255).astype(np.uint8)


def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Convert a pixel array to a PIL image scaled to `size` pixels.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Args:
        array: Pixel array accepted by `Image.fromarray`.
        size: Target edge length in pixels.
        keep_ratio: When True the image is shrunk with `thumbnail`, which
            keeps the aspect ratio and fits it inside a size x size box;
            otherwise it is resized to exactly size x size.
        resample: Resampling filter passed to PIL.

    Returns:
        The resulting PIL image.
    """
    image = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return image.resize(target, resample)

    # thumbnail() modifies the image in place and returns None.
    image.thumbnail(target, resample)
    return image

In [6]:
from glob import glob
INPUT_PATH = "/kaggle/input/siim-covid19-detection/"

# Convert every DICOM file of both splits to a 256x256 JPEG under datasets/siim/<split>/.
for split in ["test", "train"]:
    save_dir = f"datasets/siim/{split}/"
    os.makedirs(save_dir, exist_ok=True)

    dicom_paths = glob(INPUT_PATH + split + '/*/*/*')
    for path in tqdm(dicom_paths):
        # Swap the three-character extension of the source file name for "jpg".
        jpg_name = path.split('/')[-1][:-3] + 'jpg'

        xray = read_xray(path)
        # set keep_ratio=True to have original aspect ratio
        im = resize(xray, size=256)
        im.save(os.path.join(save_dir, jpg_name))

  0%|          | 0/1263 [00:00<?, ?it/s]

  0%|          | 0/6334 [00:00<?, ?it/s]



### Preprocessing

In [7]:
import os
import pandas as pd
from glob import glob
import pydicom

In [8]:
# Study-level class labels and image-level bounding boxes shipped with the dataset.
train_study = pd.read_csv(f'{INPUT_PATH}train_study_level.csv')
train_image = pd.read_csv(f'{INPUT_PATH}train_image_level.csv')

In [9]:
# Preview the first five study-level rows.
train_study.head(n=5)

Unnamed: 0,id,Negative for Pneumonia,Typical Appearance,Indeterminate Appearance,Atypical Appearance
0,00086460a852_study,0,1,0,0
1,000c9c05fd14_study,0,0,0,1
2,00292f8c37bd_study,1,0,0,0
3,005057b3f880_study,1,0,0,0
4,0051d9b12e72_study,0,0,0,1


Shorten the label column names and derive `StudyInstanceUID` from `id` by dropping its `_study` suffix.

In [10]:
# Short class names, plus a StudyInstanceUID key obtained by cutting the
# six-character "_study" suffix off the original id column.
short_names = {
    'Negative for Pneumonia': 'Negative',
    'Typical Appearance': 'Typical',
    'Indeterminate Appearance': 'Indeterminate',
    'Atypical Appearance': 'Atypical',
}
train_study = (
    train_study
    .rename(columns=short_names)
    .assign(StudyInstanceUID=lambda frame: frame['id'].str[:-6])
    .drop(columns=['id'])
)
train_study.head()

Unnamed: 0,Negative,Typical,Indeterminate,Atypical,StudyInstanceUID
0,0,1,0,0,00086460a852
1,0,0,0,1,000c9c05fd14
2,1,0,0,0,00292f8c37bd
3,1,0,0,0,005057b3f880
4,0,0,0,1,0051d9b12e72


In [11]:
# Preview the first five image-level rows.
train_image.head(n=5)

Unnamed: 0,id,boxes,label,StudyInstanceUID
0,000a312787f2_image,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....,5776db0cec75
1,000c3a3f293f_image,,none 1 0 0 1 1,ff0879eb20ed
2,0012ff7358bc_image,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....,9d514ce429a7
3,001398f4ff4f_image,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331,28dddc8559b2
4,001bd15d1891_image,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...,dfd9fdd85a3e


Merge the study-level labels from `train_study` into `train_image`.

In [12]:
# Attach the study-level labels to every image row, then cut the
# six-character "_image" suffix off the image id.
train_image = (
    train_image
    .merge(train_study, on='StudyInstanceUID')
    .assign(id=lambda merged: merged['id'].str[:-6])
)
train_image.head()

Unnamed: 0,id,boxes,label,StudyInstanceUID,Negative,Typical,Indeterminate,Atypical
0,000a312787f2,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....,5776db0cec75,0,1,0,0
1,000c3a3f293f,,none 1 0 0 1 1,ff0879eb20ed,1,0,0,0
2,0012ff7358bc,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....,9d514ce429a7,0,1,0,0
3,001398f4ff4f,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331,28dddc8559b2,0,0,0,1
4,001bd15d1891,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...,dfd9fdd85a3e,0,1,0,0


Add the path of each image's DICOM file.

In [13]:
# Map every training DICOM file to its image id (the file name without its
# extension). The dataset root comes from INPUT_PATH instead of repeating the
# hard-coded location.
dicom_paths = glob(INPUT_PATH + 'train/*/*/*')
id_path = pd.DataFrame({
    'path': dicom_paths,
    'id': [os.path.splitext(os.path.basename(p))[0] for p in dicom_paths],
})
train_image = train_image.merge(id_path, on='id')
train_image.head()

Unnamed: 0,id,boxes,label,StudyInstanceUID,Negative,Typical,Indeterminate,Atypical,path
0,000a312787f2,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....,5776db0cec75,0,1,0,0,/kaggle/input/siim-covid19-detection/train/577...
1,000c3a3f293f,,none 1 0 0 1 1,ff0879eb20ed,1,0,0,0,/kaggle/input/siim-covid19-detection/train/ff0...
2,0012ff7358bc,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....,9d514ce429a7,0,1,0,0,/kaggle/input/siim-covid19-detection/train/9d5...
3,001398f4ff4f,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331,28dddc8559b2,0,0,0,1,/kaggle/input/siim-covid19-detection/train/28d...
4,001bd15d1891,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...,dfd9fdd85a3e,0,1,0,0,/kaggle/input/siim-covid19-detection/train/dfd...


In [14]:
# Raw `boxes` string of the first image.
train_image['boxes'].iloc[0]

"[{'x': 789.28836, 'y': 582.43035, 'width': 1026.65662, 'height': 1917.30292}, {'x': 2245.91208, 'y': 591.20528, 'width': 1094.66162, 'height': 1761.54944}]"

In [15]:
# Raw `label` string of the first image.
train_image['label'].iloc[0]

'opacity 1 789.28836 582.43035 1815.94498 2499.73327 opacity 1 2245.91208 591.20528 3340.5737 2352.75472'

The `boxes` column stores each box as `x`, `y` (top-left corner), `width`, and `height`.

<img src="https://aihub-storage.s3.ap-northeast-2.amazonaws.com/file/%E1%84%89%E1%85%B3%E1%84%8F%E1%85%B3%E1%84%85%E1%85%B5%E1%86%AB%E1%84%89%E1%85%A3%E1%86%BA_2021-07-08_%E1%84%8B%E1%85%A9%E1%84%92%E1%85%AE_8.51.15.png" style="width:500px">

The `label` column stores each box as `opacity 1` followed by the corner coordinates `x1`, `y1`, `x2`, `y2`.

<img src="https://aihub-storage.s3.ap-northeast-2.amazonaws.com/file/%E1%84%89%E1%85%B3%E1%84%8F%E1%85%B3%E1%84%85%E1%85%B5%E1%86%AB%E1%84%89%E1%85%A3%E1%86%BA_2021-07-08_%E1%84%8B%E1%85%A9%E1%84%92%E1%85%AE_8.54.13.png" style="width:500px">

In [16]:
# Pixel-array shape of the first image (dcmread replaces the deprecated read_file).
pydicom.dcmread(train_image.iloc[0]['path']).pixel_array.shape

(3488, 4256)

In [17]:
# Record the original pixel dimensions of every image so boxes can later be
# rescaled. Only the DICOM header is read (stop_before_pixels=True): Rows and
# Columns give the same (rows, columns) pair as pixel_array.shape for these
# 2-D images, without decoding the pixel data of every file.
#
# NOTE: despite their names, `xcell` holds the number of pixel rows (image
# height) and `ycell` the number of columns (image width).
xy = []
for dicom_path in train_image['path']:
    header = pydicom.dcmread(dicom_path, stop_before_pixels=True)
    xy.append((header.Rows, header.Columns))
train_image[['xcell','ycell']] = xy
train_image.to_csv('datasets/train_image.csv', index=False)
train_image.head()



Unnamed: 0,id,boxes,label,StudyInstanceUID,Negative,Typical,Indeterminate,Atypical,path,xcell,ycell
0,000a312787f2,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....,5776db0cec75,0,1,0,0,/kaggle/input/siim-covid19-detection/train/577...,3488,4256
1,000c3a3f293f,,none 1 0 0 1 1,ff0879eb20ed,1,0,0,0,/kaggle/input/siim-covid19-detection/train/ff0...,2320,2832
2,0012ff7358bc,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....,9d514ce429a7,0,1,0,0,/kaggle/input/siim-covid19-detection/train/9d5...,2544,3056
3,001398f4ff4f,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331,28dddc8559b2,0,0,0,1,/kaggle/input/siim-covid19-detection/train/28d...,3520,4280
4,001bd15d1891,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...,dfd9fdd85a3e,0,1,0,0,/kaggle/input/siim-covid19-detection/train/dfd...,2800,3408


### Build Train and Validation Set

Move a random 20% of the converted training images into a separate validation folder.

In [18]:
import json
import os
import random
import shutil

import pandas as pd

random.seed(481)
SRC_PATH = 'datasets/siim/train/'
TRG_PATH = 'datasets/siim/'

# os.listdir returns names in arbitrary order, so sort before shuffling;
# otherwise the seeded shuffle is not reproducible across machines or runs.
train_list = sorted(os.listdir(SRC_PATH))
random.shuffle(train_list)

val_dir = TRG_PATH + 'val'
os.makedirs(val_dir, exist_ok=True)

# Hold out the last 20% of the shuffled files for validation. Skip the move
# when the validation folder is already populated, so that re-running the
# cell does not carve another 20% out of the remaining training images.
if not os.listdir(val_dir):
    for path in train_list[int(len(train_list)*0.8):]:
        shutil.move(SRC_PATH + path, TRG_PATH + 'val/' + path)

### Annotation Files

In [19]:
def anno(sets='train', size=256):
    """Write a COCO-style annotation file for one split of the resized dataset.

    The images of the split are the files found in `TRG_PATH + sets`; their
    metadata is looked up in the module-level `train_image` frame by file name
    (without extension). Boxes are rescaled from the original DICOM
    resolution to the `size` x `size` resized images.

    Args:
        sets: Name of the split folder under `TRG_PATH` (e.g. 'train', 'val').
        size: Edge length in pixels of the resized square images.

    Output:
        `{TRG_PATH}annotations/instances_{sets}.json` (the folder is created
        if needed).
    """
    # Image ids (file names without extension) that belong to this split.
    split_ids = {os.path.splitext(name)[0] for name in os.listdir(TRG_PATH + sets)}
    split_rows = train_image[train_image.id.isin(split_ids)]

    annotation = {
        'type': 'instances',
        'categories': [
            {'supercategory': 'none', 'id': 1, 'name': 'typical'},
            {'supercategory': 'none', 'id': 2, 'name': 'indeterminate'},
            {'supercategory': 'none', 'id': 3, 'name': 'atypical'},
        ],
        'images': [],
        'annotations': [],
    }

    next_annotation_id = 1
    # Only rows of this split are visited, so every annotation refers to an
    # image listed in the same file.
    for row_label, data in split_rows.iterrows():
        coco_image_id = int(row_label) + 1
        annotation['images'].append({
            'file_name': data['id'] + '.jpg',
            'height': size,
            'width': size,
            'id': coco_image_id,
        })

        raw_boxes = data['boxes']
        if not isinstance(raw_boxes, str):  # NaN -> image without boxes
            continue

        # category
        if data['Typical'] == 1:
            category = 1
        elif data['Indeterminate'] == 1:
            category = 2
        elif data['Atypical'] == 1:
            category = 3
        else:
            # No opacity class for this study: there is no category to assign.
            continue

        # `xcell` stores the pixel rows (height) and `ycell` the columns
        # (width) of the original image, hence the swapped-looking lookup.
        orig_height, orig_width = int(data['xcell']), int(data['ycell'])

        # The boxes string uses single quotes; swap them so it parses as JSON.
        for box in json.loads(raw_boxes.replace('\'', '\"')):
            x = (box['x'] * size) // orig_width
            y = (box['y'] * size) // orig_height
            w = (box['width'] * size) // orig_width
            h = (box['height'] * size) // orig_height
            annotation['annotations'].append({
                # Area of the rescaled box, i.e. exactly bbox width * height.
                'area': w * h,
                'iscrowd': 0,
                'image_id': coco_image_id,
                'bbox': [x, y, w, h],
                'category_id': category,
                'id': next_annotation_id,
                'ignore': 0,
                'segmentation': [],
            })
            next_annotation_id += 1

    # save annotation json files
    os.makedirs(TRG_PATH + 'annotations', exist_ok=True)
    with open(f'{TRG_PATH}annotations/instances_{sets}.json', 'w') as f:
        json.dump(annotation, f)

In [20]:
# Create the output folder, then write one annotation file per split.
os.makedirs(TRG_PATH + 'annotations', exist_ok=True)
for split_name in ('train', 'val'):
    anno(split_name)