### **download external packages**

In [1]:
HELPER_DIR = '/kaggle/input/pydicom-conda-helper/'

!conda install {HELPER_DIR+'libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2'} -c conda-forge -y -q
!conda install {HELPER_DIR+'libgcc-ng-9.3.0-h2828fa1_19.tar.bz2'} -c conda-forge -y -q
!conda install {HELPER_DIR+'gdcm-2.8.9-py37h500ead1_1.tar.bz2'} -c conda-forge -y -q
!conda install {HELPER_DIR+'conda-4.10.1-py37h89c1867_0.tar.bz2'} -c conda-forge -y -q
!conda install {HELPER_DIR+'certifi-2020.12.5-py37h89c1867_1.tar.bz2'} -c conda-forge -y -q
!conda install {HELPER_DIR+'openssl-1.1.1k-h7f98852_0.tar.bz2'} -c conda-forge -y -q

Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done


### **import dependencies**

In [2]:
import os
import shutil
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import PIL
import pydicom

import tensorflow as tf
from tensorflow.keras.applications.vgg16 import preprocess_input, VGG16

from collections import Counter
from pathlib import Path
from tqdm.auto import tqdm

### **configuration and initialization**

In [3]:
SIIM_COVID19_DETECTION_DIR = '/kaggle/input/siim-covid19-detection/'
PART1_DIR = '/kaggle/input/part1-siim-covid19-model-image-yolov5/'
PART2_DIR = '/kaggle/input/part2-siim-covid19-model-study-vgg16/'
WORKING_DIR = '/kaggle/working/'

INPUT_DIR = SIIM_COVID19_DETECTION_DIR+'test/'

TEMP_DIR = '/kaggle/temp/'
OUTPUT_DIR = TEST_DIR = '/kaggle/temp/test/'

YOLOV5_DIR = PART1_DIR+'yolov5/yolov5/'
VGG16_DIR = PART2_DIR+'vgg16/'

RESULT_NAME = 'inference'
DETECT_DIR = 'runs/detect/'+RESULT_NAME+'/labels/'

SAMPLE_SUBMISSION_PATH = SIIM_COVID19_DETECTION_DIR+'sample_submission.csv'


IMG_SIZE = WIDTH = HEIGHT = 512
INTERPOLATION = cv2.INTER_LANCZOS4
N_IMAGE_TO_VISUALIZE = 25

In [4]:
os.makedirs(OUTPUT_DIR, exist_ok=True)

### **load submission file and split df study/image**

In [5]:
df_submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)

df_submission_study = df_submission.loc[df_submission.id.str.contains('_study')].copy().reset_index(drop=True)
df_submission_image = df_submission.loc[df_submission.id.str.contains('_image')].copy().reset_index(drop=True)

### **get path dicom files**

In [6]:
path_dicom_files = []

total = sum([len(f) for r, d, f in os.walk(INPUT_DIR)])

with tqdm(total=total) as pbar:
    for dirname, _, filenames in os.walk(INPUT_DIR):
        for file in filenames:
            path_dicom_files.append(Path(os.path.join(dirname, file)))
            pbar.update(1)

  0%|          | 0/1263 [00:00<?, ?it/s]

### **rescale all test images and save to jpg / save original width and height**

In [7]:
df_submission_image.loc[:,"width"] = np.nan
df_submission_image.loc[:,"height"] = np.nan


for p in tqdm(path_dicom_files):
    dcm = pydicom.dcmread(p)
    img = dcm.pixel_array
    img_name = p.parts[-1][0:-4]
    
    index = df_submission_image[df_submission_image['id'].str.contains(img_name)].index
    df_submission_image.loc[index, ['width']] = img.shape[0]
    df_submission_image.loc[index, ['height']] = img.shape[1]

    if dcm.PhotometricInterpretation == "MONOCHROME1":
        img = cv2.bitwise_not(img)
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    img = cv2.resize(img, (WIDTH, HEIGHT), interpolation = INTERPOLATION)
    
    cv2.imwrite(OUTPUT_DIR+img_name+'.jpg', img)

  0%|          | 0/1263 [00:00<?, ?it/s]

### **visualize N sample images (optional)**

In [8]:
"""import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from skimage import io
from skimage.transform import rotate

N_ROWS = N_COLUMNS = int(np.ceil(np.sqrt(N_IMAGE_TO_VISUALIZE)))
SIZE_LAYOUT = int(IMG_SIZE*N_ROWS/3.3)

fig = make_subplots(rows=N_ROWS, cols=N_COLUMNS,horizontal_spacing = 0.01,vertical_spacing = 0.01)

for i, file in enumerate(os.listdir(OUTPUT_DIR)[0:N_IMAGE_TO_VISUALIZE],1):
    
    row=int(np.ceil(i/N_ROWS))
    col=int(i-(row-1)*N_COLUMNS)
    img = rotate(io.imread(OUTPUT_DIR+file, as_gray=True),angle=180)
    fig.add_trace(go.Heatmap(z=img, colorscale='gray',showscale=False), row, col)
    
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)
fig.update_layout(height=SIZE_LAYOUT,width=SIZE_LAYOUT, showlegend=False)
fig.show()
#fig.write_image(TEMP_DIR+"visualization_"+str(N_IMAGE_TO_VISUALIZE)+".jpeg")"""

'import plotly.express as px\nimport plotly.graph_objects as go\nfrom plotly.subplots import make_subplots\nfrom skimage import io\nfrom skimage.transform import rotate\n\nN_ROWS = N_COLUMNS = int(np.ceil(np.sqrt(N_IMAGE_TO_VISUALIZE)))\nSIZE_LAYOUT = int(IMG_SIZE*N_ROWS/3.3)\n\nfig = make_subplots(rows=N_ROWS, cols=N_COLUMNS,horizontal_spacing = 0.01,vertical_spacing = 0.01)\n\nfor i, file in enumerate(os.listdir(OUTPUT_DIR)[0:N_IMAGE_TO_VISUALIZE],1):\n    \n    row=int(np.ceil(i/N_ROWS))\n    col=int(i-(row-1)*N_COLUMNS)\n    img = rotate(io.imread(OUTPUT_DIR+file, as_gray=True),angle=180)\n    fig.add_trace(go.Heatmap(z=img, colorscale=\'gray\',showscale=False), row, col)\n    \nfig.update_xaxes(showticklabels=False)\nfig.update_yaxes(showticklabels=False)\nfig.update_layout(height=SIZE_LAYOUT,width=SIZE_LAYOUT, showlegend=False)\nfig.show()\n#fig.write_image(TEMP_DIR+"visualization_"+str(N_IMAGE_TO_VISUALIZE)+".jpeg")'

### **object detection image with yoloV5 pretrained**

In [9]:
YOLOV5_WEIGHTS_PATH = YOLOV5_DIR+'weights/best.pt'
YOLOV5_DETECT_PATH = YOLOV5_DIR+'detect.py'

!python {YOLOV5_DETECT_PATH}    --weights {YOLOV5_WEIGHTS_PATH} \
                                --source {OUTPUT_DIR} \
                                --img {IMG_SIZE} \
                                --conf 0.21 \
                                --iou-thres 0.5 \
                                --max-det 4 \
                                --name {RESULT_NAME} \
                                --save-txt \
                                --save-conf \
                                --nosave

Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.21, device='', exist_ok=False, half=False, hide_conf=False, hide_labels=False, imgsz=512, iou_thres=0.5, line_thickness=3, max_det=4, name='inference', nosave=True, project='runs/detect', save_conf=True, save_crop=False, save_txt=True, source='/kaggle/temp/test/', update=False, view_img=False, weights=['/kaggle/input/part1-siim-covid19-model-image-yolov5/yolov5/yolov5/weights/best.pt'])
[31m[1mrequirements:[0m /kaggle/working/requirements.txt not found, check failed.
image 1/1263 /kaggle/temp/test/0026720152f5.jpg: 512x512 Done. (0.301s)
image 2/1263 /kaggle/temp/test/00fc8fc35dc1.jpg: 512x512 1 opacity, Done. (0.224s)
image 3/1263 /kaggle/temp/test/01c3512eebc3.jpg: 512x512 Done. (0.220s)
image 4/1263 /kaggle/temp/test/01f948f8e544.jpg: 512x512 Done. (0.209s)
image 5/1263 /kaggle/temp/test/022146012034.jpg: 512x512 Done. (0.253s)
image 6/1263 /kaggle/temp/test/02eceb0fc405.jpg: 512x512 1 opacity, Done. (

### **convert results .txt to df submission image**

In [10]:
def correct_bbox_format(bboxes):
    correct_bboxes = []
    for b in bboxes:
        xc, yc = int(np.round(b[0]*IMG_SIZE)), int(np.round(b[1]*IMG_SIZE))
        w, h = int(np.round(b[2]*IMG_SIZE)), int(np.round(b[3]*IMG_SIZE))

        xmin = xc - int(np.round(w/2))
        ymin = yc - int(np.round(h/2))
        xmax = xc + int(np.round(w/2))
        ymax = yc + int(np.round(h/2))
        
        correct_bboxes.append([xmin, ymin, xmax, ymax])
        
    return correct_bboxes

def scale_bboxes_to_original(row, bboxes):
    # Get scaling factor
    scale_x = IMG_SIZE/row.width
    scale_y = IMG_SIZE/row.height
    
    scaled_bboxes = []
    for bbox in bboxes:
        xmin, ymin, xmax, ymax = bbox
        
        xmin = int(np.round(xmin/scale_x))
        ymin = int(np.round(ymin/scale_y))
        xmax = int(np.round(xmax/scale_x))
        ymax = int(np.round(ymax/scale_y))
        
        scaled_bboxes.append([xmin, ymin, xmax, ymax])


def get_conf_bboxes(file_path):
    confidence = []
    bboxes = []
    with open(file_path, 'r') as file:
        for line in file:
            preds = line.strip('\n').split(' ')
            preds = list(map(float, preds))
            confidence.append(preds[-1])
            bboxes.append(preds[1:-1])
    return confidence, bboxes

In [11]:
results = os.listdir(DETECT_DIR)

In [12]:
image_pred_strings = []
for i in tqdm(range(len(df_submission_image))):
    row = df_submission_image.loc[i]
    img_name = row.id[:-6]
    
    if f'{img_name}.txt' in results:
        confidence, bboxes = get_conf_bboxes(f'{DETECT_DIR}/{img_name}.txt')
        bboxes = correct_bbox_format(bboxes)
        pred_string = ''
        for j, conf in enumerate(confidence):
            pred_string += f'opacity {conf:.6f} ' + ' '.join(map(str, bboxes[j])) + ' '
        image_pred_strings.append(pred_string[:-1]) 
    else:
        image_pred_strings.append("None 1 0 0 1 1")

  0%|          | 0/1263 [00:00<?, ?it/s]

In [13]:
df_submission_image['PredictionString'] = image_pred_strings
df_submission_image = df_submission_image.loc[:, ['id','PredictionString']]

### **classification study with vgg16 pretrained**

**create df image with opacity dectect by yolov5**

In [14]:
df_image_opacity = df_submission_image.loc[~(df_submission_image['PredictionString'] == 'None 1 0 0 1 1')]
df_image_opacity = df_image_opacity.apply(lambda row: row.id.split('_')[0], axis=1)
df_image_opacity = df_image_opacity.reset_index(drop=True)

print("shape df_image_opacity : ",df_image_opacity.shape)
df_image_opacity.sample(1)

shape df_image_opacity :  (640,)


52    c13357e59083
dtype: object

**create df study with path of images of the study**

In [15]:
df_study = df_submission_study.copy().drop('PredictionString', axis=1)
df_study['id'] = df_study.apply(lambda row: row.id.split('_')[0], axis=1)
df_study['paths_image'] = df_study.apply(lambda row: [], axis=1)

for p in tqdm(path_dicom_files):
    study_name = p.parts[-3]
    img_name = p.parts[-1][0:-4]
    for index, row in df_study.iterrows():
        if row.id == study_name:
            df_study.loc[index, 'paths_image'].append(img_name+'.jpg')

print("shape df_study : ",df_study.shape)
df_study.sample(1)

  0%|          | 0/1263 [00:00<?, ?it/s]

shape df_study :  (1214, 2)


Unnamed: 0,id,paths_image
187,23847f6a41a6,[39f8e38c9d43.jpg]


**drop row in df study that dont have image with opacity**

In [16]:
with tqdm(total=len(df_study)) as pbar:
    for index, row in df_study.iterrows():
        paths_image = df_study.loc[index, 'paths_image']
        if df_image_opacity.apply(lambda x: any([k[0:-4] in x for k in paths_image])).any():
            df_study = df_study.drop(index)
        pbar.update()

df_study = df_study.reset_index(drop=True)

print("shape df_study : ",df_study.shape)
df_study.sample(1)

  0%|          | 0/1214 [00:00<?, ?it/s]

shape df_study :  (587, 2)


Unnamed: 0,id,paths_image
253,6ef0c3dad8c6,[ec563919514c.jpg]


**predictions study on VGG16**

In [17]:
MODEL_VGG16 = tf.keras.models.load_model(VGG16_DIR)
IMG_SIZE_VGG16 = 224

CLASSES = {
    0: 'atypical 1 0 0 1 1',
    1: 'indeterminate 1 0 0 1 1',
    2: 'negative 1 0 0 1 1',
    3: 'typical 1 0 0 1 1'
 }

In [18]:
def predict(path_file) :
    img = tf.keras.preprocessing.image.load_img(path_file, target_size=(IMG_SIZE_VGG16, IMG_SIZE_VGG16), interpolation='lanczos')
    img = tf.keras.preprocessing.image.img_to_array(img)
    img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))
    img = preprocess_input(img)
    predictions = MODEL_VGG16.predict(img)
    predictions = np.argmax(predictions, axis=1)
    return predictions[0] 

In [19]:
def study_predictions(paths_image):
    predictions = []
    for file_name in paths_image:
        path_file = OUTPUT_DIR+file_name
        predictions.append(predict(path_file))
    
    return CLASSES.get(Counter(predictions).most_common(1)[0][0])
        

df_study['PredictionString'] = df_study['paths_image'].apply(study_predictions)
df_study = df_study.drop(columns=['paths_image'])
df_study['id'] = df_study.apply(lambda row: row.id+'_study', axis=1)
df_study.sample(5)

Unnamed: 0,id,PredictionString
367,9ec66102e783_study,typical 1 0 0 1 1
222,5ed4ac4c2302_study,typical 1 0 0 1 1
565,f7d3c8acc574_study,typical 1 0 0 1 1
357,9a1a4fe68496_study,negative 1 0 0 1 1
514,dd33c10f402b_study,negative 1 0 0 1 1


In [20]:
df_study = df_study.merge(df_submission_study, on='id' , how='outer')
df_study = df_study.drop(columns=['PredictionString_y'])
df_study = df_study.rename(columns={'PredictionString_x':'PredictionString'})
df_study['PredictionString'] = df_study['PredictionString'].apply(lambda x: CLASSES.get(2) if pd.isnull(x) else x)

df_submission_study = df_study

### **remove output runs, merge submission files then export**

In [21]:
!rm -r 'runs'

In [22]:
df_submission = pd.concat([df_submission_image, df_submission_study], ignore_index=True)
df_submission.to_csv(WORKING_DIR+'submission.csv', index=False)

### **ref**


* https://www.kaggle.com/xhlulu
* https://www.kaggle.com/yujiariyasu
* https://www.kaggle.com/ayuraj
* https://www.kaggle.com/dschettler8845   
....