**Import necessary packages**

In [1]:
import os
import cv2
import yaml
import torch
import pandas as pd
from PIL import Image
from torch.utils.data import DataLoader
import numpy as np
import pydicom
from tqdm.notebook import tqdm
from pydicom.pixel_data_handlers.util import apply_voi_lut
import torch.nn.functional as F
from albumentations import Compose, Normalize, Resize, HorizontalFlip, RandomBrightnessContrast, ShiftScaleRotate, IAAAdditiveGaussianNoise, RandomCrop
from albumentations.pytorch import ToTensorV2
from efficientnet_pytorch import EfficientNet

In [2]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """ Convert dicom file to numpy array 
    
    Args:
        path (str): Path to the dicom file to be converted
        voi_lut (bool): Whether or not VOI LUT is available
        fix_monochrome (bool): Whether or not to apply monochrome fix
        
    Returns:
        Numpy array of the respective dicom file 
        
    """
    # Use the pydicom library to read the dicom file
    dicom = pydicom.read_file(path)
    
    # VOI LUT (if available by DICOM device) is used to 
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
        
    # The XRAY may look inverted
    #   - If we want to fix this we can
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    
    # Normalize the image array and return
    data = data - np.min(data)
    data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    return data

**Prepare test dataframe**

In [8]:
test_meta_df = pd.DataFrame(columns=["image_id", "study_id", "dcm_path", "width", "height"])
images, studies, dcm_path, widths, heights = [], [], [], [], []
tk = tqdm(enumerate(os.listdir(r'D:\Nada\kaggle\Covid19-Detection\data\test')), 
          total=len(os.listdir(r'D:\Nada\kaggle\Covid19-Detection\data\test')))
for _, study in tk:
    for serie in os.listdir(r'D:\Nada\kaggle\Covid19-Detection\data\test\{}'.format(study)):
        for image in os.listdir(r'D:\Nada\kaggle\Covid19-Detection\data\test\{}\{}'.format(study, serie)):
            images.append(image)
            studies.append(study)
            dcm_path.append(r'D:\Nada\kaggle\Covid19-Detection\data\test\{}\{}\{}'.format(study, serie, image))
            arr = dicom2array(dcm_path[-1])
            arr = np.stack((arr, arr, arr), axis=-1)
            widths.append(arr.shape[1])
            heights.append(arr.shape[0])
            
test_meta_df["image_id"]=images
test_meta_df["study_id"]=studies
test_meta_df["dcm_path"]=dcm_path
test_meta_df["width"]=widths
test_meta_df["height"]=heights

  0%|          | 0/1214 [00:00<?, ?it/s]

In [50]:
TEST_PATH = r'.\yolov5\covid19_test_images'
os.makedirs('./output', exist_ok=True)

In [52]:
MODEL_PATH = './yolov5/runs/train/single_class_YOLOx-35ep/weights/best.pt'
!python ./yolov5/detect.py --weights {MODEL_PATH} \
                                                          --source {TEST_PATH} \
                                                          --img 256 \
                                                          --conf-thres 0.357 \
                                                          --iou-thres 0.5 \
                                                          --max-det 5 \
                                                          --save-txt \
                                                          --save-conf \
                                                          --project {'./output'} \
                                                          --nosave \
                                                          --name ""\
                                                          --exist-ok \
                                                          --device "0"


Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.357, device='0', exist_ok=True, hide_conf=False, hide_labels=False, img_size=256, iou_thres=0.5, line_thickness=3, max_det=5, name='', nosave=True, project='./output', save_conf=True, save_crop=False, save_txt=True, source='.\\yolov5\\covid19_test_images', update=False, view_img=False, weights=['./yolov5/runs/train/single_class_YOLOx-35ep/weights/best.pt'])
[31m[1mrequirements:[0m requirements.txt not found, check failed.
image 1/1278 D:\Nada\kaggle\SIIM-covid19-detection-classification\yolov5\covid19_test_images\0026720152f5_image.jpg: 256x256 1 opacity, Done. (0.030s)
image 2/1278 D:\Nada\kaggle\SIIM-covid19-detection-classification\yolov5\covid19_test_images\00fc8fc35dc1_image.jpg: 256x224 2 opacitys, Done. (0.028s)
image 3/1278 D:\Nada\kaggle\SIIM-covid19-detection-classification\yolov5\covid19_test_images\01c3512eebc3_image.jpg: 256x256 Done. (0.027s)
image 4/1278 D:\Nada\kaggle\SIIM-covid19-detection-class

YOLOv5  9f2ea89 torch 1.7.1 CUDA:0 (GeForce GTX 980 Ti, 6144.0MB)

Fusing layers... 
Model Summary: 476 layers, 87198694 parameters, 0 gradients


In [46]:
class Data:
    def __init__(self, images, transform=None):
        self.images = images
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):

        file_path = os.path.join(TEST_PATH, self.images[idx][:-4]+"_image"+".jpg")
        image = cv2.imread(file_path)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image

In [47]:
transform = Compose([
            Resize(224, 244),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
test_dataset = Data(test_meta_df.image_id,transform = transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [48]:
model_main_path= './CNN/results/EfficientNet0'
models = os.listdir(model_main_path)
models

['FOLD1_62.23.pth',
 'FOLD2_63.69.pth',
 'FOLD3_60.09.pth',
 'FOLD4_61.42.pth',
 'FOLD5_61.97.pth',
 'ML_models',
 'plots']

In [49]:
# CNN predictions averaged across the 5 folds
from sklearn.decomposition import PCA

LABEL_LIST = ['atypical', 'indeterminate', 'negative', 'typical']

device = "cuda"
CNN_preds = np.zeros((len(test_dataset), len(LABEL_LIST)))
reduced_features = []
pca = PCA(n_components=128)
for model_path in models:
    if model_path[-3:] == "pth":
        weights_path = os.path.join(model_main_path, model_path)
        model = EfficientNet.from_pretrained("efficientnet-b0", weights_path=weights_path, advprop=False, include_top=True, num_classes=len(LABEL_LIST)).to(device)

        model.eval()
        preds = []
        all_features = []
        tk = tqdm(enumerate(test_loader), total=len(test_loader))
        for i, (images) in tk:
            images = images.to(device).float()
            with torch.no_grad():
                y_preds = model(images)
                features = model.extract_features(images)
                all_features.append(torch.reshape(features, (features.shape[0], -1)).cpu().numpy())
            preds.append(F.softmax(y_preds).cpu().numpy())
        CNN_preds += np.concatenate(preds)/len(models)

        all_features = np.concatenate(all_features)
        reduced_features.append(pca.fit_transform(all_features))

Loaded pretrained weights for efficientnet-b0


  0%|          | 0/20 [00:00<?, ?it/s]



Loaded pretrained weights for efficientnet-b0


  0%|          | 0/20 [00:00<?, ?it/s]



Loaded pretrained weights for efficientnet-b0


  0%|          | 0/20 [00:00<?, ?it/s]



Loaded pretrained weights for efficientnet-b0


  0%|          | 0/20 [00:00<?, ?it/s]



Loaded pretrained weights for efficientnet-b0


  0%|          | 0/20 [00:00<?, ?it/s]



PermissionError: [Errno 13] Permission denied: './CNN/results/EfficientNet0\\ML_models'

In [53]:
cnn_preds_df = pd.DataFrame(columns=["id"]+LABEL_LIST)
cnn_preds_df["id"] = [e[:-4]+"_image" for e in test_meta_df["image_id"]]
cnn_preds_df[LABEL_LIST] = CNN_preds

In [54]:
output_path = './output'
ss_df = pd.read_csv("./input/sample_submission.csv")

In [94]:
#Image-level predictions
image_preds = pd.DataFrame(columns=["id", "PredictionString"])#+LABEL_LIST)
predictions= []
ids = []
meta_data_test = []
m_x1, m_x2, m_y1, m_y2 = 0,0,0,0
for i, (_, row) in enumerate(test_meta_df.iterrows()):
    predictions.append("")
    #if row.id.split("_")[1] == "image":
    ids.append(row.image_id[:-4]+"_image")
    if str(row.image_id[:-4])+"_image.txt" in os.listdir(output_path+'/labels'):
        with open(output_path+'/labels/{}.txt'.format(row.image_id[:-4]+"_image"), 'r') as f:
            nb = 0
            for line in f:
                nb+=1
                out = line.rsplit("\n")[0].split(" ")
                xc, yc, w, h = float(out[1]), float(out[2]), float(out[3]), float(out[4])
                x1, y1, x2, y2 = (xc-w/2)*row.width, (yc-h/2)*row.height, (xc+w/2)*row.width,( yc+h/2)*row.height
                predictions[-1]+="opacity {:.6f} {} {} {} {} ".format(float(out[5]), int(x1), int(y1), int(x2), int(y2))
                m_x1 += x1
                m_x2 += x2
                m_y1 += y1
                m_y2 += y2
            predictions[-1]=predictions[-1][:-1]
        meta_data_test.append([m_x1/nb, m_x2/nb, m_y1/nb, m_y2/nb, row.height, row.width])
    else:
        predictions[-1]+= "none 1.0 0 0 1 1"
        meta_data_test.append([0,0,0,0, row.height, row.width])
            
meta_data_test = np.array(meta_data_test)        
image_preds["id"] = ids
image_preds["PredictionString"] = predictions

In [95]:
# LightGBM predictions
import lightgbm as lgb
import re

ml_models = os.listdir(os.path.join(model_main_path,"ML_models"))
ML_preds = np.zeros((len(test_dataset), len(LABEL_LIST)))

for n in range(5):
    r = re.compile(".*Fold.*"+str(n+1))
    ml_path = list(filter(r.match, ml_models))[0]

    df_meta_test = pd.DataFrame(columns = ["f_{}".format(i+1) for i in range(128)], data = reduced_features[n])
    df_meta_test["BBoxArea"] = (meta_data_test[:,1]/meta_data_test[:,5]-meta_data_test[:,0]/meta_data_test[:,5])*(meta_data_test[:,3]/meta_data_test[:,4]-meta_data_test[:,2]//meta_data_test[:,4])
    df_meta_test["AspectRatio"] = (meta_data_test[:,1]-meta_data_test[:,0])/(meta_data_test[:,3]-meta_data_test[:,2])

    clf = lgb.Booster(model_file=os.path.join(os.path.join(model_main_path,"ML_models"), ml_path))
    ML_preds +=  clf.predict(df_meta_test)/len(models)

  


In [96]:
ml_preds_df = pd.DataFrame(columns=["id"]+LABEL_LIST)
ml_preds_df["id"] = [e[:-4]+"_image" for e in test_meta_df["image_id"]]
ml_preds_df[LABEL_LIST] = ML_preds

In [97]:
cnnml_preds_df = pd.DataFrame(columns=["id"]+LABEL_LIST)
cnnml_preds_df["id"] = [e[:-4]+"_image" for e in test_meta_df["image_id"]]
cnnml_preds_df[LABEL_LIST] = np.max(np.array([CNN_preds, ML_preds]), axis=0)

In [110]:
#CNN preds
image_lvl_predictions = pd.merge(image_preds, cnn_preds_df, on="id")

#ML preds
#image_lvl_predictions = pd.merge(image_preds, ml_preds_df, on="id")

#CNN+ML preds
#image_lvl_predictions = pd.merge(image_preds, cnnml_preds_df, on="id")

In [111]:
#Study-level predictions 
study_preds = pd.DataFrame(columns=["id", "PredictionString"])
predictions= []
ids = []
for i, (_, row) in enumerate(ss_df.iterrows()):
    if row.id.split("_")[1] == "study":
        ids.append(row.id)
        predictions.append("")
        study_imgs = test_meta_df[test_meta_df["study_id"]==row.id.split("_")[0]].image_id.apply(lambda x:x[:-4]+"_image")
        pred_lbls = np.mean(np.array(image_lvl_predictions[image_lvl_predictions["id"].isin(study_imgs)][LABEL_LIST]), axis=0)
        for j in range(len(LABEL_LIST)):
            predictions[-1]+= "{} {:.6f} 0 0 1 1 ".format(LABEL_LIST[j], pred_lbls[j])
        predictions[-1] = predictions[-1][:-1]
study_lvl_predictions =  pd.DataFrame(columns=["id", "PredictionString"])
study_lvl_predictions["id"] = ids
study_lvl_predictions["PredictionString"] = predictions

In [112]:
submission = pd.concat([study_lvl_predictions, image_lvl_predictions[["id", "PredictionString"]]])
submission.to_csv("predictions.csv", index=False)

In [113]:
submission

Unnamed: 0,id,PredictionString
0,00188a671292_study,atypical 0.198247 0 0 1 1 indeterminate 0.1925...
1,004bd59708be_study,atypical 0.150155 0 0 1 1 indeterminate 0.1923...
2,00508faccd39_study,atypical 0.176205 0 0 1 1 indeterminate 0.1821...
3,006486aa80b2_study,atypical 0.191835 0 0 1 1 indeterminate 0.1746...
4,00655178fdfc_study,atypical 0.178840 0 0 1 1 indeterminate 0.1740...
...,...,...
1258,064b37b01cd2_image,opacity 0.373535 2608 898 3553 3110
1259,02eceb0fc405_image,opacity 0.535645 1205 1569 1812 2191
1260,16a0d37747d7_image,opacity 0.502930 925 719 2219 2875 opacity 0.6...
1261,aa55fc6c616c_image,none 1.0 0 0 1 1
