**Download YOLOV5 Github Repo**

In [1]:
!cp /kaggle/input/load-gdcm-in-notebook-without-internet/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline /kaggle/input/load-gdcm-in-notebook-without-internet/gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
!rm -rf /kaggle/working/gdcm.tar

gdcm/
gdcm/conda-4.8.4-py37hc8dfbb8_2.tar.bz2
gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
gdcm/libjpeg-turbo-2.0.3-h516909a_1.tar.bz2

Downloading and Extracting Packages
######################################################################## | 100% 
Preparing transaction: - done
Verifying transaction: | / done
Executing transaction: \ done


**Import necessary packages**

In [2]:
import os
import cv2
import yaml
import torch
import pandas as pd
from PIL import Image
from torch.utils.data import DataLoader
import numpy as np
import pydicom
from tqdm.notebook import tqdm
from pydicom.pixel_data_handlers.util import apply_voi_lut
import torch.nn.functional as F
from albumentations import Compose, Normalize, Resize, HorizontalFlip, RandomBrightnessContrast, ShiftScaleRotate, IAAAdditiveGaussianNoise, RandomCrop
from albumentations.pytorch import ToTensorV2
from efficientnet import EfficientNet

In [3]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """ Convert dicom file to numpy array 
    
    Args:
        path (str): Path to the dicom file to be converted
        voi_lut (bool): Whether or not VOI LUT is available
        fix_monochrome (bool): Whether or not to apply monochrome fix
        
    Returns:
        Numpy array of the respective dicom file 
        
    """
    # Use the pydicom library to read the dicom file
    dicom = pydicom.read_file(path)
    
    # VOI LUT (if available by DICOM device) is used to 
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
        
    # The XRAY may look inverted
    #   - If we want to fix this we can
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    
    # Normalize the image array and return
    data = data - np.min(data)
    data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    return data

**Prepare test dataframe**

In [4]:
test_meta_df = pd.DataFrame(columns=["image_id", "study_id", "dcm_path", "width", "height"])
os.makedirs('/kaggle/covid19_test_images', exist_ok=True)
images, studies, dcm_path, widths, heights = [], [], [], [], []
tk = tqdm(enumerate(os.listdir('/kaggle/input/siim-covid19-detection/test')), total=len(os.listdir('/kaggle/input/siim-covid19-detection/test')))
for _, study in tk:#os.listdir('/kaggle/input/siim-covid19-detection/test'):
    for serie in os.listdir('/kaggle/input/siim-covid19-detection/test/{}'.format(study)):
        for image in os.listdir('/kaggle/input/siim-covid19-detection/test/{}/{}'.format(study, serie)):
            images.append(image)
            studies.append(study)
            dcm_path.append('/kaggle/input/siim-covid19-detection/test/{}/{}/{}'.format(study, serie, image))
            
            arr = dicom2array(dcm_path[-1])
            arr = np.stack((arr, arr, arr), axis=-1)
            im = Image.fromarray(arr)
            im.save("/kaggle/covid19_test_images/{}".format(image.replace("dcm", "jpg")))
            
            widths.append(arr.shape[1])
            heights.append(arr.shape[0])
            
test_meta_df["image_id"]=images
test_meta_df["study_id"]=studies
test_meta_df["dcm_path"]=dcm_path
test_meta_df["width"]=widths
test_meta_df["height"]=heights

  0%|          | 0/1214 [00:00<?, ?it/s]

In [5]:
#Save dicom data to JPEG 
os.makedirs('/kaggle/covid19_test_images', exist_ok=True)
tk = tqdm(enumerate(os.listdir('/kaggle/input/siim-covid19-detection/test')), total=len(os.listdir('/kaggle/input/siim-covid19-detection/test')))
for i, study in tk:
    for serie in os.listdir('/kaggle/input/siim-covid19-detection/test/{}'.format(study)):
        for img in os.listdir('/kaggle/input/siim-covid19-detection/test/{}/{}'.format(study, serie)):
            arr = dicom2array("/kaggle/input/siim-covid19-detection/test/{}/{}/{}".format(study, serie, img))
            arr = np.stack((arr, arr, arr), axis=-1)
            im = Image.fromarray(arr)
            im.save("/kaggle/covid19_test_images/{}.jpg".format(img[:-4]))

  0%|          | 0/1214 [00:00<?, ?it/s]

In [6]:
TEST_PATH = '/kaggle/covid19_test_images'
os.makedirs('/kaggle/working/output', exist_ok=True)

In [7]:
MODEL_PATH = '/kaggle/input/multi-class-model/singlecls_100ep.pt'
!python /kaggle/input/yolov5code/yolov5-master/detect.py --weights {MODEL_PATH} \
                                                          --source {TEST_PATH} \
                                                          --img 256 \
                                                          --conf-thres 0.323 \
                                                          --iou-thres 0.5 \
                                                          --max-det 5 \
                                                          --save-txt \
                                                          --save-conf \
                                                          --project '/kaggle/working/output' \
                                                          --nosave \
                                                          --name ""\
                                                          --exist-ok \
                                                          --device "0"


Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.323, device='0', exist_ok=True, half=False, hide_conf=False, hide_labels=False, imgsz=256, iou_thres=0.5, line_thickness=3, max_det=5, name='', nosave=True, project='/kaggle/working/output', save_conf=True, save_crop=False, save_txt=True, source='/kaggle/covid19_test_images', update=False, view_img=False, weights=['/kaggle/input/multi-class-model/singlecls_100ep.pt'])
[31m[1mrequirements:[0m /kaggle/working/requirements.txt not found, check failed.
image 1/1263 /kaggle/covid19_test_images/0026720152f5.jpg: 256x256 Done. (0.040s)
image 2/1263 /kaggle/covid19_test_images/00fc8fc35dc1.jpg: 256x224 2 opacitys, Done. (0.044s)
image 3/1263 /kaggle/covid19_test_images/01c3512eebc3.jpg: 256x256 Done. (0.028s)
image 4/1263 /kaggle/covid19_test_images/01f948f8e544.jpg: 256x256 Done. (0.027s)
image 5/1263 /kaggle/covid19_test_images/022146012034.jpg: 256x256 Done. (0.026s)
image 6/1263 /kaggle/covid19_test_images/02

In [8]:
# MODEL_PATH = '/kaggle/input/multi-class-model/singlecls_yolovx_c.pt'
# !python /kaggle/input/yolov5code/yolov5-master/detect.py --weights {MODEL_PATH} \
#                                                           --source {TEST_PATH} \
#                                                           --img 256 \
#                                                           --conf-thres 0.357 \
#                                                           --iou-thres 0.5 \
#                                                           --max-det 5 \
#                                                           --save-txt \
#                                                           --save-conf \
#                                                           --project '/kaggle/working/output' \
#                                                           --nosave \
#                                                           --name ""\
#                                                           --exist-ok \
#                                                           --device "0"

In [9]:
# MODEL_PATH = '/kaggle/input/multi-class-model/singlecls_yolovs_300ep.pt'
# !python /kaggle/input/yolov5code/yolov5-master/detect.py --weights {MODEL_PATH} \
#                                                           --source {TEST_PATH} \
#                                                           --img 640 \
#                                                           --conf-thres 0.066 \
#                                                           --iou-thres 0.5 \
#                                                           --max-det 5 \
#                                                           --save-txt \
#                                                           --save-conf \
#                                                           --project '/kaggle/working/output' \
#                                                           --nosave \
#                                                           --name ""\
#                                                           --exist-ok \
#                                                           --device "0"

In [10]:
class Data:
    def __init__(self, images, transform=None):
        self.images = images
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):

        file_path = os.path.join(TEST_PATH, self.images[idx].replace("dcm", "jpg"))
        image = cv2.imread(file_path)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image

In [11]:
transform = Compose([
            Resize(224, 244),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
test_dataset = Data(test_meta_df.image_id,transform = transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [12]:
#models = os.listdir('/kaggle/input/cnn-model/efficientnet-b0/efficientnet-b0')
#models

In [13]:
#model_main_path= '/kaggle/input/cnn-model/efficientnet-b0/efficientnet-b0'
model_main_path= '/kaggle/input/cnn-model/resnet18/resnet18'
models = os.listdir(os.path.join(model_main_path, "resnet18"))
models

['FOLD5_52.37.pth',
 'FOLD1_52.72.pth',
 'FOLD2_54.30.pth',
 'FOLD4_52.41.pth',
 'FOLD3_49.41.pth']

In [14]:
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

In [15]:
# CNN predictions averaged across the 5 folds
from sklearn.decomposition import PCA

LABEL_LIST = ['atypical', 'indeterminate', 'negative', 'typical']

device = "cuda"
CNN_preds = np.zeros((len(test_dataset), len(LABEL_LIST)))
reduced_features = []
pca = PCA(n_components=128)
for model_path in models:
    weights_path = os.path.join(model_main_path, model_path)
    #model = EfficientNet.from_pretrained("efficientnet-b0", weights_path=weights_path, advprop=False, include_top=True, num_classes=len(LABEL_LIST)).to(device)
    
    model=torchvision.models.resnet18(pretrained=False)
    model.avgpool=nn.AdaptiveAvgPool2d(1)
    model.fc=nn.Linear(model.fc.in_features, 4)
    model=model.to(device)
    model.load_state_dict(torch.load(weights_path))
    model.avgpool.register_forward_hook(get_activation('avgpool'))
        
        
    model.eval()
    preds = []
    all_features = []
    tk = tqdm(enumerate(test_loader), total=len(test_loader))
    for i, (images) in tk:
        images = images.to(device).float()
        with torch.no_grad():
            y_preds = model(images)
            #features = model.extract_features(images)
            features = activation['avgpool']
            all_features.append(torch.reshape(features, (features.shape[0], -1)).cpu().numpy())
        preds.append(F.softmax(y_preds).cpu().numpy())
    CNN_preds += np.concatenate(preds)/len(models)
    
    all_features = np.concatenate(all_features)
    reduced_features.append(pca.fit_transform(all_features))

NameError: name 'torchvision' is not defined

In [16]:
cnn_preds_df = pd.DataFrame(columns=["id"]+LABEL_LIST)
cnn_preds_df["id"] = [e[:-4]+"_image" for e in test_meta_df["image_id"]]
cnn_preds_df[LABEL_LIST] = CNN_preds

In [17]:
output_path = '/kaggle/working/output'
ss_df = pd.read_csv("/kaggle/input/siim-covid19-detection/sample_submission.csv")

In [18]:
#Image-level predictions
image_preds = pd.DataFrame(columns=["id", "PredictionString"])#+LABEL_LIST)
predictions= []
ids = []
meta_data_test = []
m_x1, m_x2, m_y1, m_y2 = 0,0,0,0
for i, (_, row) in enumerate(test_meta_df.iterrows()):
    predictions.append("")
    #if row.id.split("_")[1] == "image":
    ids.append(row.image_id[:-4]+"_image")
    if str(row.image_id[:-4])+".txt" in os.listdir(output_path+'/labels'):
        with open(output_path+'/labels/{}.txt'.format(row.image_id[:-4]), 'r') as f:
            nb = 0
            for line in f:
                nb+=1
                out = line.rsplit("\n")[0].split(" ")
                xc, yc, w, h = float(out[1]), float(out[2]), float(out[3]), float(out[4])
                x1, y1, x2, y2 = (xc-w/2)*row.width, (yc-h/2)*row.height, (xc+w/2)*row.width,( yc+h/2)*row.height
                predictions[-1]+="opacity {:.6f} {} {} {} {} ".format(float(out[5]), int(x1), int(y1), int(x2), int(y2))
                m_x1 += x1
                m_x2 += x2
                m_y1 += y1
                m_y2 += y2
            predictions[-1]=predictions[-1][:-1]
        meta_data_test.append([m_x1/nb, m_x2/nb, m_y1/nb, m_y2/nb, row.height, row.width])
    else:
        predictions[-1]+= "none 1.0 0 0 1 1"
        meta_data_test.append([0,0,0,0, row.height, row.width])
            
meta_data_test = np.array(meta_data_test)        
image_preds["id"] = ids
image_preds["PredictionString"] = predictions

In [19]:
# LightGBM predictions
import lightgbm as lgb
import re

ml_models = os.path.join(model_main_path,"ML_models") #os.listdir('/kaggle/input/cnn-model/ML_models/ML_models')
ML_preds = np.zeros((len(test_dataset), len(LABEL_LIST)))

for n in range(len(models)):
    r = re.compile(".*Fold.*"+str(n+1))
    ml_path = list(filter(r.match, ml_models))[0]

    df_meta_test = pd.DataFrame(columns = ["f_{}".format(i+1) for i in range(128)], data = reduced_features[n])
    df_meta_test["BBoxArea"] = (meta_data_test[:,1]/meta_data_test[:,5]-meta_data_test[:,0]/meta_data_test[:,5])*(meta_data_test[:,3]/meta_data_test[:,4]-meta_data_test[:,2]//meta_data_test[:,4])
    df_meta_test["AspectRatio"] = (meta_data_test[:,1]-meta_data_test[:,0])/(meta_data_test[:,3]-meta_data_test[:,2])

    clf = lgb.Booster(model_file=os.path.join(model_main_path, ml_path))
    ML_preds +=  clf.predict(df_meta_test)/len(models)

IndexError: list index out of range

In [20]:
ml_preds_df = pd.DataFrame(columns=["id"]+LABEL_LIST)
ml_preds_df["id"] = [e[:-4]+"_image" for e in test_meta_df["image_id"]]
ml_preds_df[LABEL_LIST] = ML_preds

In [21]:
cnnml_preds_df = pd.DataFrame(columns=["id"]+LABEL_LIST)
cnnml_preds_df["id"] = [e[:-4]+"_image" for e in test_meta_df["image_id"]]
cnnml_preds_df[LABEL_LIST] = np.max(np.array([CNN_preds, ML_preds]), axis=0)

In [22]:
#CNN preds
image_lvl_predictions = pd.merge(image_preds, cnn_preds_df, on="id")
#ML preds
#image_lvl_predictions = pd.merge(image_preds, ml_preds_df, on="id")
#CNN+ML preds
#image_lvl_predictions = pd.merge(image_preds, cnnml_preds_df, on="id")

In [23]:
#Study-level predictions 
study_preds = pd.DataFrame(columns=["id", "PredictionString"])
predictions= []
ids = []
for i, (_, row) in enumerate(ss_df.iterrows()):
    if row.id.split("_")[1] == "study":
        ids.append(row.id)
        predictions.append("")
        study_imgs = test_meta_df[test_meta_df["study_id"]==row.id.split("_")[0]].image_id.apply(lambda x:x[:-4]+"_image")
        pred_lbls = np.mean(np.array(image_lvl_predictions[image_lvl_predictions["id"].isin(study_imgs)][LABEL_LIST]), axis=0)
        for j in range(len(LABEL_LIST)):
            predictions[-1]+= "{} {:.6f} 0 0 1 1 ".format(LABEL_LIST[j], pred_lbls[j])
        predictions[-1] = predictions[-1][:-1]
study_lvl_predictions =  pd.DataFrame(columns=["id", "PredictionString"])
study_lvl_predictions["id"] = ids
study_lvl_predictions["PredictionString"] = predictions

In [24]:
!rm -rf /kaggle/working/output
!rm -rf /kaggle/working/gdcm

In [25]:
submission = pd.concat([study_lvl_predictions, image_lvl_predictions[["id", "PredictionString"]]])
submission.to_csv("submission.csv", index=False)

In [26]:
submission

Unnamed: 0,id,PredictionString
0,00188a671292_study,atypical 0.000000 0 0 1 1 indeterminate 0.0000...
1,004bd59708be_study,atypical 0.000000 0 0 1 1 indeterminate 0.0000...
2,00508faccd39_study,atypical 0.000000 0 0 1 1 indeterminate 0.0000...
3,006486aa80b2_study,atypical 0.000000 0 0 1 1 indeterminate 0.0000...
4,00655178fdfc_study,atypical 0.000000 0 0 1 1 indeterminate 0.0000...
...,...,...
1258,a43200bd5ceb_image,none 1.0 0 0 1 1
1259,37bf83df1b86_image,opacity 0.383566 526 1156 1478 2246 opacity 0....
1260,b30d2aef985f_image,opacity 0.358475 2786 1130 3665 2236 opacity 0...
1261,a37a362df0ac_image,none 1.0 0 0 1 1
