# RSNA Pneumonia Detection Challenge

for more detail, please visit:
https://www.kaggle.com/c/rsna-pneumonia-detection-challenge

In [1]:
import os 
import sys

import random
import math
import numpy as np
import cv2
import matplotlib.pyplot as plt
import json
import pydicom as dcm
from imgaug import augmenters as iaa
from tqdm import tqdm
import pandas as pd 
import glob

In [2]:
# from mrcnn.config import Config
from mrcnn import utils
# import mrcnn.model as modellib
# from mrcnn import visualize
# from mrcnn.model import log

  from ._conv import register_converters as _register_converters


In [3]:
# a=os.getcwd()
# a

# Root directory of the project# Root  
ROOT_DIR = os.path.abspath('C:\\Users\\su_hang\\Kaggle')
# Directory to save logs and trained model
# MODEL_DIR = os.path.join(ROOT_DIR, 'logs')

In [4]:
train_dicom_dir  = os.path.join(ROOT_DIR, 'data\\stage_1_train_images')
test_dicom_dir = os.path.join(ROOT_DIR, 'data\\stage_1_test_images')

## Some setup functions and classes for Mask-RCNN
* dicom_fps is a list of the dicom image path and filenames
* image_annotions is a dictionary of the annotations keyed by the filenames
* parsing the dataset returns a list of the image filenames and the annotations dictionary

In [5]:
def get_dicom_fps(dicom_dir):
    dicom_fps = glob.glob(dicom_dir+'/'+'*.dcm')
    return list(set(dicom_fps))

def parse_dataset(dicom_dir, anns): 
    image_fps = get_dicom_fps(dicom_dir)
    image_annotations = {fp: [] for fp in image_fps}
    for index, row in anns.iterrows(): 
        fp = os.path.join(dicom_dir, row['patientId']+'.dcm')
        image_annotations[fp].append(row)
    return image_fps, image_annotations

In [6]:
class DetectorDataset(utils.Dataset):
    """Dataset class for training pneumonia detection on the RSNA pneumonia dataset.
    """

    def __init__(self, image_fps, image_annotations, orig_height, orig_width):
        super().__init__(self)
        
        # Add classes
        self.add_class('pneumonia', 1, 'Lung Opacity')
   
        # add images 
        for i, fp in enumerate(image_fps):
            annotations = image_annotations[fp]
            self.add_image('pneumonia', image_id=i, path=fp, 
                           annotations=annotations, orig_height=orig_height, orig_width=orig_width)
            
    def image_reference(self, image_id):
        info = self.image_info[image_id]
        return info['path']

    def load_image(self, image_id):
        info = self.image_info[image_id]
        fp = info['path']
        ds = pydicom.read_file(fp)
        image = ds.pixel_array
        # If grayscale. Convert to RGB for consistency.
        if len(image.shape) != 3 or image.shape[2] != 3:
            image = np.stack((image,) * 3, -1)
        return image

    def load_mask(self, image_id):
        info = self.image_info[image_id]
        annotations = info['annotations']
        count = len(annotations)
        if count == 0:
            mask = np.zeros((info['orig_height'], info['orig_width'], 1), dtype=np.uint8)
            class_ids = np.zeros((1,), dtype=np.int32)
        else:
            mask = np.zeros((info['orig_height'], info['orig_width'], count), dtype=np.uint8)
            class_ids = np.zeros((count,), dtype=np.int32)
            for i, a in enumerate(annotations):
                if a['Target'] == 1:
                    x = int(a['x'])
                    y = int(a['y'])
                    w = int(a['width'])
                    h = int(a['height'])
                    mask_instance = mask[:, :, i].copy()
                    cv2.rectangle(mask_instance, (x, y), (x+w, y+h), 255, -1)
                    mask[:, :, i] = mask_instance
                    class_ids[i] = 1
        return mask.astype(np.bool), class_ids.astype(np.int32)

## Get the pixel values for testing set

In [7]:
# test_dcm_fps  is the path of the image file
test_dcm_fps = glob.glob('data/stage_1_test_images/*.dcm')

In [138]:
# get the pixel values for testing set (only took the image pixel in the region of [100:900,200:800] (800*600))
prediction=[]
patientID=[]
for i in range(len(test_dcm_fps)):
    patientID.append(test_dcm_fps[i][25:61])
    if round((random.random()*1000))%3==0:
        prediction.append(str('0.9 100 200 800 600'))
    else:
        prediction.append('')   
        

In [139]:
df1=pd.DataFrame(data=patientID,columns=['patientID'])
df2=pd.DataFrame(data=prediction,columns=['PredictionString'])
df=pd.concat([df1,df2],axis=1)

In [140]:
 prediction

['',
 '',
 '',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '0.9 100 200 800 600',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '0.9 100 200 800 600',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '',
 '',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '',
 '',
 '',
 '',
 '0.9 100 200 800 600',
 '0.9 100 200 800 600',
 '',
 '',
 '0.9 100 200 800 600',
 '0.

In [141]:
df.head()

Unnamed: 0,patientID,PredictionString
0,000924cf-0f8d-42bd-9158-1af53881a557,
1,000db696-cf54-4385-b10b-6b16fbb3f985,
2,000fe35a-2649-43d4-b027-e67796d412e0,
3,001031d9-f904-4a23-b3e5-2c088acd19c6,
4,0010f549-b242-4e94-87a8-57d79de215fc,


In [142]:
df.to_csv('Submission_File.csv',index=False)

In [101]:
test.append([0.9,300,100,600,800])

In [107]:
type(test[1][0])

float

In [93]:
np.array?