In [25]:
import numpy as np
import pandas as pd
import pydicom
%matplotlib inline
import matplotlib.pyplot as plt
import keras 

import glob as glob
from skimage.transform import resize
import tensorflow as tf
from tensorflow import keras

In [26]:
# Load the fields we care about. We appear to have some test files to work with
!ls -ltr 

total 174116
-rw-r--r-- 1 root root   1049354 Mar 21  2020  test6.dcm
-rw-r--r-- 1 root root   1049356 Mar 21  2020  test4.dcm
-rw-r--r-- 1 root root   1049354 Mar 21  2020  test5.dcm
-rw-r--r-- 1 root root   1049352 Mar 21  2020  test3.dcm
-rw-r--r-- 1 root root   1049356 Mar 21  2020  test2.dcm
-rw-r--r-- 1 root root   1049354 Mar 21  2020  test1.dcm
-rw-r--r-- 1 root root    432125 Mar 21  2020  sample_labels.csv
-rw-r--r-- 1 root root     14144 May 18  2020 'Build and train model ORIGINAL.ipynb'
-rw-r--r-- 1 root root         0 Dec 15 22:02  udacity.yml
-rw-r--r-- 1 root root         0 Dec 19 03:28  history_file
-rw-r--r-- 1 root root      1174 Dec 20 19:38  FDA_Submission_Template.md
-rw-r--r-- 1 root root   1643577 Dec 20 22:16 'Build and train model-Copy2.ipynb'
-rw-r--r-- 1 root root   1650401 Dec 20 22:42 'Build and train model-Copy1.ipynb'
-rw-r--r-- 1 root root      7203 Dec 21 01:10 'EDA ORIGINAL.ipynb'
-rw-r--r-- 1 root root   1620370 Dec 21 17:36 'Build and

In [27]:
# Quick review of the files and DICOM criteria

# Read files in 
dem_files = glob.glob('./test*')

# Prior to turning this into a function, let's review our ability to extract features
dcm_df = pd.DataFrame()

for x in dem_files:
    
    ds = pydicom.dcmread(x) 
    
    # Identify eligibility of the series for our image
    if (int(ds.PatientAge) < 85) and\
    (int(ds.PatientAge) > 10) and\
    (str(ds.PatientPosition) in ['PA', 'AP']) and\
    (ds.Modality == 'DX') and\
    (str(ds.BodyPartExamined) == 'CHEST'):
        temp_Eligibility = True 
    else:
        temp_Eligibility = False
    
    #Subset the stuff that is relevant in establishing if the file is valid for our model
    temp_df = pd.DataFrame(data={
                'Patient ID':[ds.PatientID],
                'Finding Labels':[ds.StudyDescription],
                'Patient Age':[ds.PatientAge],
                'Patient Gender':[ds.PatientSex],
                'View Position':[ds.PatientPosition],
                'Body Part':[ds.BodyPartExamined],
                'Modality':[ds.Modality],
                'Eligibility':[temp_Eligibility]})
    
    # Identify eligibility of the series for our image
    
    
    dcm_df = dcm_df.append(temp_df)
 

dcm_df

Unnamed: 0,Patient ID,Finding Labels,Patient Age,Patient Gender,View Position,Body Part,Modality,Eligibility
0,2,No Finding,81,M,PA,CHEST,CT,False
0,2,No Finding,81,M,PA,RIBCAGE,DX,False
0,2,No Finding,81,M,XX,CHEST,DX,False
0,1,Cardiomegaly,58,M,AP,CHEST,DX,True
0,61,Effusion,77,M,AP,CHEST,DX,True
0,2,No Finding,81,M,PA,CHEST,DX,True


In [28]:

# This function reads in a .dcm file, checks the important fields for our device, and returns a numpy array
# of just the imaging data
def check_dicom(filename): 
    # todo
    
    print('--------------')
    print('Load file {} ...'.format(filename))
    
    try:
        ds = pydicom.dcmread(filename)       
        img = ds.pixel_array
    except:
        print("Unable to load pixel data from DICOM file")
    
    # Identify eligibility of the series for our image
    if (int(ds.PatientAge) < 85) and\
    (int(ds.PatientAge) > 10) and\
    (str(ds.PatientPosition) in ['PA', 'AP']) and\
    (ds.Modality == 'DX') and\
    (str(ds.BodyPartExamined) == 'CHEST'):
        temp_Eligibility = True 
    else:
        temp_Eligibility = False
        print("CAUTION: DICOM attributes deviate from indicated model specification")
    
    if temp_Eligibility:
        return img
    else:
        return None 
    
    
# This function takes the numpy array output by check_dicom and 
# runs the appropriate pre-processing needed for our model input
def preprocess_image(img,img_mean,img_std,IMG_SIZE): 
    # todo
    
    # Resize the image
    img = resize(img, IMG_SIZE)
    
    # Standardize the image -- is this method really equivalent what was applied in 
    # augmentation? Regardless, the presence of these arguments in the function lead me
    # to believe it's required. 
    proc_img = (img - img_mean) / img_std
    
    return proc_img


# This function loads in our trained model w/ weights and compiles it 
def load_model(model_path, weight_path):
    # todo
    
    # Load the model
    with open(model_path, 'r') as raw_file:
        json_model = raw_file.read()
    
    model = keras.models.model_from_json(json_model)
    
    # Now the weights
    model.load_weights(weight_path)
    
    # Do we also have to compile?
    
    return model


# This function uses our device's threshold parameters to predict whether or not
# the image shows the presence of pneumonia using our trained model
def predict_image(model, img, thresh): 
    # todo    
    
    pred_Y = model.predict(img)
    
    return np.where(pred_Y > thresh, "Pneumonia", 'No Pneumonia') 

In [29]:
test_dicoms = ['test1.dcm','test2.dcm','test3.dcm','test4.dcm','test5.dcm','test6.dcm']

model_path = 'my_model.json'
weight_path = 'xray_class_my_model.best.hdf5'

# Honestly, I'm not so sure on the descriptive stats used here. I computed on one batch of training data. 
# I think this makes a _degree_ of sense, assuming that we're looking at standardized values.

IMG_SIZE=(1,224,224,3) # This might be different if you did not use vgg16
img_mean = 115 # loads the mean image value they used during training preprocessing
img_std = 69 # loads the std dev image value they used during training preprocessing

my_model = load_model(model_path, weight_path)
thresh = 0.63 #loads the threshold they chose for model classification 

# use the .dcm files to test your prediction
for i in test_dicoms:
    
    img = np.array([])
    img = check_dicom(i)
    
    if img is None:
         print("Inference witheld due to deviations from DICOM model requirements")
        
    else: 
        img_proc = preprocess_image(img,img_mean,img_std,IMG_SIZE)

        pred = predict_image(my_model,img_proc,thresh)

        print(pred)

--------------
Load file test1.dcm ...
[['No Pneumonia']]
--------------
Load file test2.dcm ...
[['No Pneumonia']]
--------------
Load file test3.dcm ...
[['No Pneumonia']]
--------------
Load file test4.dcm ...
CAUTION: DICOM attributes deviate from indicated model specification
Inference witheld due to deviations from DICOM model requirements
--------------
Load file test5.dcm ...
CAUTION: DICOM attributes deviate from indicated model specification
Inference witheld due to deviations from DICOM model requirements
--------------
Load file test6.dcm ...
CAUTION: DICOM attributes deviate from indicated model specification
Inference witheld due to deviations from DICOM model requirements
