# DICOM File to Prediction
The goal of this notebook is to build a pipeline that takes a DICOM file and predicts the presence of
pneumonia from the X-ray image stored in that file.

In [None]:
import numpy as np
import pandas as pd
import pydicom
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import skimage.transform

In [None]:
def check_dicom(filename):
    """
    Load a .dcm file from disk and hand back only its imaging data.
    :param filename: path of the DICOM file to read
    :return: the ``pixel_array`` of the dataset read from ``filename``
    """
    print(f'Loading file {filename} ...')
    # Read the dataset and keep only the image payload; the dataset object
    # itself is not needed by the caller.
    return pydicom.dcmread(filename).pixel_array


def preprocess_image(img, img_size, img_mean=None, img_std=None):
    """
    Normalise the pixel array returned by check_dicom and resize it to the model input shape.

    When both ``img_mean`` and ``img_std`` are supplied the image is standardised as
    ``(img - img_mean) / img_std``; if either one is missing the image is divided by 255 instead.
    :param img: The array of pixels from check_dicom
    :param img_size: The desired output shape, passed to ``skimage.transform.resize``
    :param img_mean: Mean of the images the model was trained on (scalar or array), or None
    :param img_std: Standard deviation of the images the model was trained on (scalar or array), or None
    :return: processed image ready for the model
    :raises ValueError: if ``img_std`` is (or contains) zero
    """
    # Compare against None explicitly: a legitimate mean of 0 is falsy, and the
    # truth value of a multi-element NumPy array is ambiguous and raises.
    if img_mean is not None and img_std is not None:
        # Fail loudly instead of producing inf/nan pixels from a division by zero.
        if np.any(np.asarray(img_std) == 0):
            raise ValueError('img_std must be non-zero to standardise the image')
        img = (img - img_mean) / img_std
    else:
        img = img / 255
    return skimage.transform.resize(img, img_size)


def load_model(model_path, weight_path=None):
    """
    Restore the saved Keras model in compiled form and, if requested, load a separate weights file into it.
    :param model_path: Path of the saved model to load
    :param weight_path: Path of a weights file to load on top of the model (skipped when empty or None)
    :return: the loaded model
    """
    restored = tf.keras.models.load_model(model_path, compile=True)
    # Without a weights file the model is returned exactly as it was saved.
    if not weight_path:
        return restored
    restored.load_weights(weight_path)
    return restored


def predict_image(model, img, threshold):
    """
    Predict whether the image shows the presence of pneumonia using the trained model
    and a given threshold.
    :param model: Previously trained model; must expose ``predict(batch)`` returning a single value
    :param img: Pre-processed image, either one image (H, W, C) or an already batched (1, H, W, C) array
    :param threshold: Probability above which the image is labelled positive
    :return: tuple ``(probability, label)`` where probability is a Python float and
             label is 1.0 for positive or 0.0 for negative
    """
    img = np.asarray(img)
    if img.ndim == 3:
        # Add the leading batch axis instead of hard-coding the image size.
        img_to_model = img[np.newaxis, ...]
    elif img.ndim == 4:
        # Already batched: pass through unchanged.
        img_to_model = img
    else:
        # Legacy behaviour for any other layout (e.g. a flattened image).
        img_to_model = img.reshape((1, 224, 224, 3))

    prediction = model.predict(img_to_model)
    # Squeeze to a 0-d value first: calling float() on an array with ndim > 0
    # is deprecated in recent NumPy releases.
    probability = float(np.squeeze(prediction))
    # Derive the label from the returned probability so the pair is always
    # self-consistent, and return floats for both outcomes.
    return probability, 1.0 if probability > threshold else 0.0


# Setup Tests
This section sets everything up based on the project's directory structure and the parameters we learned in the
Build and Train model notebook. We will process the image from each DICOM file and generate a prediction for every
DICOM file provided.


In [None]:
# DICOM files to run through the pipeline, and the location of the saved model.
test_dicoms = ['test1.dcm','test2.dcm','test3.dcm','test4.dcm','test5.dcm','test6.dcm']
model_path = 'models/DenseNet_model.h5'
# NOTE(review): weight_path is defined here but is not passed to load_model below,
# so no separate weights file is loaded in this cell -- confirm that is intended.
weight_path = "current_best.hdf5"

# Target shape handed to preprocess_image for every image.
IMG_SIZE= (224,224,3)
# Both left as None, so preprocess_image takes its "divide by 255" branch.
img_mean = None
img_std = None

my_model = load_model(model_path)
# Probability cut-off passed to predict_image; presumably chosen in the
# Build and Train model notebook -- TODO confirm.
thresh = 0.135

# Run every .dcm file through the pipeline and collect the results in three
# parallel lists (one entry per successfully processed file).
probabilities = []
predictions = []
processed_images  = []

for i in test_dicoms:
    img = check_dicom(i)
    # check_dicom as defined in this notebook always returns a pixel array, so
    # this guard does not trigger today. NOTE: if a file were ever skipped here,
    # the result lists would have fewer entries than test_dicoms.
    if img is None:
        continue
    proc_img = preprocess_image(img, IMG_SIZE, img_mean, img_std)
    processed_images.append(proc_img)
    prob, pred = predict_image(my_model, proc_img, thresh)
    probabilities.append(prob)
    predictions.append(pred)


# Test Pipeline
Let's make sure this pipeline from DICOM file to prediction actually works and see how it did on the
provided DICOM files.

In [None]:
# Build the ground-truth labels from each file's StudyDescription field
true_labels = [
    1.0 if 'Pneumonia' in pydicom.dcmread(dicom_path).StudyDescription else 0.0
    for dicom_path in test_dicoms
]

# Draw every processed image on its own panel, titled with truth, prediction and probability
fig, ax = plt.subplots(2, 3, figsize=(18, 18))
for image, label, probability, truth, panel in zip(
    processed_images, predictions, probabilities, true_labels, ax.flatten()
):
    panel.imshow(image, cmap='gray')
    panel.set_title(f'Truth: {truth}, Predicted: {label}, Prob: {probability:.3f}')