TESTING MODEL & ASSESSING ACCURACY
- Test model on individual discrete patients and on outliers
- Generate probability colour maps
- Calculate overlap metrics

In [30]:
import SimpleITK as sitk
import pandas as pd
import glob
import numpy as np
import os
import joblib

Create DataFrames for Test Patients (Non-outliers and outliers)

In [31]:
# load in model
p_clf = joblib.load("p-rbf-clf.joblib")

In [32]:
# function to create the DataFrame columns
def create_test_col(dir, p_ID, mode):
    """Load one modality's z-map for a patient as a flat vector of voxel values.

    Parameters
    ----------
    dir : str
        Root folder that contains the ``<MODE>-OUTPUT/<MODE>-ZMAPS/`` sub-folders.
        A trailing slash is optional (the name shadows the builtin ``dir`` but is
        kept because it is part of the function's signature).
    p_ID : str
        Patient ID; any ``.nii`` file whose name contains it is a match.
    mode : str
        One of ``'GM'``, ``'MD'``, ``'FA'``, ``'FICVF'``, ``'ODI'``.

    Returns
    -------
    numpy.ndarray
        1-D array with the flattened image, or 2122945 zeros when no file for this
        patient/modality exists (a message is printed in that case).

    Raises
    ------
    KeyError
        If ``mode`` is not one of the known modalities.
    """
    n_voxels = 121 * 145 * 121  # 2122945 voxels in a flattened mri

    # modalities =  GM, MD, FA, FICVF, ODI
    # os.path.join instead of string concatenation: a root without a trailing
    # slash used to build a non-existent folder name, so nothing matched and the
    # column silently became all zeros.
    modes = {name: os.path.join(dir, name + '-OUTPUT', name + '-ZMAPS')
             for name in ('GM', 'MD', 'FA', 'FICVF', 'ODI')}

    # sorted() makes the choice deterministic if several files match the ID
    matches = sorted(glob.glob(os.path.join(modes[mode], "*" + p_ID + "*.nii")))
    if not matches:
        print(mode + " modality doesn't exist for " + p_ID)
        # same container type as the normal return path, without building a
        # two-million-element Python list
        return np.zeros(n_voxels)

    img = sitk.ReadImage(matches[0])  # data is sitk image type
    return sitk.GetArrayFromImage(img).flatten()

# function to bring DataFrame columns together into single DF
def create_test_df(dir, test_patient):
    """Build the per-voxel feature table for one patient.

    Calls create_test_col once per modality and returns a DataFrame whose
    columns are, in order, GM, FA, MD, FICVF and ODI.
    """
    column_order = ("GM", "FA", "MD", "FICVF", "ODI")
    columns = {}
    for modality in column_order:
        columns[modality] = create_test_col(dir, test_patient, modality)
    return pd.DataFrame(columns, index=None)

In [33]:
# Held-out patients: the model is tested on them and probability maps are generated for them.
# None of these patients were seen by the model during training.
patient_ids = ["D044", "D045", "D046", "D047", "D048", "D049"]
dir = "OUTPUT/DISCRETE/"  # root folder of the per-modality z-map outputs

print("Creating test patient DFS...")
patient_dfs = []
for patient in patient_ids:
    patient_dfs.append(create_test_df(dir, patient))
    print("DF created for patient: ", patient)

Creating test patient DFS...
DF created for patient:  D044
DF created for patient:  D045
DF created for patient:  D046
DF created for patient:  D047
DF created for patient:  D048
DF created for patient:  D049


In [34]:
# Outlier patients have larger lesions than the training data used (~2500 voxels):
#   D021 lesion size: 6645 voxels
#   D035 lesion size: 11836 voxels
outlier_ids = ["D021", "D035"]

# Spell the input folder out here instead of reading the notebook-wide `dir`.
# Later cells rebind `dir` to output folders; re-running this cell after them made
# create_test_col find no files and silently return all-zero columns.
outlier_input_dir = "OUTPUT/DISCRETE/"

outlier_dfs = []
print("\nCreating outlier patient DFS...")
for outlier in outlier_ids:
    outlier_dfs.append(create_test_df(outlier_input_dir, outlier))
    print("DF created for outlier: ", outlier)

print("Patient & Outlier DataFrames Created")


Creating outlier patient DFS...
DF created for outlier:  D021
DF created for outlier:  D035
Patient & Outlier DataFrames Created


Generate Probability Colour Maps

In [35]:
# Function to generate probability maps

# example_img is only a reference volume: it is handed to the sitk function CopyInformation so that
# an array converted back to an image keeps the original image properties
example_img = sitk.ReadImage("OUTPUT/DISCRETE/GM-OUTPUT/GM-ZMAPS/Z-smwc1D049_T1.nii") 
def generate_colour_map(OUTPUT_DIR, df, p_ID):
    """Predict a per-voxel lesion probability map for one patient and write it to disk.

    Uses the module-level classifier ``p_clf`` and the reference image
    ``example_img``.

    Parameters
    ----------
    OUTPUT_DIR : str
        Folder the map is written to; created if it does not exist.
    df : pandas.DataFrame
        One row per voxel, as produced by create_test_df.
    p_ID : str
        Patient ID, used in the log messages and in the output file name
        ``Probability-Map-<p_ID>.nii``.

    Returns
    -------
    None
    """
    X_test_patient = df
    print("Predicting probability of patient: ", p_ID)
    y_pred_patient = np.asarray(p_clf.predict_proba(X_test_patient))
    print("Completed prediction of patient: ", p_ID)
    print("Converting to MRI")
    # take probabilities that there is a lesion present (second column).
    # Slicing the column replaces a Python-level loop over every voxel row;
    # .copy() gives a contiguous buffer for the image conversion.
    # NOTE(review): assumes the lesion class is the second entry of p_clf.classes_ - confirm.
    vals = y_pred_patient[:, 1].copy()
    # convert back to 3D array (MRI). SimpleITK reports size as (x, y, z) while the
    # numpy array is indexed (z, y, x), hence the reversal; for the reference
    # volume this is the (121, 145, 121) grid.
    volume_shape = tuple(example_img.GetSize())[::-1]
    y_pred_patient_3D = vals.reshape(volume_shape)
    y_pred_patient_3D_img = sitk.GetImageFromArray(y_pred_patient_3D)  # convert 3D array to nifti image
    y_pred_patient_3D_img.CopyInformation(example_img)
    filename = "Probability-Map-" + p_ID + ".nii"
    print("Writing image...")
    if OUTPUT_DIR:
        # make sure the target folder exists before writing
        os.makedirs(OUTPUT_DIR, exist_ok=True)
    sitk.WriteImage(y_pred_patient_3D_img, os.path.join(OUTPUT_DIR, filename))
    print("Probability map successfully generated!")
    print("-----------------------------------------")

In [36]:
print("Generating probability maps for test patients...")
# A dedicated name instead of rebinding `dir`: that shadowed the builtin and
# overwrote the input folder the DataFrame-building cells read from.
test_maps_dir = "OUTPUT/PROBABILITY-MAPS/TEST-PATIENTS"
os.makedirs(test_maps_dir, exist_ok=True)  # make sure the target folder exists before writing

for df, p_id in zip(patient_dfs, patient_ids):
    generate_colour_map(test_maps_dir, df, p_id)

Generating probability maps for test patients...
Predicting probability of patient:  D044
Completed prediction of patient:  D044
Converting to MRI
Writing image...
Probability map successfully generated!
-----------------------------------------
Predicting probability of patient:  D045
Completed prediction of patient:  D045
Converting to MRI
Writing image...
Probability map successfully generated!
-----------------------------------------
Predicting probability of patient:  D046
Completed prediction of patient:  D046
Converting to MRI
Writing image...
Probability map successfully generated!
-----------------------------------------
Predicting probability of patient:  D047
Completed prediction of patient:  D047
Converting to MRI
Writing image...
Probability map successfully generated!
-----------------------------------------
Predicting probability of patient:  D048
Completed prediction of patient:  D048
Converting to MRI
Writing image...
Probability map successfully generated!
--------

In [37]:
print("Generating probability maps for outliers...")
# A dedicated name instead of rebinding `dir`: that shadowed the builtin and
# overwrote the input folder the DataFrame-building cells read from.
outlier_maps_dir = "OUTPUT/PROBABILITY-MAPS/OUTLIERS"
os.makedirs(outlier_maps_dir, exist_ok=True)  # make sure the target folder exists before writing

for df, p_id in zip(outlier_dfs, outlier_ids):
    generate_colour_map(outlier_maps_dir, df, p_id)

Generating probability maps for outliers...
Predicting probability of patient:  D021
Completed prediction of patient:  D021
Converting to MRI
Writing image...
Probability map successfully generated!
-----------------------------------------
Predicting probability of patient:  D035
Completed prediction of patient:  D035
Converting to MRI
Writing image...
Probability map successfully generated!
-----------------------------------------


Segment Probability Maps

In [38]:
# we want to exclude the air around the resulting probability map so we use
# a segmentation label created using 3DSlicer and simply multiply the 2 images together
def segment_prob_map(dir, predicted_img, segment_mask_arr):
    """Mask a probability map with a label array and save the result.

    Parameters
    ----------
    dir : str
        Output folder; created if it does not exist (the name shadows the builtin
        ``dir`` but is kept because it is part of the function's signature).
    predicted_img : str
        Path of the probability map to mask.
    segment_mask_arr : numpy.ndarray
        Array multiplied element-wise with the probability map.

    Returns
    -------
    None
        The masked map is written to ``<dir>/S-<name>.nii``, where ``<name>`` is
        the input file name without its ``.nii`` extension.
    """
    pred_img = sitk.ReadImage(predicted_img)
    pred_img_arr = sitk.GetArrayFromImage(pred_img)
    segmented_pred = segment_mask_arr * pred_img_arr
    segmented_pred_img = sitk.GetImageFromArray(segmented_pred)
    # take the geometry from the image that was just masked rather than from a
    # notebook-level reference image
    segmented_pred_img.CopyInformation(pred_img)

    # The input name already ends in ".nii"; appending the extension again
    # produced "S-<name>.nii.nii", which the Dice cell (it reads "S-<name>.nii")
    # could not find.
    base = os.path.basename(predicted_img)
    if base.lower().endswith(".nii"):
        base = base[:-len(".nii")]
    filename = "S-" + base + ".nii"

    if dir:
        # make sure the target folder exists before writing
        os.makedirs(dir, exist_ok=True)
    sitk.WriteImage(segmented_pred_img, os.path.join(dir, filename))

In [39]:
# Label map used to exclude the air around the brain scan.
segment_mask = sitk.ReadImage("Segmentation-label.nrrd")
mask_arr = sitk.GetArrayFromImage(segment_mask)
# Label value 2 is reset to 0 so that positive labels are 1 and negative labels are 0.
np.putmask(mask_arr, mask_arr == 2, 0)

In [40]:
print("Segmenting probability maps for test patients...")
# sorted() gives a reproducible processing order; glob returns files in arbitrary order
test_maps = sorted(glob.glob("OUTPUT/PROBABILITY-MAPS/TEST-PATIENTS/*.nii"))
# Dedicated names instead of `dir` and `map`, which shadowed the builtins.
segmented_test_dir = "OUTPUT/PROBABILITY-MAPS/TEST-PATIENTS/SEGMENTED-MAPS"
os.makedirs(segmented_test_dir, exist_ok=True)  # make sure the target folder exists before writing
if not test_maps:
    # otherwise the success message below would be printed although nothing was processed
    print("WARNING: no probability maps found for test patients")
for map_path in test_maps:
    segment_prob_map(segmented_test_dir, map_path, mask_arr)
print("Segmented probability maps created for test patients")

Segmenting probability maps for test patients...
Segmented probability maps created for test patients


In [41]:
print("Segmenting probability maps for outliers...")
# Dedicated names instead of `dir` and `map`, which shadowed the builtins.
segmented_outlier_dir = "OUTPUT/PROBABILITY-MAPS/OUTLIERS/SEGMENTED-MAPS"
os.makedirs(segmented_outlier_dir, exist_ok=True)  # make sure the target folder exists before writing
# sorted() gives a reproducible processing order; glob returns files in arbitrary order
outlier_maps = sorted(glob.glob("OUTPUT/PROBABILITY-MAPS/OUTLIERS/*.nii"))
if not outlier_maps:
    # otherwise the success message below would be printed although nothing was processed
    print("WARNING: no probability maps found for outliers")
for map_path in outlier_maps:
    segment_prob_map(segmented_outlier_dir, map_path, mask_arr)
print("Outlier probability maps segmented")

Segmenting probability maps for outliers...
Outlier probability maps segmented


Calculate Dice score for test patients and outliers

In [42]:
from scipy.spatial import distance
# Scores a predicted patient probability map against the patient's lesion mask.
# The prediction is restricted to the patient's specific spherical segment label map
# around the lesion, and voxels at or above the threshold are considered abnormal.
def dice_score(patient, segment, lesion, threshold):
    """Return the Dice similarity between a thresholded prediction and a lesion mask.

    Parameters
    ----------
    patient : str
        Path of the predicted probability map.
    segment : str
        Path of the patient's segment label map; label value 2 is set to 0
        before the map is multiplied with the prediction.
    lesion : str
        Path of the lesion mask used as ground truth.
    threshold : float
        Probabilities at or above this value become 1, values below it become 0.

    Returns
    -------
    float
        ``1 - scipy.spatial.distance.dice(prediction, truth)`` on the flattened arrays.
    """
    def _load(path):
        # read an image file and hand back its voxel array
        return sitk.GetArrayFromImage(sitk.ReadImage(path))

    roi = _load(segment)
    roi[roi == 2] = 0

    predicted = _load(patient)
    # the Dice score needs binary values, so the probabilities are thresholded in place
    predicted[predicted >= threshold] = 1
    predicted[predicted < threshold] = 0

    truth = _load(lesion)
    predicted_in_roi = predicted * roi  # keep predictions inside the segment area only

    # scipy's dice is a dissimilarity between two 1-D arrays: flatten both volumes
    # and subtract the result from 1 to obtain the similarity
    dissimilarity = distance.dice(predicted_in_roi.flatten(), truth.flatten())
    return 1 - dissimilarity


In [88]:
# Calculate dice score of individual test patients at different thresholds for report.
# One loop replaces the 24 copy-pasted dice_score calls; the printed text is unchanged.

# (patient ID, folder holding its segmented probability map, header suffix)
dice_cohort = [
    (p_id, "OUTPUT/PROBABILITY-MAPS/OUTLIERS/SEGMENTED-MAPS/", " (outlier)")
    for p_id in ("D021", "D035")
] + [
    (p_id, "OUTPUT/PROBABILITY-MAPS/TEST-PATIENTS/SEGMENTED-MAPS/", "")
    for p_id in ("D044", "D045", "D046", "D047", "D048", "D049")
]
dice_thresholds = (0.5, 0.8, 0.94)

for p_id, segmented_dir, header_suffix in dice_cohort:
    print("PATIENT " + p_id + " DICE SCORE" + header_suffix)
    for dice_threshold in dice_thresholds:
        score = dice_score(
            segmented_dir + "S-Probability-Map-" + p_id + ".nii",
            "Segmentation-label-" + p_id + ".nrrd",
            "DISCRETE/Lesion-Masks/w" + p_id + "_Lesion.nii",
            dice_threshold,
        )
        print("At threshold " + str(dice_threshold) + " = ", score)
    print("---------------------------------")



PATIENT D021 DICE SCORE (outlier)
At threshold 0.5 =  0.30695744475537967
At threshold 0.8 =  0.18018644775288717
At threshold 0.94 =  0.08483079449055919
---------------------------------
PATIENT D035 DICE SCORE (outlier)
At threshold 0.5 =  0.5486725663716814
At threshold 0.8 =  0.6032170089608727
At threshold 0.94 =  0.6577731502570443
---------------------------------
PATIENT D044 DICE SCORE
At threshold 0.5 =  0.33638443935926776
At threshold 0.8 =  0.5092327698309493
At threshold 0.94 =  0.6597141753572808
---------------------------------
PATIENT D045 DICE SCORE
At threshold 0.5 =  0.27979274611398963
At threshold 0.8 =  0.3275897020068923
At threshold 0.94 =  0.4099740932642487
---------------------------------
PATIENT D046 DICE SCORE
At threshold 0.5 =  0.11600394347683207
At threshold 0.8 =  0.13734072561034916
At threshold 0.94 =  0.20177956371986228
---------------------------------
PATIENT D047 DICE SCORE
At threshold 0.5 =  0.37027569837502283
At threshold 0.8 =  0.450259