# Compare and plot distance map predictions of ensemble model on example subjects

- Compare plots of averaged distance maps with plots of ensembled distance maps.
- What is the effect of averaging if the individual models' predictions disagree?

In [1]:
import meld_graph
import meld_graph.models
import meld_graph.experiment
import meld_graph.dataset
import meld_graph.data_preprocessing
import meld_graph.evaluation

import importlib
importlib.reload(meld_graph)
importlib.reload(meld_graph.models)
importlib.reload(meld_graph.dataset)
importlib.reload(meld_graph.experiment)
importlib.reload(meld_graph.data_preprocessing)
importlib.reload(meld_graph.evaluation)

import logging
import os
import json

from meld_graph.dataset import GraphDataset, Oversampler
from meld_classifier.meld_cohort import MeldCohort, MeldSubject
from meld_graph.training import Metrics
import numpy as np
from meld_graph.paths import EXPERIMENT_PATH

from meld_graph.evaluation import Evaluator

Setting MELD_DATA_PATH to /home/co-spit1/meld_data
Setting BASE_PATH to /home/co-spit1/meld_data
Setting EXPERIMENT_PATH to /home/co-spit1/meld_experiments/co-spit1
Setting FS_SUBJECTS_PATH to /home/co-spit1/meld_data/output/fs_outputs
Setting EXPERIMENT_PATH to /rds/project/kw350/rds-kw350-meld/experiments_graph/co-spit1


In [2]:
import h5py
import matplotlib_surface_plotting as msp
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import nibabel as nb
from meld_classifier.paths import BASE_PATH

def load_prediction(subject,hdf5,dset='prediction'):
    """Read one dataset for both hemispheres of a subject from an HDF5 file.

    Args:
        subject: name of the top-level group in the file.
        hdf5: file handed to ``h5py.File``; it is opened read-only.
        dset: name of the dataset stored below each hemisphere group.

    Returns:
        dict with the keys 'lh' and 'rh'; each value is the content of
        ``file[subject][hemi][dset]`` read with ``[:]``.
    """
    with h5py.File(hdf5, "r") as h5_file:
        return {hemi: h5_file[subject][hemi][dset][:] for hemi in ('lh', 'rh')}

def create_surface_plots(coords,faces,overlay,flat_map=True, limits=None):
    """Plot an overlay on a surface and return the rendered image as an array.

    The figure is written to 'tmp.png' in the current working directory by
    ``plot_surf`` and read back from there, so the file is overwritten on
    every call.

    Args:
        coords: surface coordinates, forwarded to ``plot_surf``.
        faces: surface faces, forwarded to ``plot_surf``.
        overlay: values to display, forwarded to ``plot_surf``; also used to
            derive the colour limits when ``limits`` is None.
        flat_map: forwarded to ``plot_surf``.
        limits: optional pair ``(vmin, vmax)``. When None, the minimum and
            maximum of ``overlay`` are used.

    Returns:
        numpy array of the trimmed image converted to RGBA.
    """
    from meld_classifier.meld_plotting import trim
    import matplotlib_surface_plotting.matplotlib_surface_plotting as msp
    from PIL import Image

    # identity check: `limits == None` is evaluated element-wise for array-like
    # limits and then cannot be used as an `if` condition
    if limits is None:
        vmin = np.min(overlay)
        vmax = np.max(overlay)
    else:
        vmin = limits[0]
        vmax = limits[1]
    msp.plot_surf(coords,faces,
                overlay,
                flat_map=flat_map,
                rotate=[90, 270],
                filename='tmp.png',
                vmin=vmin,
                vmax=vmax,
             )
    # the context manager closes the file handle once the pixels are copied
    # into the returned array
    with Image.open('tmp.png') as raw_im:
        im = trim(raw_im)
        im = im.convert("RGBA")
        im1 = np.array(im)
    return im1

def plot_prediction(res_dict, filename):
    """Plot predictions, distance maps and labels of one subject and save the figure.

    Every overlay is drawn on the flat patch loaded from
    ``fsaverage_sym/surf/lh.full.patch.flat.gii`` below BASE_PATH. The
    distance-map row is left out when the left-hemisphere distance map
    contains NaN values.

    Reads the module-level ``cohort`` for its ``cortex_mask``.

    Args:
        res_dict: dict with the keys 'prediction', 'pred_distance_map' and
            'labels'; each maps 'lh' / 'rh' to an array of values.
        filename: path the figure is saved to.

    Side effects:
        Saves the figure to ``filename`` and closes all open matplotlib figures.
    """
    import nibabel as nb
    from meld_classifier.paths import BASE_PATH

    flat_path = os.path.join(BASE_PATH, "fsaverage_sym", "surf", "lh.full.patch.flat.gii")
    flat = nb.load(flat_path)
    coords = flat.darrays[0].data
    faces = flat.darrays[1].data

    fig = plt.figure(figsize=(11,8), constrained_layout=True)
    gs1 = GridSpec(3, 2, width_ratios=[1, 1],  wspace=0.1, hspace=0.1)

    # only the left hemisphere decides whether a distance map is available
    has_distance_map = not np.isnan(res_dict['pred_distance_map']['lh']).any()
    if has_distance_map:
        titles = [
            'predictions left hemi', 'predictions right hemi',
            'distance map left hemi', 'distance map right hemi',
            'labels left hemi', 'labels right hemi',
        ]
        data_to_plot = [
            res_dict['prediction']['lh'], res_dict['prediction']['rh'],
            res_dict['pred_distance_map']['lh'], res_dict['pred_distance_map']['rh'],
            res_dict['labels']['lh'], res_dict['labels']['rh'],
        ]
    else:
        titles = [
            'predictions left hemi', 'predictions right hemi',
            'labels left hemi', 'labels right hemi',
        ]
        data_to_plot = [
            res_dict['prediction']['lh'], res_dict['prediction']['rh'],
            res_dict['labels']['lh'], res_dict['labels']['rh'],
        ]

    n_vertices = len(cohort.cortex_mask)
    for idx, (title, values) in enumerate(zip(titles, data_to_plot)):
        if len(values) < n_vertices:
            # overlay is shorter than the cortex mask (medial wall missing):
            # write it into the masked positions of a zero-filled array
            padded = np.zeros(n_vertices)
            padded[cohort.cortex_mask] = values
            values = padded
        ax = fig.add_subplot(gs1[idx])
        # distance maps use their own min/max, everything else is clipped to [0.2, 0.8]
        limits = None if 'distance' in title else [0.2,0.8]
        im = create_surface_plots(coords,faces,values,flat_map=True, limits=limits)
        ax.imshow(im)
        ax.axis('off')
        ax.set_title(title, loc='left', fontsize=20)
    fig.savefig(filename, bbox_inches='tight')
    plt.close("all")

In [3]:
# get dirs where evaluation is saved (TODO exp dirs!)

# base path of each model; the fold index is appended as "_<fold>"
model_base_paths = {
   'dcd': '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350/22-12-15_trainval/dcd',
   'dc': '/rds/project/kw350/rds-kw350-meld/experiments_graph/kw350/22-12-15_trainval/dc',
}
# fold indices 0..8
folds = np.arange(0,9)

# per model: the 'results' directory of every fold
save_dirs = {
    model_name: [os.path.join(f'{path}_{fold}', 'results') for fold in folds]
    for model_name, path in model_base_paths.items()
}

# per model (plus the 'classifier' baseline): directory the plots are written to
img_save_dirs = {
    model_name: f'/rds/project/kw350/rds-kw350-meld/experiments_graph/co-spit1/22-12-15_trainval/{model_name}/results/images'
    for model_name in [*save_dirs, 'classifier']
}
for d in img_save_dirs.values():
    os.makedirs(d, exist_ok=True)

In [4]:
cohort = MeldCohort(
    hdf5_file_root='{site_code}_{group}_featurematrix_combat_6.hdf5',
    dataset='MELD_dataset_V6.csv',
)

# list the subjects stored in the predictions file of the first 'dcd' fold
first_fold_predictions = os.path.join(save_dirs['dcd'][0], 'predictions.hdf5')
with h5py.File(first_fold_predictions, "r") as f:
    subjects = list(f.keys())
print(subjects)

['MELD2_H7_3T_FCD_003', 'MELD2_H7_3T_FCD_006', 'MELD2_H7_3T_FCD_007', 'MELD2_H7_3T_FCD_008', 'MELD2_H7_3T_FCD_010', 'MELD2_H7_3T_FCD_012', 'MELD2_H7_3T_FCD_013', 'MELD_H10_3T_C_0002', 'MELD_H10_3T_C_0003', 'MELD_H10_3T_C_0004', 'MELD_H10_3T_C_0007', 'MELD_H10_3T_C_0008', 'MELD_H10_3T_C_0010', 'MELD_H10_3T_C_0016', 'MELD_H10_3T_C_0020', 'MELD_H10_3T_C_0023', 'MELD_H10_3T_C_0024', 'MELD_H10_3T_C_0026', 'MELD_H10_3T_C_0027', 'MELD_H10_3T_C_0030', 'MELD_H10_3T_C_0032', 'MELD_H10_3T_FCD_0001', 'MELD_H10_3T_FCD_0003', 'MELD_H10_3T_FCD_0004', 'MELD_H10_3T_FCD_0005', 'MELD_H10_3T_FCD_0006', 'MELD_H10_3T_FCD_0007', 'MELD_H10_3T_FCD_0010', 'MELD_H10_3T_FCD_0011', 'MELD_H10_3T_FCD_0015', 'MELD_H11_3T_FCD_0004', 'MELD_H11_3T_FCD_0005', 'MELD_H11_3T_FCD_0013', 'MELD_H11_3T_FCD_0016', 'MELD_H11_3T_FCD_0017', 'MELD_H11_3T_FCD_0021', 'MELD_H11_3T_FCD_0022', 'MELD_H11_3T_FCD_0023', 'MELD_H11_3T_FCD_0024', 'MELD_H11_3T_FCD_0026', 'MELD_H11_3T_FCD_0029', 'MELD_H11_3T_FCD_0032', 'MELD_H11_3T_FCD_0033', 'M

In [7]:
# Draw up to 10 example subjects. np.random.choice samples WITH replacement by
# default, which can return the same subject more than once; replace=False
# yields distinct subjects, and the size is capped so the draw cannot fail when
# fewer than 10 subjects are available.
subjects = np.random.choice(subjects, size=min(10, len(subjects)), replace=False)
subjects

array(['MELD_H4_15T_C_0008', 'MELD_H17_15T_FCD_0009',
       'MELD_H26_3T_FCD_0003', 'MELD_H17_3T_FCD_0060',
       'MELD_H2_15T_FCD_0006', 'MELD_H2_15T_FCD_0022',
       'MELD_H14_3T_FCD_0033', 'MELD_H3_3T_C_0083', 'MELD_H4_3T_FCD_0024',
       'MELD_H15_3T_C_0004'], dtype='<U21')

In [5]:
# Fixed list of example subjects (the same IDs as the output of the random draw
# shown above), hard-coded so that the remaining cells always work on the same
# subjects.
subjects = ['MELD_H4_15T_C_0008', 'MELD_H17_15T_FCD_0009',
       'MELD_H26_3T_FCD_0003', 'MELD_H17_3T_FCD_0060',
       'MELD_H2_15T_FCD_0006', 'MELD_H2_15T_FCD_0022',
       'MELD_H14_3T_FCD_0033', 'MELD_H3_3T_C_0083', 'MELD_H4_3T_FCD_0024',
       'MELD_H15_3T_C_0004']
subjects

['MELD_H4_15T_C_0008',
 'MELD_H17_15T_FCD_0009',
 'MELD_H26_3T_FCD_0003',
 'MELD_H17_3T_FCD_0060',
 'MELD_H2_15T_FCD_0006',
 'MELD_H2_15T_FCD_0022',
 'MELD_H14_3T_FCD_0033',
 'MELD_H3_3T_C_0083',
 'MELD_H4_3T_FCD_0024',
 'MELD_H15_3T_C_0004']

In [6]:
# load predictions and distances for all models and all subjects
# Lesion labels depend only on the subject and the cohort, not on the model or
# the fold, so they are loaded once per subject and shared by all entries below.
labels_by_subject = {}
for subj in subjects:
    s = MeldSubject(subj,cohort=cohort)
    labels_hemis = {}
    for hemi in ['lh','rh']:
        # first return value (the features) is not needed here
        _, labels_hemis[hemi] = s.load_feature_lesion_data(
            features=['none'], hemi=hemi, features_to_ignore=[]
        )
    labels_by_subject[subj] = labels_hemis

# results_dict[model_name][fold_index][subject] ->
#     {'prediction': {...}, 'pred_distance_map': {...}, 'labels': {...}},
# each inner dict keyed by hemisphere ('lh' / 'rh')
results_dict = {
    model_name: [
        {subj: {} for subj in subjects} for _ in range(len(save_dirs[model_name]))
    ]
    for model_name in save_dirs.keys()
}

for model_name in save_dirs.keys():
    for i, save_dir in enumerate(save_dirs[model_name]):
        pred_file = os.path.join(save_dir, 'predictions.hdf5')
        for subj in subjects:
            result_hemis = load_prediction(subj,pred_file, dset='prediction')
            result_distance_hemis = load_prediction(subj,pred_file, dset='distance_map')

            results_dict[model_name][i][subj]['prediction'] = result_hemis
            results_dict[model_name][i][subj]['pred_distance_map'] =  result_distance_hemis
            results_dict[model_name][i][subj]['labels'] = labels_by_subject[subj]

In [7]:
# Average every quantity over the folds of a model:
# mean_results[model_name][subject][quantity][hemi] is the element-wise mean
# of the corresponding per-fold arrays in results_dict.
mean_results = {}
for model_name, fold_results in results_dict.items():
    per_subject = {}
    for subj in subjects:
        mean_res = {}
        for quantity in ('prediction', 'pred_distance_map', 'labels'):
            mean_res[quantity] = {}
            for hemi in ['lh', 'rh']:
                per_fold = [fold[subj][quantity][hemi] for fold in fold_results]
                mean_res[quantity][hemi] = np.mean(per_fold, axis=0)
        per_subject[subj] = mean_res
    mean_results[model_name] = per_subject

In [8]:
# add classifier results to mean_results
classifier_results = '/rds/project/kw350/rds-kw350-meld/experiments/co-ripa1/iteration_21-09-17/ensemble_21-09-20/fold_all/results/predictions_ensemble_iteration.hdf5'
mean_results['classifier'] = {subj:{} for subj in subjects}
# the first model in mean_results provides the labels and the array shapes
reference_model = list(mean_results.keys())[0]
for subj in subjects:
    reference = mean_results[reference_model][subj]
    labels = reference['labels']
    prediction = load_prediction(subj, classifier_results, dset='prediction')
    # no distance map is loaded for the classifier: store an all-NaN
    # placeholder shaped like the reference model's distance map
    nan_distance_map = {}
    for hemi in ['lh', 'rh']:
        nan_distance_map[hemi] = np.zeros_like(reference['pred_distance_map'][hemi])*np.nan
    mean_results['classifier'][subj] = {
        'prediction': prediction,
        'labels': labels,
        'pred_distance_map': nan_distance_map,
    }

In [9]:
# Plot the fold-averaged results of every model in mean_results and save one
# image per subject to the model's image directory. The former
# `if model_name != 'classifier': continue` filter was removed: it skipped
# every other model, although the images of all models in img_save_dirs are
# read back when the results are shown.
for model_name in mean_results.keys():
    print(model_name)
    for subj in subjects:
        print('-', subj)
        plot_prediction(mean_results[model_name][subj],
                        filename=os.path.join(img_save_dirs[model_name], f'mean_pred_{subj}.png'))

dcd


NameError: name 'model' is not defined

In [None]:
# show results
from imageio import imread

# For every subject, show the saved plots of all models side by side.
for subj in subjects:
    plots = {}
    for model_name in img_save_dirs.keys():
        plots[model_name] = imread(os.path.join(img_save_dirs[model_name], f'mean_pred_{subj}.png'))

    print(subj)
    # one column per model instead of a fixed 3, so that no plot is silently
    # dropped and no axis stays empty when the set of models changes;
    # squeeze=False keeps `axes` two-dimensional even for a single model
    fig, axes = plt.subplots(1, len(plots), figsize=(20,5), squeeze=False)
    for ax, (n, image) in zip(axes[0], plots.items()):
        ax.set_title(n)
        ax.imshow(image)
        ax.axis('off')
    fig.suptitle(subj)

In [15]:
# Scratch cell: pick the right-hemisphere labels of one subject for a manual plot.
# NOTE(review): model_name is not set in this cell; it is whatever value the
# last executed loop over model names left behind.
overlay = mean_results[model_name]['MELD_H17_15T_FCD_0009']['labels']['rh']


In [None]:
# If the overlay is shorter than the cortex mask (medial wall missing), write
# it into the masked positions of a zero-filled array of full length.
# The body of the `if` had lost its indentation, which is a syntax error.
if len(overlay) < len(cohort.cortex_mask):
    overlay_tmp = np.zeros(len(cohort.cortex_mask))
    overlay_tmp[cohort.cortex_mask] = overlay
    overlay = overlay_tmp

In [None]:
 import nibabel as nb
    from meld_classifier.paths import BASE_PATH

    flat = nb.load(
        os.path.join(
            BASE_PATH, "fsaverage_sym", "surf", "lh.full.patch.flat.gii"
        )
    )
    coords, faces = flat.darrays[0].data, flat.darrays[1].data

create_surface_plots(coords,faces,overlay,flat_map=True)