# Script for generating plots 
This notebook contains the tools for displaying the output of the event-related fMRI data analyses found at: https://github.com/CameronTEllis/event_related_fmri_tda. 

This assumes 1) you ran at least some of the simulation/analyses of the data (using "./code/supervisor_supersubject.sh" for instance). 2) you are able to launch the jupyter notebook with the same environment that you used to run the code.

In [None]:
%matplotlib notebook

import sys
import os
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors
import nibabel as nb
import scipy.spatial.distance as sp_distance
import sklearn.manifold as manifold
from mpl_toolkits.mplot3d import Axes3D
from scipy.optimize import curve_fit
from scipy.stats import t
import generate_graph_structure as graphs
import glob

## Setup some functions that will be used throughout

In [None]:
def average_node_brain_dist(filename, mask_vec):
    """Average a 4D distance volume over masked voxels and return it as a matrix.

    Parameters
    ----------
    filename : str
        Path to a 4D NIfTI file. The first three axes are flattened into one
        voxel axis; the fourth axis is treated as the vector of pairwise node
        distances that fills the upper triangle of the output.
    mask_vec : array-like
        Selector applied to the flattened voxel axis (the callers in this
        notebook pass a boolean vector).

    Returns
    -------
    numpy.ndarray
        Symmetric (nodes x nodes) matrix with a zero diagonal. Only the upper
        triangle is filled before averaging with the transpose, so every
        off-diagonal entry is half of the voxel-averaged distance.
    """
    nii = nb.load(filename)

    # get_data() is deprecated (and removed in recent nibabel releases);
    # np.asanyarray(img.dataobj) is the documented drop-in replacement
    data_vol = np.asanyarray(nii.dataobj)

    # Turn the data into a (voxels x distances) array
    data_vec = data_vol.reshape((np.prod(data_vol.shape[0:3])), data_vol.shape[3])

    # Mask the data
    data_masked = data_vec[mask_vec]

    # Average the voxels in the masked region
    data_av = np.mean(data_masked, 0)

    # Recover the number of nodes from the length of the distance vector
    nodes = int(np.ceil(np.sqrt(len(data_av) * 2)))

    # What are the indices for the upper triangle (diagonal excluded)
    idxs_u = np.triu_indices(nodes, 1)

    # Insert the data into a dist matrix
    data_dist = np.zeros((nodes, nodes))
    data_dist[idxs_u[0], idxs_u[1]] = data_av

    # Symmetrize the data (the lower triangle is zero, so this halves the values)
    data_dist = (data_dist.T + data_dist) / 2

    # Return the distance matrix
    return data_dist

In [None]:
def plot_summary_stats(data, keys, line_style, color):
    """Plot one line of summary statistics on the current figure.

    Parameters
    ----------
    data : dict
        Maps each key to either a single value or a dict of per-participant
        values.
    keys : sequence
        Keys of `data` to plot, in x-axis order.
    line_style : str
        Matplotlib line style for the line.
    color : color-like
        Colour used for the line and the error bars.

    Per-participant dicts are reduced to their mean, with the standard error
    (np.std / sqrt(n)) as the error bar. Error bars are drawn only when every
    key produced one.
    """
    means = []
    errors = []
    for key in keys:
        entry = data[key]

        # A plain value is plotted as-is and contributes no error bar
        if not isinstance(entry, dict):
            means.append(entry)
            continue

        # Different participants: average them and record the standard error
        ppt_values = list(entry.values())
        means.append(np.mean(ppt_values))
        errors.append(np.std(ppt_values) / np.sqrt(len(ppt_values)))

    # Plot the data
    plt.plot(means, line_style, color=color)

    # Error bars only make sense if there is one per plotted point
    if len(errors) == len(means):
        x_positions = np.arange(len(keys))
        plt.errorbar(x_positions, means, errors,
                     linestyle=line_style, ecolor=color, color=color)
    
    

In [None]:
# Plotting style shared by the summary figures below:
# the line style is chosen by the number of repetitions (short vs long runs)
short_style = '--'
long_style = '-'

# ... and the colour by the number of nodes (8-bit RGB rescaled to 0-1)
low_nodes_color = np.asarray((253, 231, 36)) / 255
mid_nodes_color = np.asarray((102, 45, 145)) / 255
high_nodes_color = np.asarray((255, 127, 14)) / 255

In [None]:
# Where the signal mask and the simulated distance volumes live
mask_name = '../simulator_parameters/real_results/significant_mask.nii.gz'
subject_root = '../simulated_data/node_brain_dist/'
supersubject_root = '../simulated_data/supersubject_node_brain_dist/'

# Load the mask volume. get_data() is deprecated (and removed in recent
# nibabel releases); np.asanyarray(img.dataobj) is the documented replacement
nii = nb.load(mask_name)
mask_vol = np.asanyarray(nii.dataobj)

# Flatten the three spatial axes into a boolean selector of the voxels equal to 1
mask_vec = mask_vol.reshape((np.prod(mask_vol.shape[0:3]))) == 1

## Pull out the MDS representations from the fully simulated data
This gives you a simple way to visualize the structure of the data in the signal voxels.

In [None]:
resample_counter = 1
deconvolution = 1
subj = 1 # If zero then do the supersubject

# The subject prefix and data directory do not depend on the loop variables,
# so pick them once instead of on every iteration
if subj == 0:
    subject_name = ''
    file_root = supersubject_root
else:
    subject_name = 'sub-%d_' % subj
    file_root = subject_root

signal_steps = [0.0, 0.25, 0.5, 0.75, 1.0, 5.0]

# One figure per (nodes, repetitions) pair, one panel per signal size.
# NOTE: the loop variables are left at module level on purpose; a later cell
# reads file_root, subject_name, nodes and repetitions_per_run.
for nodes in [12, 15, 18]:
    for repetitions_per_run in [5, 10]:

        plt.figure(figsize=(12, 2))

        # The title is the same for every panel, so set it once per figure
        duration = (repetitions_per_run * nodes * 5 * 9) / 60  # How long in minutes is this experiment
        plt.suptitle('reps-%d, nodes-%d, length-%0.1f, deconv-%d' % (repetitions_per_run, nodes, duration, deconvolution))

        for counter, signal_size in enumerate(signal_steps):

            # Format with two decimals, then drop one trailing zero
            # (0.5 -> '0.5', 0.25 -> '0.25', 5.0 -> '5.0')
            signal_number = '%0.2f' % signal_size
            if signal_number[-1] == '0':
                signal_number = signal_number[:-1]

            filename = '%s/%selipse_s-%s_1_1_t_5.0_1_1.0_%d_%d_%d_resample-%d.nii.gz' % (file_root, subject_name, signal_number, repetitions_per_run, nodes, deconvolution, resample_counter)

            plt.subplot(1, len(signal_steps), counter + 1)
            plt.xlabel('s-%0.1f' % signal_size)
            plt.xticks([])
            plt.yticks([])

            # Panels whose file has not been generated are left empty
            if os.path.exists(filename):
                data_dist = average_node_brain_dist(filename, mask_vec)
                graphs.make_mds(data_dist,
                                dim=2,
                                )
        plt.savefig('../plots/example_mds_%snodes_%d_repetitions_%d.eps' % (subject_name, nodes, repetitions_per_run))
        plt.savefig('../plots/example_mds_%snodes_%d_repetitions_%d.png' % (subject_name, nodes, repetitions_per_run))


In [None]:
resample_counter = 1
deconvolution = 1
subj = 0 # If zero then do the supersubject

# The subject prefix and data directory do not depend on the loop variables,
# so pick them once instead of on every iteration
if subj == 0:
    subject_name = ''
    file_root = supersubject_root
else:
    subject_name = 'sub-%d_' % subj
    file_root = subject_root

signal_steps = [0.0, 0.25, 0.5, 0.75, 1.0, 5.0]

# One figure per (nodes, repetitions) pair, one panel per signal size.
# NOTE: the loop variables are left at module level on purpose; the stress
# cell below reads file_root, subject_name, nodes and repetitions_per_run.
for nodes in [12, 15, 18]:
    for repetitions_per_run in [5, 10]:

        plt.figure(figsize=(12, 2))

        # The title is the same for every panel, so set it once per figure
        duration = (repetitions_per_run * nodes * 5 * 9) / 60  # How long in minutes is this experiment
        plt.suptitle('reps-%d, nodes-%d, length-%0.1f, deconv-%d' % (repetitions_per_run, nodes, duration, deconvolution))

        for counter, signal_size in enumerate(signal_steps):

            # Format with two decimals, then drop one trailing zero
            # (0.5 -> '0.5', 0.25 -> '0.25', 5.0 -> '5.0')
            signal_number = '%0.2f' % signal_size
            if signal_number[-1] == '0':
                signal_number = signal_number[:-1]

            filename = '%s/%selipse_s-%s_1_1_t_5.0_1_1.0_%d_%d_%d_resample-%d.nii.gz' % (file_root, subject_name, signal_number, repetitions_per_run, nodes, deconvolution, resample_counter)

            plt.subplot(1, len(signal_steps), counter + 1)
            plt.xlabel('s-%0.1f' % signal_size)
            plt.xticks([])
            plt.yticks([])

            # Panels whose file has not been generated are left empty
            if os.path.exists(filename):
                data_dist = average_node_brain_dist(filename, mask_vec)
                graphs.make_mds(data_dist,
                                dim=2,
                                )
        plt.savefig('../plots/example_mds_%snodes_%d_repetitions_%d.eps' % (subject_name, nodes, repetitions_per_run))
        plt.savefig('../plots/example_mds_%snodes_%d_repetitions_%d.png' % (subject_name, nodes, repetitions_per_run))


## Plot the stress and higher dimensionality of example MDS data
Use this to see how well the low-dimensional MDS plots above capture the structure of the data: lower stress means the embedding preserves more of the original distances.

In [None]:
stress = []

signal_number = '5.0'

# Restrict the mask to its first 27 voxels.
# np.where returns a tuple of index arrays, so the array has to be selected
# with [0] before slicing; slicing the tuple itself ([:27]) keeps every voxel.
mask_vec_roi = np.zeros(mask_vec.shape)
mask_vec_roi[np.where(mask_vec == 1)[0][:27]] = 1
mask_vec_roi = mask_vec_roi == 1

# NOTE: file_root, subject_name, repetitions_per_run, nodes, deconvolution and
# resample_counter are not set in this cell; they must already exist in the
# kernel (they are left behind by the MDS example cells above).
filename = '%s/%selipse_s-%s_1_1_t_5.0_1_1.0_%d_%d_%d_resample-%d.nii.gz' % (file_root, subject_name, signal_number, repetitions_per_run, nodes, deconvolution, resample_counter)
data_dist = average_node_brain_dist(filename, mask_vec_roi)

# Fit an MDS embedding for 1 to 9 components and record the stress of each
for n_component in range(1, 10):
    mds = manifold.MDS(n_components=n_component, dissimilarity='precomputed')
    dist_obj = mds.fit(data_dist)
    stress.append(dist_obj.stress_)

plt.figure()
plt.title('Stress of data')
plt.plot(range(1, 10), stress)
plt.ylabel('Stress')
plt.xlabel('Components')

# Look at the data in 4d
plt.figure()
graphs.make_mds(data_dist,
                dim=4,
                )

In [None]:
# Embed the example distance matrix in four dimensions
mds = manifold.MDS(n_components=4, dissimilarity='precomputed')
mds.fit(data_dist)

# The fitted object exposes the MDS coordinates as `embedding_`
coords = mds.embedding_

# Store both the distance matrix and its embedding next to the input data
np.save('%s/example_s-%s_dist.npy' % (file_root, signal_number), data_dist)
np.save('%s/example_s-%s_coords.npy' % (file_root, signal_number), coords)


## Make max loop signal plots
Make the plot of the maximum loop persistence for each condition

In [None]:
# Load in all of the summary statistics
# (the context manager closes the file even if reading fails)
with open('../searchlight_summary/signal_vs_flipped.txt', 'r') as fid:
    file_txt = fid.readlines()

subject_data = {}       # condition -> {participant -> value}
supersubject_data = {}  # condition -> value
for line in file_txt:

    # Skip empty lines (e.g. a blank line at the end of the file)
    if not line.strip():
        continue

    # What condition is it
    condition = line[line.find('elipse'):line.find('_loop')]

    # What is the value: everything after ': '. float() ignores the trailing
    # newline, and slicing up to find('\n') would drop the last digit when the
    # final line has no newline (find returns -1).
    val = float(line[line.find(': ') + 2:])

    # Is it a supersubject or not?
    if line.find('sub-') > -1:

        ppt = line[line.find('sub-'):line.find('_elipse')]

        # Create the per-condition dictionary on first use, then store the value
        subject_data.setdefault(condition, {})[ppt] = val
    else:
        supersubject_data[condition] = val
    

In [None]:
plt.figure()

# Conditions to plot: 25 trials, 15 nodes, first resample
signal_steps = ['0.0', '0.25', '0.5', '0.75', '1.0', '5.0']
trials = 25
resample = 1
nodes = 15

# One key per signal size
keys = []
for s in signal_steps:
    keys.append('elipse_s-%s_1_1_t_5.0_1_1.0_%s_%d_1_resample-%d' % (s, trials, nodes, resample))

plot_summary_stats(subject_data, keys, short_style, high_nodes_color)


In [None]:
plt.figure()

# Set the variables
signal_steps = ['0.0', '0.25', '0.5', '0.75', '1.0', '5.0']
resample = 1

# Significance line: the 0.9995 quantile of a t distribution with 441 degrees
# of freedom. Only `t` is imported from scipy.stats in this notebook, so
# `stats.t.ppf` would raise a NameError.
threshold = t.ppf(0.9995, 441)

# Line style encodes the number of trials, colour encodes the number of nodes
for trials, trial_style in [(5, short_style), (10, long_style)]:
    for nodes, node_color in [(12, low_nodes_color), (15, mid_nodes_color), (18, high_nodes_color)]:

        # Store the keys, one per signal size
        keys = []
        for s in signal_steps:
            keys += ['elipse_s-%s_1_1_t_5.0_1_1.0_%s_%d_1_resample-%d' % (s, trials, nodes, resample)]

        plot_summary_stats(subject_data, keys, trial_style, node_color)

# Dashed horizontal line at the threshold
plt.plot([0, len(signal_steps) - 1], [threshold, threshold], 'k--')
plt.xticks(np.arange(len(signal_steps)), signal_steps)
plt.ylabel('t stat')
plt.xlabel('Percent signal change')
plt.ylim([-10, 50])

plt.savefig('../plots/subjectwise_max_loop_tstat.eps')
plt.savefig('../plots/subjectwise_max_loop_tstat.png')


In [None]:
plt.figure()

# Set the variables
signal_steps = ['0.0', '0.25', '0.5', '0.75', '1.0', '5.0']
resample = 1

# Significance line: the 0.9995 quantile of a t distribution with 441 degrees
# of freedom. Computed here so the cell does not depend on a variable left
# behind by another cell.
threshold = t.ppf(0.9995, 441)

# Line style encodes the number of trials, colour encodes the number of nodes
for trials, trial_style in [(5, short_style), (10, long_style)]:
    for nodes, node_color in [(12, low_nodes_color), (15, mid_nodes_color), (18, high_nodes_color)]:

        # Store the keys, one per signal size
        keys = []
        for s in signal_steps:
            keys += ['elipse_s-%s_1_1_t_5.0_1_1.0_%s_%d_1_resample-%d' % (s, trials, nodes, resample)]

        plot_summary_stats(supersubject_data, keys, trial_style, node_color)

# Dashed horizontal line at the threshold
plt.plot([0, len(signal_steps) - 1], [threshold, threshold], 'k--')
plt.xticks(np.arange(len(signal_steps)), signal_steps)
plt.ylabel('t stat')
plt.xlabel('Percent signal change')
plt.ylim([-10, 50])

plt.savefig('../plots/supersubject_max_loop_tstat.eps')
plt.savefig('../plots/supersubject_max_loop_tstat.png')

## Make the plots for the matching loop number
Make the plots showing the frequency of getting exactly one loop (a single point in the 1-Dimensional persistence diagram)

In [None]:
# Load in all of the summary statistics
# (the context manager closes the file even if reading fails)
with open('../searchlight_summary/signal_ratio.txt', 'r') as fid:
    file_txt = fid.readlines()

subject_signal = {}       # condition -> {participant -> value}
supersubject_signal = {}  # condition -> value
for line in file_txt:

    # Skip empty lines (e.g. a blank line at the end of the file)
    if not line.strip():
        continue

    # What condition is it
    condition = line[line.find('elipse'):line.find('_loop')]

    # What is the value: everything after ': '. float() ignores the trailing
    # newline, and slicing up to find('\n') would drop the last digit when the
    # final line has no newline (find returns -1).
    val = float(line[line.find(': ') + 2:])

    # Is it a supersubject or not?
    if line.find('sub-') > -1:

        ppt = line[line.find('sub-'):line.find('_elipse')]

        # Create the per-condition dictionary on first use, then store the value
        subject_signal.setdefault(condition, {})[ppt] = val
    else:
        supersubject_signal[condition] = val

In [None]:
# Load in all of the summary statistics
# (the context manager closes the file even if reading fails)
with open('../searchlight_summary/flipped_ratio.txt', 'r') as fid:
    file_txt = fid.readlines()

subject_noise = {}       # 'low' / 'mid' / 'high' -> list of values
supersubject_noise = {}  # 'low' / 'mid' / 'high' -> list of values
for line in file_txt:

    # Skip empty lines (e.g. a blank line at the end of the file)
    if not line.strip():
        continue

    # What condition is it
    condition = line[line.find('elipse'):line.find('_loop')]

    # Bucket the condition by the node count embedded in its name
    if condition.find('_12_') > 0:
        condition_name = 'low'
    elif condition.find('_15_') > 0:
        condition_name = 'mid'
    elif condition.find('_18_') > 0:
        condition_name = 'high'
    else:
        # No recognised node count: skip the line rather than filing it under
        # the bucket left over from the previous line (or failing with a
        # NameError on the first line)
        continue

    # What is the value: everything after ': '. float() ignores the trailing
    # newline, and slicing up to find('\n') would drop the last digit when the
    # final line has no newline (find returns -1).
    val = float(line[line.find(': ') + 2:])

    # Is it a supersubject or not?
    if line.find('sub-') > -1:
        subject_noise.setdefault(condition_name, []).append(val)
    else:
        supersubject_noise.setdefault(condition_name, []).append(val)

In [None]:
plt.figure()

# Set the variables
signal_steps = ['0.0', '0.25', '0.5', '0.75', '1.0', '5.0']
resample = 1

# Line style encodes the number of trials, colour encodes the number of nodes
for trials, trial_style in [(5, short_style), (10, long_style)]:
    for nodes, node_color in [(12, low_nodes_color), (15, mid_nodes_color), (18, high_nodes_color)]:

        # Store the keys, one per signal size
        keys = []
        for s in signal_steps:
            keys.append('elipse_s-%s_1_1_t_5.0_1_1.0_%s_%d_1_resample-%d' % (s, trials, nodes, resample))

        plot_summary_stats(subject_signal, keys, trial_style, node_color)

# Plot the noise pattern: one shaded band per node count spanning the min-max
# of the noise values, placed side by side at the right edge of the axes
noise_bands = [('low', 0.91, 0.94, low_nodes_color),
               ('mid', 0.941, 0.97, mid_nodes_color),
               ('high', 0.971, 1.0, high_nodes_color)]
for band_name, band_left, band_right, band_color in noise_bands:
    band_values = subject_noise[band_name]
    plt.axhspan(np.min(band_values), np.max(band_values), xmin=band_left, xmax=band_right, alpha=0.1, color=band_color)

# Plot the graph features
plt.xticks(np.arange(len(signal_steps)), signal_steps)
plt.ylabel('t stat')
plt.xlabel('Percent signal change')
plt.ylim([0, 1])
plt.xlim([0, 5.5])

plt.savefig('../plots/subjectwise_ratio_tstat.eps')
plt.savefig('../plots/subjectwise_ratio_tstat.png')


In [None]:
plt.figure()

# Set the variables
signal_steps = ['0.0', '0.25', '0.5', '0.75', '1.0', '5.0']
resample = 1

# Line style encodes the number of trials, colour encodes the number of nodes
for trials, trial_style in [(5, short_style), (10, long_style)]:
    for nodes, node_color in [(12, low_nodes_color), (15, mid_nodes_color), (18, high_nodes_color)]:

        # Store the keys, one per signal size
        keys = []
        for s in signal_steps:
            keys.append('elipse_s-%s_1_1_t_5.0_1_1.0_%s_%d_1_resample-%d' % (s, trials, nodes, resample))

        plot_summary_stats(supersubject_signal, keys, trial_style, node_color)

# Plot the noise pattern: one shaded band per node count spanning the min-max
# of the noise values, placed side by side at the right edge of the axes
noise_bands = [('low', 0.91, 0.94, low_nodes_color),
               ('mid', 0.941, 0.97, mid_nodes_color),
               ('high', 0.971, 1.0, high_nodes_color)]
for band_name, band_left, band_right, band_color in noise_bands:
    band_values = supersubject_noise[band_name]
    plt.axhspan(np.min(band_values), np.max(band_values), xmin=band_left, xmax=band_right, alpha=0.1, color=band_color)

# Plot the graph features
plt.xticks(np.arange(len(signal_steps)), signal_steps)
plt.ylabel('t stat')
plt.xlabel('Percent signal change')
plt.ylim([0, 1])
plt.xlim([0, 5.5])

plt.savefig('../plots/supersubject_ratio_tstat.eps')
plt.savefig('../plots/supersubject_ratio_tstat.png')


# Generate the elements for the methods figure in the manuscript
This figure shows the topological structure being inserted into the brain, the ROI containing signal, an example distance matrix, and an example persistence diagram.

In [None]:
# Generate the coordinates of a 12-node structure with graphs.elipse and
# convert them into a pairwise distance matrix
coords = graphs.elipse(nodes=12, x_coef=1, y_coef=1)
dist = graphs.coord2dist(coords)

# Draw the two-dimensional MDS of that matrix on a bare figure
plt.figure()
graphs.make_mds(dist, dim=2)
plt.axis('off');

for example_loop_path in ('../plots/example_loop.eps', '../plots/example_loop.png'):
    plt.savefig(example_loop_path)

## Plot a brain slice

In [None]:
filename = '../simulated_data/nifti/sub-1_r1_elipse_s-5.0_1_1_t_5.0_1_1.0_5_12_1.nii.gz'

# get_data() is deprecated (and removed in recent nibabel releases);
# np.asanyarray(img.dataobj) is the documented replacement
nii = nb.load(filename)
vol = np.asanyarray(nii.dataobj)

# Pull out the slices to show (first volume of the 4D data, rotated 90 degrees)
x_idx = 11
brain_slice = np.rot90(vol[x_idx, :, :, 0], 1)
mask_slice = np.rot90(mask_vol[x_idx, :, :], 1)

# Find an example searchlight: the 11th voxel of the slice where the mask is 1
y_idx = np.where(mask_slice == 1)[0][10]
z_idx = np.where(mask_slice == 1)[1][10]

# Create a searchlight mask (3 x 3 square centred on that voxel)
searchlight_slice = np.zeros(mask_slice.shape)
searchlight_slice[y_idx - 1:y_idx + 2, z_idx - 1:z_idx + 2] = 1

# Set the range to 1.
# np.rot90 returns views, so an in-place `/=` would overwrite `vol` and
# `mask_vol` (and raise for integer data); divide into new arrays instead.
brain_slice = brain_slice / brain_slice.max()
mask_slice = mask_slice / mask_slice.max()

# Construct RGB version of grey-level image
img_rgb = np.dstack((brain_slice, brain_slice, brain_slice))

# Mask goes in the blue channel, searchlight in the green channel
mask_rgb = np.zeros(img_rgb.shape)
mask_rgb[:, :, 2] = mask_slice

searchlight_rgb = np.zeros(img_rgb.shape)
searchlight_rgb[:, :, 1] = searchlight_slice

# Convert the input image and color mask to Hue Saturation Value (HSV)
# colorspace
img_hsv = colors.rgb_to_hsv(img_rgb)
mask_hsv = colors.rgb_to_hsv(mask_rgb)
searchlight_hsv = colors.rgb_to_hsv(searchlight_rgb)

# Replace the hue and saturation of the original image
# with that of the color mask
alpha = 0.7
img_hsv[:, :, 0] = mask_hsv[:, :, 0]
img_hsv[:, :, 1] = mask_hsv[:, :, 1] * alpha

# Super impose the searchlight: take the second-smallest hue / saturation
# value (index 1 of the sorted unique values) and paint it over the
# searchlight pixels
roi_hue = np.unique(searchlight_hsv[:, :, 0])[1]
roi_sat = np.unique(searchlight_hsv[:, :, 1])[1]
idxs = np.where(searchlight_hsv[:, :, 0] == roi_hue)

img_hsv[idxs[0], idxs[1], 0] = roi_hue
img_hsv[idxs[0], idxs[1], 1] = roi_sat

# Convert back into rgb
img_masked = colors.hsv_to_rgb(img_hsv)

plt.figure()
plt.imshow(img_masked)
plt.axis('off')

# Example brain
plt.savefig('../plots/example_brain_searchlight.eps')
plt.savefig('../plots/example_brain_searchlight.png')

## Create the distance matrix to be displayed

In [None]:
# Example participant-level distance volume, averaged over the signal mask
filename = '../simulated_data/node_brain_dist//sub-1_elipse_s-1.0_1_1_t_5.0_1_1.0_10_12_1_resample-1.nii.gz'
data_dist = average_node_brain_dist(filename, mask_vec)

# Show the matrix as an image without axes and save it in both formats
plt.figure()
plt.imshow(data_dist)
plt.axis('off')
for distance_mat_path in ('../plots/example_distance_mat.eps', '../plots/example_distance_mat.png'):
    plt.savefig(distance_mat_path)

## Run persistent homology on a searchlight voxel
If TDA has trouble being loaded into notebooks, use the script as specified below to run persistent homology outside of the notebook and then load in the output.

In [None]:
# Create an example barcode
input_file = 'simulated_data/node_brain_dist//sub-1_elipse_s-1.0_1_1_t_5.0_1_1.0_10_12_1_resample-1.nii.gz'
output_file = 'searchlight_summary/example_barcode.npy' # Use path relative to the base
coordinates = '[%d,%d,%d]' % (x_idx,y_idx,z_idx)
!cd ../; ./code/run_TDA_coordinate.sh $input_file $coordinates $output_mat; cd code/

In [None]:
# Load the barcode; output_file is relative to the directory above this one
barcode = np.load('../%s' % output_file)

In [None]:
# Plot the persistence diagram
plt.figure()

# Barcode columns: birth, death, Betti number
births, deaths, betti = barcode[:, 0], barcode[:, 1], barcode[:, 2]

# Betti-1 features are drawn as crosses, Betti-0 features with the default marker
plt.scatter(births[betti == 1], deaths[betti == 1], marker='x')
plt.scatter(births[betti == 0], deaths[betti == 0])

# Get the diagonal: it runs from the smallest birth/death value to the
# second largest one
all_vals = barcode[:, :2].ravel()
min_val = all_vals.min()
max_val = np.sort(all_vals)[-2]  # So as not to get an inf

plt.plot([min_val, max_val], [min_val, max_val], c='k')

# Hide units
plt.xticks([])
plt.yticks([])

plt.xlabel('Birth')
plt.ylabel('Death')

for diagram_path in ('../plots/example_persistence_diagram.png', '../plots/example_persistence_diagram.eps'):
    plt.savefig(diagram_path)

## Plot examples of the distance matrix and the persistent homology
To further query your data you can plot distance matrices, persistence diagrams and MDS plots from specific regions in the simulated brains in order to explore the structure of the data

In [None]:
def vec2mat(vec):
    """Unpack a vector of pairwise distances into an upper-triangular matrix.

    The number of nodes is recovered from the vector length. The values fill
    the strict upper triangle in row-major order; the diagonal and the lower
    triangle stay zero.
    """
    n_nodes = int(np.ceil(np.sqrt(2 * len(vec))))
    upper = np.triu_indices(n_nodes, 1)
    mat = np.zeros((n_nodes, n_nodes))
    mat[upper] = vec
    return mat

def persistent_graph(barcode):
    """Draw a persistence diagram for `barcode` on the current axes.

    `barcode` is indexed as a 2D array whose columns are birth, death and
    Betti number. Betti-1 features are drawn as crosses and Betti-0 features
    with the default marker, together with a black diagonal.
    """
    birth_col, death_col, betti_col = barcode[:, 0], barcode[:, 1], barcode[:, 2]

    is_loop = betti_col == 1
    is_component = betti_col == 0
    plt.scatter(birth_col[is_loop], death_col[is_loop], marker='x')
    plt.scatter(birth_col[is_component], death_col[is_component])

    # The diagonal runs from the smallest value to the second largest one
    endpoints = barcode[:, :2].ravel()
    lowest = endpoints.min()
    highest = np.sort(endpoints)[-2]  # So as not to get an inf
    plt.plot([lowest, highest], [lowest, highest], c='k')

    # Hide units
    plt.xticks([])
    plt.yticks([])

    plt.xlabel('Birth')
    plt.ylabel('Death')


In [None]:
# Scratch space
# Voxel to inspect and the condition to load
xcoord = 6
ycoord = 34
zcoord = 16
repetitions = 5
nodes = 12
sig = '5.0'

# One row per participant; the row length is nodes * (nodes - 1) / 2, the
# number of distinct node pairs
aggregate = np.zeros((20, (nodes ** 2) // 2 - (nodes // 2)))
for ppt in range(1, 21):
    file = '../simulated_data/node_brain_dist/sub-%d_elipse_s-%s_1_1_t_5.0_1_1.0_%d_%d_1_resample-1.nii.gz' % (ppt, sig, repetitions, nodes)

    # get_data() is deprecated (and removed in recent nibabel releases);
    # np.asanyarray(img.dataobj) is the documented replacement
    data = np.asanyarray(nb.load(file).dataobj)

    # Keep the distance vector stored at the chosen voxel
    aggregate[ppt - 1, :] = data[xcoord, ycoord, zcoord, :]

In [None]:
# Run the code for computing persistent homology on a single set of voxels
#!cd ..; for ppt_counter in `seq 1 20`; do echo ./code/run_TDA_coordinate.sh simulated_data/node_brain_dist/sub-${ppt_counter}\_elipse_s-${sig}\_1_1_t_5.0_1_1.0_${repetitions}_${nodes}_1_resample-1.nii.gz [$xcoord,$ycoord,$zcoord] ppt_${ppt_counter}\_${nodes}_${sig}.npy; done; cd code



In [None]:
file = '../simulated_data/supersubject_node_brain_dist/elipse_s-%s_1_1_t_5.0_1_1.0_10_12_1_resample-1.nii.gz' % sig

# Distance vector of the supersubject at the chosen voxel.
# get_data() is deprecated (and removed in recent nibabel releases);
# np.asanyarray(img.dataobj) is the documented replacement
average = np.asanyarray(nb.load(file).dataobj)[xcoord, ycoord, zcoord, :]

In [None]:
# Run the code for computing persistent homology on a single set of voxels
#!cd ..; ./code/run_TDA_coordinate.sh simulated_data/supersubject_node_brain_dist/elipse_s-${sig}\_1_1_t_5.0_1_1.0_10_12_1_resample-1.nii.gz [$xcoord,$ycoord,$zcoord] super_12_${sig}.npy; cd code



In [None]:
# One row per participant: distance matrix | persistence diagram | MDS
plt.figure(figsize=(10,80))
total_ppts = 20
for ppt in range(total_ppts):

    first_panel = ppt * 3 + 1

    # Plot distance matrix (upper triangle mirrored into the lower triangle)
    dist = vec2mat(aggregate[ppt, :])
    dist = dist + dist.T
    plt.subplot(total_ppts, 3, first_panel)
    plt.imshow(dist)
    plt.axis('off')

    # Plot persistent homology (barcode files are numbered from 1)
    barcode = np.load('../ppt_%d_%d_%s.npy' % (ppt + 1, nodes, sig))
    plt.subplot(total_ppts, 3, first_panel + 1)
    persistent_graph(barcode)

    # Plot mds
    plt.subplot(total_ppts, 3, first_panel + 2)
    graphs.make_mds(dist, dim=2)

In [None]:
# Single row for the supersubject: distance matrix | persistence diagram | MDS
plt.figure(figsize=(10,5))

# Plot distance matrix (upper triangle mirrored into the lower triangle)
dist = vec2mat(average)
dist = dist + dist.T
plt.subplot(1, 3, 1)
plt.imshow(dist)
plt.axis('off')

# Plot persistent homology
barcode = np.load('../super_%d_%s.npy' % (nodes, sig))
plt.subplot(1, 3, 2)
persistent_graph(barcode)

# Plot mds
plt.subplot(1, 3, 3)
graphs.make_mds(dist, dim=2)