In [18]:
import copy
import matplotlib.cm as cmx
import matplotlib.colors as colors
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pickle
import utils

import warnings; warnings.simplefilter('ignore')

from atlases import DesikanAtlas
from matplotlib.collections import LineCollection
from mpl_toolkits.mplot3d import Axes3D
from nilearn import datasets, plotting
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Low Dimensional Connectome Dynamics

### Prepare Data - Separate Modalities

In [2]:
# Load the connectomes for the subject and trial IDs listed in utils; the result is indexed by
# modality name (e.g. 'fmri').
all_subjects_all_trials_connectomes = utils.load_connectomes(utils.ALL_SUBJECT_IDS, utils.ALL_TRIAL_IDS)
# Display the shape of the fMRI connectome stack as a sanity check.
all_subjects_all_trials_connectomes['fmri'].shape

(17906, 68, 68)

Extract the flattened upper triangle (including the diagonal) of the Pearson correlation matrix for each connectome type.

In [3]:
# NOTE: The logic below would have to change if we move away from the Desikan atlas, where the
# number of regions is the same for EEG and fMRI.
num_regions = all_subjects_all_trials_connectomes['fmri'].shape[1]
num_regions

68

In [4]:
# (row, column) index arrays of the upper triangle, diagonal included (k=0).
upper_triangular_including_diagonal_idxs = np.triu_indices(num_regions, k=0)
# (row, column) index arrays of the strictly lower triangle (k=-1 leaves the diagonal out).
lower_triangular_idxs = np.tril_indices(num_regions, k=-1)

In [5]:
# Flatten the upper triangle (diagonal included) of every connectome into one feature row per
# time point. Indexing the last two axes of the whole stack at once replaces the per-connectome
# Python loop, and building a fresh dict avoids deep-copying the full connectome arrays only to
# overwrite every entry afterwards.
triu_rows, triu_cols = upper_triangular_including_diagonal_idxs
all_subjects_all_trials_connectome_upper_triangular_flattened = {}
for k in all_subjects_all_trials_connectomes:
    connectome_stack = np.asarray(all_subjects_all_trials_connectomes[k])
    all_subjects_all_trials_connectome_upper_triangular_flattened[k] = connectome_stack[:, triu_rows, triu_cols]

In [6]:
# Sanity check: each connectome is now one row of 68 * 69 / 2 = 2346 upper-triangular entries.
all_subjects_all_trials_connectome_upper_triangular_flattened['fmri'].shape

(17906, 2346)

### Prepare Data - Combined Modalities

In [7]:
def data_matrix_from_channels(channels):
    """Place the flattened connectomes of the requested channels side by side.

    channels: iterable of keys into
              all_subjects_all_trials_connectome_upper_triangular_flattened.

    Returns one array built by concatenating the per-channel matrices along axis 1,
    in the order the channels were given.
    """
    per_channel_matrices = [
        all_subjects_all_trials_connectome_upper_triangular_flattened[channel]
        for channel in channels
    ]
    return np.concatenate(per_channel_matrices, axis=1)

In [8]:
# Multimodal data matrix: the flattened connectomes of all six modalities concatenated
# column-wise, in the order listed here.
fmri_alpha_beta_delta_gamma_theta_matrix = data_matrix_from_channels(['fmri', 'alpha', 'beta', 'delta', 'gamma', 'theta'])
# Display the shape as a sanity check.
fmri_alpha_beta_delta_gamma_theta_matrix.shape

(17906, 14076)

### Prepare Data - Graph Statistic Timeseries

In [9]:
# Human-readable labels for the graph statistics, grouped by statistic (modularity,
# assortativity, global efficiency) and, within each group, ordered fMRI, alpha, beta, delta,
# gamma, theta. Used below for legend entries and x-axis tick labels.
# NOTE(review): presumably this order matches the columns of brain_graph_statistics -- confirm
# against the code that produced the pickles.
brain_graph_statistics_descriptions = [
    "fMRI Modularity",
    "Alpha-Band EEG Modularity",
    "Beta-Band EEG Modularity",
    "Delta-Band EEG Modularity",
    "Gamma-Band EEG Modularity",
    "Theta-Band EEG Modularity",
    
    "fMRI Assortativity",
    "Alpha-Band EEG Assortativity",
    "Beta-Band EEG Assortativity",
    "Delta-Band EEG Assortativity",
    "Gamma-Band EEG Assortativity",
    "Theta-Band EEG Assortativity",
    
    "fMRI Global Efficiency",
    "Alpha-Band EEG Global Efficiency",
    "Beta-Band EEG Global Efficiency",
    "Delta-Band EEG Global Efficiency",
    "Gamma-Band EEG Global Efficiency",
    "Theta-Band EEG Global Efficiency",
]

In [10]:
def _load_pickle(path):
    """Return the object pickled at `path`, closing the file handle afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)

# Graph statistic time series plus the overall mean and variance of each statistic
# (the latter two are used further down to z-score per-cluster means).
brain_graph_statistics = _load_pickle('output/hmm/graph_statistics/brain_graph_statistics_time_series.pkl')
brain_graph_statistics_means = _load_pickle('output/hmm/graph_statistics/brain_graph_statistics_means.pkl')
brain_graph_statistics_vars = _load_pickle('output/hmm/graph_statistics/brain_graph_statistics_vars.pkl')

## Principal Component Analysis

Compute principal components of every connectome time series individually.

In [11]:
n_components = 10
# One PCA model per connectome type, in the iteration order of the flattened-connectome container
# (later cells zip that container with pca_models and rely on this alignment).
# random_state is fixed because for a matrix this large scikit-learn is expected to pick the
# randomized SVD solver, which would otherwise give slightly different components on every run.
pca_models = [
    PCA(n_components=n_components, random_state=0).fit(all_subjects_all_trials_connectome_upper_triangular_flattened[k])
    for k in all_subjects_all_trials_connectome_upper_triangular_flattened
]

Compute principal components of the multimodal connectome time series.

In [12]:
# PCA over the concatenated (multimodal) feature matrix. random_state is fixed so that the
# randomized SVD solver scikit-learn is expected to choose for large inputs gives reproducible
# components.
combined_pca_model = PCA(n_components=n_components, random_state=0).fit(fmri_alpha_beta_delta_gamma_theta_matrix)

Plot variance explained ratio for the PCA model of each connectome type.

In [13]:
# One panel per connectome type: explained-variance ratio of each principal component, with the
# summed percentage in the panel title.
fig = plt.figure(figsize=(30, 5))
fig.suptitle('Total Variance Explained w/ ' + str(n_components) + ' Components')

num_panels = len(pca_models)
modality_names = list(all_subjects_all_trials_connectome_upper_triangular_flattened)
for panel_idx, (k, pca_model) in enumerate(zip(modality_names, pca_models), start=1):
    fig.add_subplot(1, num_panels, panel_idx)
    variance_ratios = pca_model.explained_variance_ratio_
    plt.plot(variance_ratios)
    total_percent_explained = 100*sum(variance_ratios)
    plt.title("{0} - {1:.2f}%".format(k, total_percent_explained))

plt.savefig('output/principal_component_analysis/separate_variance_explained.png')
plt.close()

Plot variance explained ratio for the multimodal connectome PCA model.

In [15]:
# Explained-variance ratio per component of the multimodal PCA model.
fig = plt.figure(figsize=(7, 5))
fig.suptitle('Total Variance Explained w/ ' + str(n_components) + ' Components')

combined_variance_ratios = combined_pca_model.explained_variance_ratio_
plt.plot(combined_variance_ratios)

total_percent_explained = 100*sum(combined_variance_ratios)
plt.title("{0} - {1:.2f}%".format("MultiModal", total_percent_explained))

plt.savefig('output/principal_component_analysis/combined_variance_explained.png')
plt.close()

Plot spatial representation of each principal component in connectome space.

In [16]:
# Each row of components_ is one principal component expressed in flattened upper-triangle space.
pca_models[0].components_.shape

(10, 2346)

In [21]:
fig = plt.figure(figsize=(180, 60))
fig.suptitle('Spatial Loadings of Principal Components', fontsize=40)

# Grid layout: one row per connectome type, one column per principal component.
num_rows = len(pca_models)
modality_names = list(all_subjects_all_trials_connectome_upper_triangular_flattened)
for row_idx, (k, pca_model) in enumerate(zip(modality_names, pca_models)):
    for component_idx, loading_vector in enumerate(pca_model.components_[:n_components]):

        # Rebuild the symmetric region-by-region matrix from the flattened upper triangle,
        # then mirror it into the lower triangle.
        pc = np.zeros((num_regions, num_regions))
        pc[upper_triangular_including_diagonal_idxs] = loading_vector
        pc[lower_triangular_idxs] = pc.T[lower_triangular_idxs]

        # Draw the component as a connectome in its own grid cell
        grid_position = row_idx*n_components + component_idx + 1
        ax = fig.add_subplot(num_rows, n_components, grid_position)
        panel_title = '{0} PC{1} Connectome'.format(k, component_idx+1)
        DesikanAtlas.plot(pc, title=panel_title, axes=ax)

plt.savefig('output/principal_component_analysis/spatial_loadings.png')
plt.close()

Plot the correlation of each of the first three principal components with the connectome at every time point.

In [22]:
def add_arrow(line, position=None, direction='right', size=15, color=None):
    """
    add an arrow to a line.

    line:       Line2D object
    position:   x-position of the arrow. If None, mean of xdata is taken
    direction:  'right' draws the arrow from the data point closest to `position`
                to the next point; any other value draws it to the previous point
    size:       size of the arrow in fontsize points
    color:      if None, line color is taken.

    The arrow is always drawn between two neighbouring data points: when the closest
    point is the last one (direction 'right') or the first one (otherwise), the arrow
    is shifted onto the final / first segment instead of indexing past the data.
    Lines with fewer than two points get no arrow.
    """
    if color is None:
        color = line.get_color()

    # asarray so that list-backed line data supports .mean() and arithmetic
    xdata = np.asarray(line.get_xdata())
    ydata = np.asarray(line.get_ydata())

    num_points = len(xdata)
    if num_points < 2:
        # no segment to put an arrow on
        return

    if position is None:
        position = xdata.mean()
    # find closest index
    start_ind = int(np.argmin(np.absolute(xdata - position)))
    if direction == 'right':
        # keep start_ind + 1 inside the data
        start_ind = min(start_ind, num_points - 2)
        end_ind = start_ind + 1
    else:
        # keep start_ind - 1 from wrapping around to the last point
        start_ind = max(start_ind, 1)
        end_ind = start_ind - 1

    line.axes.annotate('',
        xytext=(xdata[start_ind], ydata[start_ind]),
        xy=(xdata[end_ind], ydata[end_ind]),
        arrowprops=dict(arrowstyle="->", color=color),
        size=size
    )

In [24]:
fig = plt.figure(figsize=(300, 300))
fig.suptitle('Low-Dimensional Manifold Traversed by Brain State', fontsize=60)

# Each connectome type gets one row of 5 panels:
#   clustered 3D trajectory | raw 3D trajectory | PC1 | PC2 | PC3 correlation through time
num_rows = len(pca_models)
subplot_idx = 1
for (k, pca_model) in zip(all_subjects_all_trials_connectome_upper_triangular_flattened, pca_models):

    connectome_timeseries = all_subjects_all_trials_connectome_upper_triangular_flattened[k]

    # Correlation between each of the first 3 principal components and the connectome at
    # every time point: a list of 3 series, one per component.
    correlation_timeseries_for_first_three_pcs = [
        [np.corrcoef(pc, time_pt)[0, 1] for time_pt in connectome_timeseries]
        for pc in pca_model.components_[:3]
    ]

    # Cluster time series. A time column (one stamp every 3 units) is appended so clusters are
    # formed in (PC1, PC2, PC3, time) space and can be ordered by their mean time afterwards.
    # NOTE(review): the time column is not rescaled, so it is far larger in magnitude than the
    # correlations and presumably dominates the clustering -- confirm this is intended.
    time_series_data = np.array(correlation_timeseries_for_first_three_pcs)
    time = np.arange(0, 3*time_series_data[0].shape[0], step=3)
    time_series_data_including_time = np.vstack((time_series_data, time)).T
    # random_state fixed so the clustering (and therefore the figure) is reproducible
    clustered_correlation_timeseries_for_first_three_pcs = KMeans(n_clusters=30, random_state=0).fit(time_series_data_including_time)
    time_ordered_cluster_centers = clustered_correlation_timeseries_for_first_three_pcs.cluster_centers_
    time_ordered_cluster_centers = time_ordered_cluster_centers[time_ordered_cluster_centers[:, 3].argsort()]

    # Panel 1: cluster centers joined in time order
    ax = fig.add_subplot(num_rows, 5, subplot_idx, projection='3d')
    lines = ax.plot(time_ordered_cluster_centers[:, 0],
                    time_ordered_cluster_centers[:, 1],
                    time_ordered_cluster_centers[:, 2],
                    c='black',
                    alpha=0.5,
                    linewidth=3.0)
    for line in lines:
        add_arrow(line, color='black', size=15)

    ax.scatter(time_ordered_cluster_centers[:, 0],
               time_ordered_cluster_centers[:, 1],
               time_ordered_cluster_centers[:, 2],
               c=np.arange(len(time_ordered_cluster_centers)),
               alpha=1.0,
               cmap='YlGnBu',
               marker='o',
               s=400)
    ax.set_xlabel('PC1', fontsize=20)
    ax.set_ylabel('PC2', fontsize=20)
    ax.set_zlabel('PC3', fontsize=20)
    ax.set_title(k+" Clustered (Time+Space) Low-Dimensional Manifold", fontsize=30)
    subplot_idx += 1

    # Panel 2: correlation timeseries for first 3 PCs in a 3d scatterplot, coloured by time index
    ax = fig.add_subplot(num_rows, 5, subplot_idx, projection='3d')
    ax.plot(correlation_timeseries_for_first_three_pcs[0],
            correlation_timeseries_for_first_three_pcs[1],
            correlation_timeseries_for_first_three_pcs[2],
            c='black',
            alpha=0.2,
            linewidth=0.5)
    ax.scatter(correlation_timeseries_for_first_three_pcs[0],
               correlation_timeseries_for_first_three_pcs[1],
               correlation_timeseries_for_first_three_pcs[2],
               c=np.arange(len(correlation_timeseries_for_first_three_pcs[0])),
               alpha=0.5,
               cmap='YlGnBu',
               marker='o')
    ax.set_xlabel('PC1', fontsize=20)
    ax.set_ylabel('PC2', fontsize=20)
    ax.set_zlabel('PC3', fontsize=20)
    ax.set_title(k+" Low Dimensional Manifold", fontsize=30)
    subplot_idx += 1

    # Panels 3-5: correlation timeseries for each PC separately. Looping over the three series
    # guarantees every panel shows its own component (the PC3 panel used to plot the PC2 series).
    for pc_number, correlation_timeseries in enumerate(correlation_timeseries_for_first_three_pcs, start=1):
        ax = fig.add_subplot(num_rows, 5, subplot_idx)
        ax.plot(correlation_timeseries)
        ax.set_xlabel('Time', fontsize=20)
        ax.set_ylabel('PC{0} Correlation'.format(pc_number), fontsize=20)
        ax.set_title("{0} PC{1}".format(k, pc_number), fontsize=30)
        subplot_idx += 1

plt.savefig('output/principal_component_analysis/low_dimensional_manifold_traversal.png')
plt.close()

Compute correlations between correlation timeseries of PCs of different modalities.

In [None]:
# Correlation timeseries of the first 3 PCs of every modality. Each series depends only on its
# own (modality, component) pair, so it is computed once here rather than inside the pairwise
# loop below, where it would be recomputed for every combination.
num_pcs_to_compare = 3
pc_correlation_timeseries = {}
for (k, pca_model) in zip(all_subjects_all_trials_connectome_upper_triangular_flattened, pca_models):
    connectome_timeseries = all_subjects_all_trials_connectome_upper_triangular_flattened[k]
    for component_idx in range(num_pcs_to_compare):
        component = pca_model.components_[component_idx]
        pc_correlation_timeseries[(k, component_idx)] = [np.corrcoef(component, time_pt)[0, 1] for time_pt in connectome_timeseries]

# Correlate every (modality a, PC i) series with every (modality b, PC j) series.
# Results are keyed "a-i <-> b-j" with zero-based component indices.
correlations_between_pcs_of_different_modalities = {}

for k_a in all_subjects_all_trials_connectome_upper_triangular_flattened:
    for k_b in all_subjects_all_trials_connectome_upper_triangular_flattened:
        for a_i in range(num_pcs_to_compare):
            for b_j in range(num_pcs_to_compare):

                # Compute correlation between correlation timeseries
                corr = np.corrcoef(pc_correlation_timeseries[(k_a, a_i)], pc_correlation_timeseries[(k_b, b_j)])[0, 1]
                pair_key = "{0}-{1} <-> {2}-{3}".format(k_a, a_i, k_b, b_j)
                correlations_between_pcs_of_different_modalities[pair_key] = corr
                print("{0} = {1}".format(pair_key, corr))

In [None]:
# Row/column labels "modality-component" (zero-based), in the same order for both axes.
labels = []
for k in all_subjects_all_trials_connectome_upper_triangular_flattened:
    for component_idx in range(0, 3):
        labels.append("{0}-{1}".format(k, component_idx))
N = len(labels)

# Arrange the pairwise correlations into a square matrix by looking up each label pair.
cross_modality_pc_correlation_matrix = np.zeros((N, N))
for row_idx, row_label in enumerate(labels):
    for col_idx, col_label in enumerate(labels):
        pair_key = "{0} <-> {1}".format(row_label, col_label)
        cross_modality_pc_correlation_matrix[row_idx][col_idx] = correlations_between_pcs_of_different_modalities[pair_key]

In [None]:
# Heat map of the cross-modality correlation matrix with tick labels on the top and left.
f = plt.figure(figsize=(15, 10))

plt.imshow(cross_modality_pc_correlation_matrix, cmap='gist_heat')
plt.title("Cross Modality Correlations of Principal Component Correlation Timeseries").set_position([.5, 1.3])
# plt.gca() is the axes imshow just drew on; plt.axes() would add a new, empty axes on top of
# the image in current matplotlib instead of returning the existing one.
plt.gca().xaxis.set_ticks_position('top')
plt.xticks(range(len(labels)), labels, rotation='vertical')
plt.yticks(range(len(labels)), labels)
plt.colorbar()

f.savefig('output/principal_component_analysis/cross_modality_correlations_of_pc_timeseries.png')
plt.close()

## Multimodal Low Dimensional Brain Manifold Characterized By Graph Statistics

In [25]:
# Reminder of the multimodal data matrix dimensions before it is used below.
fmri_alpha_beta_delta_gamma_theta_matrix.shape

(17906, 14076)

In [26]:
# The first dimension matches the number of rows of the multimodal data matrix, so rows can be
# selected with the per-time-point cluster labels further down.
brain_graph_statistics.shape

(17906, 18)

In [27]:
# Labels used for the legend and tick labels of the graph-statistic plots below.
brain_graph_statistics_descriptions

['fMRI Modularity',
 'Alpha-Band EEG Modularity',
 'Beta-Band EEG Modularity',
 'Delta-Band EEG Modularity',
 'Gamma-Band EEG Modularity',
 'Theta-Band EEG Modularity',
 'fMRI Assortativity',
 'Alpha-Band EEG Assortativity',
 'Beta-Band EEG Assortativity',
 'Delta-Band EEG Assortativity',
 'Gamma-Band EEG Assortativity',
 'Theta-Band EEG Assortativity',
 'fMRI Global Efficiency',
 'Alpha-Band EEG Global Efficiency',
 'Beta-Band EEG Global Efficiency',
 'Delta-Band EEG Global Efficiency',
 'Gamma-Band EEG Global Efficiency',
 'Theta-Band EEG Global Efficiency']

In [29]:
# One row per principal component, one column per feature of the multimodal data matrix.
combined_pca_model.components_.shape

(10, 14076)

In [30]:
fig = plt.figure(figsize=(300, 210))
fig.suptitle('Low-Dimensional Manifold Traversed by Multimodal Brain State\ncreated by Bliss and Salina', fontsize=300, fontweight='bold')

# Gather correlation timeseries for the first 3 principal components
correlation_timeseries_for_first_three_pcs = []
for component_idx in range(0, 3):

    # Compute correlation between pc and connectome timeseries
    pc = combined_pca_model.components_[component_idx]
    correlation_between_pc_and_connectome_timeseries = [np.corrcoef(pc, time_pt)[0, 1] for time_pt in fmri_alpha_beta_delta_gamma_theta_matrix]
    correlation_timeseries_for_first_three_pcs.append(correlation_between_pc_and_connectome_timeseries)

# Cluster time series. A time column (one stamp every 3 units) is appended so clusters are formed
# in (PC1, PC2, PC3, time) space and can be ordered by their mean time.
num_clusters = 20
time_series_data = np.array(correlation_timeseries_for_first_three_pcs)
time = np.arange(0, 3*time_series_data[0].shape[0], step=3)
time_series_data_including_time = np.vstack((time_series_data, time)).T
# random_state fixed so the clustering (and therefore the figure) is reproducible
clustered_correlation_timeseries_for_first_three_pcs = KMeans(n_clusters=num_clusters, random_state=0).fit(time_series_data_including_time)
time_ordered_cluster_centers = clustered_correlation_timeseries_for_first_three_pcs.cluster_centers_
time_ordered_cluster_centers = time_ordered_cluster_centers[time_ordered_cluster_centers[:, 3].argsort()]

ax = fig.add_subplot(111, projection='3d')

# Plot surface
ax.plot_trisurf(time_ordered_cluster_centers[:, 0],
                time_ordered_cluster_centers[:, 1],
                time_ordered_cluster_centers[:, 2], 
                cmap='gray', 
                alpha=0.2)

# Plot time lines: one segment between each pair of consecutive (time-ordered) cluster centers
points = time_ordered_cluster_centers[:, :-1].reshape(-1, 1, 3)
segments = np.concatenate([points[:-1], points[1:]], axis=1)

cm = plt.get_cmap('YlGnBu') 
# Deliberately not named `colors`: that name is the matplotlib.colors module imported at the top
# of the notebook, and rebinding it would break a later re-run of the cells that use it.
segment_colors = [cm(float(i)/(num_clusters-1)) for i in range(num_clusters-1)]

for i in range(num_clusters-1):
    segment = segments[i]
    line, = ax.plot(segment[:, 0],
                   segment[:, 1],
                   segment[:, 2],
                   color=segment_colors[i],
                   alpha=1.0,
                   linewidth=8.0)
    line.set_solid_capstyle('round')

# Compute z score of each statistic type for each label.
# Entry c of the list belongs to KMeans label c (not to the c-th time-ordered center).
brain_graph_statistics_per_cluster = []
for cluster in range(num_clusters):
    statistics_of_points_in_cluster = brain_graph_statistics[clustered_correlation_timeseries_for_first_three_pcs.labels_ == cluster]
    mean_of_statistics_of_points_in_cluster = np.mean(statistics_of_points_in_cluster, axis=0)
    z_of_statistics_of_points_in_cluster = (mean_of_statistics_of_points_in_cluster - brain_graph_statistics_means) / np.sqrt(brain_graph_statistics_vars)
    brain_graph_statistics_per_cluster.append(z_of_statistics_of_points_in_cluster)


def calculate_arc_points(start,end):
    """Build the outline of a pie wedge between two angles on the unit circle.

    start, end: wedge boundary angles in radians.

    Returns (vertices, extent): `vertices` is an (11, 2) array holding the origin followed by
    10 points sampled evenly along the arc from `start` to `end`; `extent` is the largest
    absolute coordinate among those vertices.
    """
    arc_angles = np.linspace(start, end, 10)
    xs = np.concatenate(([0.0], np.cos(arc_angles)))
    ys = np.concatenate(([0.0], np.sin(arc_angles)))
    wedge_vertices = np.column_stack((xs, ys))
    return wedge_vertices, np.abs(wedge_vertices).max()

# One colour per graph statistic (not per cluster), spread over the whole colormap.
cm = plt.get_cmap('gist_ncar') 
num_statistics = brain_graph_statistics.shape[1]
stat_colors = [cm(float(j)/max(num_statistics-1, 1)) for j in range(num_statistics)]

# time_ordered_cluster_centers[i] is the center of KMeans label time_order[i], while
# brain_graph_statistics_per_cluster is indexed by KMeans label. Recover the ordering so that each
# center is decorated with the statistics of its own cluster.
time_order = clustered_correlation_timeseries_for_first_three_pcs.cluster_centers_[:, 3].argsort()

# Base marker area; each wedge grows with the magnitude of its z score
scale = 35000

for i in range(time_ordered_cluster_centers.shape[0]):
    x = time_ordered_cluster_centers[i,0]
    y = time_ordered_cluster_centers[i,1]
    z = time_ordered_cluster_centers[i,2]
    cluster_stats = brain_graph_statistics_per_cluster[time_order[i]]
        
    # Draw a pie-chart marker at the center: one equal-angle wedge per statistic
    prev_r = 0
    
    for j in range(cluster_stats.shape[0]):
        r = 2 * np.pi * (j+1)/cluster_stats.shape[0]
        xy, s = calculate_arc_points(prev_r, r)
        prev_r = r
        
        size = scale + (scale)*abs(cluster_stats[j])
        
        # Opacity follows |z|, clamped to the range [0.5, 1]
        alpha = max(0.5, min(1, abs(cluster_stats[j])))
        ax.scatter(x, y, z, marker=xy, s=s ** 2 * size, facecolor=stat_colors[j], alpha=alpha)

ax.set_xlabel('PC1', fontsize=50)
ax.set_ylabel('PC2', fontsize=50)
ax.set_zlabel('PC3', fontsize=50)

ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])

ax.set_facecolor('black')

ax.grid(False)
ax.xaxis.pane.fill = False
ax.yaxis.pane.fill = False
ax.zaxis.pane.fill = False

ax.xaxis.pane.set_edgecolor('black')
ax.yaxis.pane.set_edgecolor('black')
ax.zaxis.pane.set_edgecolor('black')

# Legend: one patch per statistic, in the same order as the wedge colours
handles = []
for c, desc in zip(stat_colors, brain_graph_statistics_descriptions):
    handles.append(mpatches.Patch(color=c, label=desc))
ax.legend(handles=handles, loc='lower left', fontsize=65)

plt.savefig('output/principal_component_analysis/multi_modal_low_dimensional_manifold_traversal.png')
plt.close()

Plot brain graph statistics per cluster in a more readable (albeit less artistic) format.

In [31]:
# One bar chart per cluster: z score of every graph statistic, blue when positive, red otherwise.
num_cluster_panels = len(brain_graph_statistics_per_cluster)
f = plt.figure(figsize=(10*num_cluster_panels, 20))
f.suptitle('Brain Graph Statistic - Cluster Analysis')

for i, cluster_z_scored_stats in enumerate(brain_graph_statistics_per_cluster):

    f.add_subplot(1, num_cluster_panels, i + 1)
    for bar_position, z_scored_stat in enumerate(cluster_z_scored_stats):
        bar_color = 'blue' if z_scored_stat > 0 else 'red'
        plt.bar(bar_position, z_scored_stat, align='center', width=0.7, color=bar_color)

    # Same y-range in every panel so clusters are directly comparable
    plt.ylim([-1.96, 1.96])
    plt.ylabel("Z-Score of Statistic in Cluster Compared to Overall Timeseries")
    plt.title("Cluster {0}".format(i))
    tick_positions = range(len(brain_graph_statistics_descriptions))
    plt.xticks(tick_positions, brain_graph_statistics_descriptions, rotation='vertical')

plt.subplots_adjust(hspace=0.5)
f.savefig('output/principal_component_analysis/multi_modal_low_dimensional_manifold_traversal_cluster_analysis.png')
plt.close()