In [2]:
import pandas as pd
from pyspi.calculator import Calculator
import numpy as np
from copy import deepcopy
import glob
import os
import random
import neuromaps
from neuromaps.parcellate import Parcellater
from nibabel import freesurfer as fs
import nibabel as nib
from neuromaps.datasets import fetch_annotation
from pathlib import Path
from scipy.spatial.distance import cdist
from scipy.stats import spearmanr
import statsmodels.stats.multitest

# Seed the global RNGs so stochastic steps are reproducible. `random.seed` only
# affects Python's `random` module, so NumPy's global generator is seeded too in
# case later cells draw from it.
SEED = 127
random.seed(SEED)
np.random.seed(SEED)

# Add connectome workbench to path. The check keeps the cell idempotent:
# re-running it does not append the same directory again, and an unset PATH
# no longer raises KeyError.
WORKBENCH_BIN = '/Applications/workbench/bin_macosx64'
if WORKBENCH_BIN not in os.environ.get('PATH', '').split(os.pathsep):
    os.environ['PATH'] = os.pathsep.join(
        filter(None, [os.environ.get('PATH', ''), WORKBENCH_BIN])
    )

# Load the rpy2 IPython extension, which provides the %%R cell magic used by the R cells below
%load_ext rpy2.ipython

In [3]:
%%R

# Attach the R packages listed below, in this order, with their startup
# banners silenced.
r_packages <- c(
    "broom", "circlize", "ComplexHeatmap", "cowplot", "dendextend",
    "FactoMineR", "GGally", "ggseg", "glue", "grid", "patchwork",
    "see", "tidyverse"
)
suppressPackageStartupMessages(
    for (pkg in r_packages) library(pkg, character.only = TRUE)
)

# Use the cowplot theme as the default ggplot2 theme
theme_set(theme_cowplot())

## Load data and compute SPI–SPI similarity

In [10]:
# Root directory of the HCP100 data. Defaults to the original local path, but can
# be overridden with the HCP100_DATA_DIR environment variable so the notebook
# also runs on other machines. It stays a string ending in a path separator
# because file paths are built from it by string concatenation.
data_path = os.path.join(os.environ.get("HCP100_DATA_DIR", "/Users/abry4213/data/HCP100/"), "")

# Load in pyspi SPI info
pyspi_info = pd.read_csv("../feature_extraction/pyspi/pyspi_SPI_info.csv")

# Load in all the pyspi homotopic res.
# glob returns files in arbitrary order; sorting makes the concatenation order
# deterministic across runs and machines.
homotopic_result_files = sorted(
    glob.glob(data_path + "time_series_features/pyspi/*_homotopic_pyspi_fast_extended_results.csv")
)
if not homotopic_result_files:
    # pd.concat([]) would otherwise fail with an unhelpful "No objects to concatenate"
    raise FileNotFoundError(
        "No *_homotopic_pyspi_fast_extended_results.csv files found under "
        + data_path + "time_series_features/pyspi/"
    )

# Average `value` within each (region, SPI, subject) and drop missing averages.
# The SPI metadata in `pyspi_info` is not merged in here: the aggregation keeps
# only the grouping keys and `value`, so the merged columns would be discarded.
all_homotopic_res = (
    pd.concat([pd.read_csv(f) for f in homotopic_result_files], ignore_index=True)
    .groupby(['base_region_to', 'SPI', 'Subject'])['value']
    .mean()
    .reset_index()
    .dropna(subset=['value'])
)

# Per subject: Spearman correlation between every pair of SPIs, computed across
# regions. Rows of the pivot are (Subject, region) and columns are SPIs; the
# result is indexed by (Subject, SPI) with one column per SPI.
subject_wise_SPI_corrs = (
    all_homotopic_res
    .pivot_table(index=['Subject', 'base_region_to'], columns='SPI', values='value')
    .groupby(level='Subject')
    .corr(method='spearman')
)

# Long format: mean absolute Spearman correlation per SPI pair, averaged over
# subjects. Undefined (NaN) correlations are dropped before averaging.
mean_SPI_corrs_across_subjects = (
    subject_wise_SPI_corrs
    .reset_index()
    .melt(id_vars=['Subject', 'SPI'], var_name='SPI2', value_name='Spearman_Rho')
    .dropna(subset=['Spearman_Rho'])
    .assign(Spearman_Abs=lambda x: np.abs(x['Spearman_Rho']))
    .rename(columns={'SPI': 'SPI1'})
    .groupby(['SPI1', 'SPI2'])['Spearman_Abs']
    .mean()
    .reset_index()
)

# Wide SPI x SPI matrix (rows: SPI1, columns: SPI2), used for the clustered heatmap
mean_SPI_corrs_across_subjects_wide = mean_SPI_corrs_across_subjects.pivot(
    index='SPI1', columns='SPI2', values='Spearman_Abs'
)

In [34]:
# SPIs whose mean absolute Spearman correlation with cov_EmpiricalCovariance exceeds 0.95
mean_SPI_corrs_across_subjects.loc[
    lambda pairs: (pairs['SPI1'] == 'cov_EmpiricalCovariance') & (pairs['Spearman_Abs'] > 0.95)
]

Unnamed: 0,SPI1,SPI2,Spearman_Abs
9808,cov_EmpiricalCovariance,bary-sq_sgddtw_mean,0.995612
9838,cov_EmpiricalCovariance,cov-sq_EllipticEnvelope,0.968516
9839,cov_EmpiricalCovariance,cov-sq_EmpiricalCovariance,0.999300
9840,cov_EmpiricalCovariance,cov-sq_GraphicalLasso,0.999297
9841,cov_EmpiricalCovariance,cov-sq_GraphicalLassoCV,0.998046
...,...,...,...
10020,cov_EmpiricalCovariance,xcorr-sq_max_sig-True,0.996238
10023,cov_EmpiricalCovariance,xcorr_max_sig-True,0.996571
10026,cov_EmpiricalCovariance,xme_gaussian_k1,0.984153
10027,cov_EmpiricalCovariance,xme_gaussian_k10,0.977668


In [29]:
%%R -i mean_SPI_corrs_across_subjects_wide -o SPI_performance_cluster_df

# Cluster the SPI x SPI similarity matrix (Ward linkage), save the heatmap as an
# SVG, and export the cluster assignment of each SPI back to Python.
# Relies on packages attached earlier in the notebook (ComplexHeatmap, circlize,
# grid, tidyverse).

n_clusters <- 4  # number of row/column slices cut from the dendrograms
heatmap_file <- "../plots/SPI_similarity/All_SPI_spearman_corr_heatmap.svg"

# Convert explicitly: Heatmap() would otherwise coerce the data frame itself and
# emit a message. Row names are kept by as.matrix().
SPI_corr_matrix <- as.matrix(mean_SPI_corrs_across_subjects_wide)

# svg() fails if the target directory does not exist
dir.create(dirname(heatmap_file), recursive = TRUE, showWarnings = FALSE)

svg(heatmap_file, width=5, height=5, bg=NA)
# tryCatch(..., finally = dev.off()) closes the SVG device even if building or
# drawing the heatmap fails, so no graphics device is left open.
ht1 <- tryCatch({
    heatmap_spec <- ComplexHeatmap::Heatmap(SPI_corr_matrix,
                                clustering_method_rows = "ward.D2",
                                clustering_method_columns = "ward.D2",
                                row_names_side = "right",
                                row_dend_side = "left",
                                row_dend_width = unit(1, "cm"),
                                # lwd is a unitless line-width multiplier, not a length
                                row_dend_gp = gpar(lwd = 0.5),
                                row_split = n_clusters,
                                column_split = n_clusters,
                                row_title = NULL,
                                column_title = NULL,
                                show_row_names = FALSE,
                                show_column_names = FALSE,
                                name = "Spearman corr (abs)",
                                show_column_dend = FALSE,
                                col = colorRamp2(
                                        c(0, 0.25, 0.5, 0.75, 1),
                                        c("#F2D8B9", "#F2C488", "#ECA077", "#D46276", "#953A5C")
                                      ),
                                heatmap_legend_param = list(legend_direction = "horizontal",
                                                            legend_width = unit(5, "cm")))

    # draw() returns the drawn heatmap, from which the row order is read below
    draw(heatmap_spec, heatmap_legend_side = "bottom",
         padding = unit(c(2, 2, 2, 2), "mm"),
         background = "transparent")
}, finally = dev.off())

# Get row order. The code below treats it as a list with one vector of row
# indices per row slice (cluster).
# NOTE: the variable shadows the row_order() function; the name is kept in case
# other cells read it.
row_order <- row_order(ht1)
SPI_index_df <- data.frame(SPI_Index = seq_len(nrow(SPI_corr_matrix)),
                           SPI = rownames(SPI_corr_matrix))

# Map the index of the row order as a cluster, to each node
# Convert to data frame; the join key is explicit so dplyr does not have to
# guess it (and print a message).
SPI_performance_cluster_df <- do.call(rbind, lapply(seq_along(row_order), function(i) {
  data.frame(SPI_Index = row_order[[i]], Cluster = i)
})) %>% left_join(SPI_index_df, by = "SPI_Index")


Joining with `by = join_by(SPI_Index)`


The input is a data frame-like object, convert it to a matrix. 


In [35]:
# Attach the pyspi metadata to each clustered SPI. A left join keeps every
# clustered SPI, including ones without a matching row in pyspi_info.
SPI_performance_cluster_df_info = pd.merge(
    SPI_performance_cluster_df,
    pyspi_info,
    how='left',
    on='SPI',
)
SPI_performance_cluster_df_info.head()

Unnamed: 0,SPI_Index,Cluster,SPI,Directionality,Literature_category,Module
0,181,1,sgc_parametric_max_fs-1_fmin-0_fmax-0-25_order-1,Directed,spectral,M05
1,184,1,sgc_parametric_max_fs-1_fmin-1e-05_fmax-0-5_or...,Directed,spectral,
2,90,1,gpdcoh_multitaper_mean_fs-1_fmin-0_fmax-0-5,Directed,spectral,M06
3,79,1,dtf_multitaper_mean_fs-1_fmin-0_fmax-0-5,Directed,spectral,M06
4,117,1,pdcoh_multitaper_mean_fs-1_fmin-0_fmax-0-5,Directed,spectral,M06


In [36]:
# Preview the first five SPI pairs of the long-format similarity table
mean_SPI_corrs_across_subjects.head(n=5)

Unnamed: 0,SPI1,SPI2,Spearman_Abs
0,anm,anm,1.0
1,anm,bary-sq_dtw_max,0.139062
2,anm,bary-sq_dtw_mean,0.138616
3,anm,bary-sq_sgddtw_max,0.14639
4,anm,bary-sq_sgddtw_mean,0.130604


In [44]:
# Within-cluster similarity: for SPI pairs whose two members fall in the same
# cluster, summarise the mean absolute Spearman correlation per cluster.
# NOTE(review): self-pairs (SPI1 == SPI2, value 1.0) are not filtered out, so
# they contribute to max and mean — confirm this is intended.
spi_to_cluster = SPI_performance_cluster_df_info[['SPI', 'Cluster']]

# Label both members of every pair with their cluster (inner joins)
pairs_with_clusters = (
    mean_SPI_corrs_across_subjects
    .merge(spi_to_cluster.rename(columns={'SPI': 'SPI1', 'Cluster': 'Cluster1'}), on='SPI1')
    .merge(spi_to_cluster.rename(columns={'SPI': 'SPI2', 'Cluster': 'Cluster2'}), on='SPI2')
)

same_cluster_pairs = pairs_with_clusters[
    pairs_with_clusters['Cluster1'] == pairs_with_clusters['Cluster2']
]

(same_cluster_pairs
 .groupby(['Cluster1'])['Spearman_Abs']
 .agg(['min', 'max', 'mean'])
 .reset_index()
 )

Unnamed: 0,Cluster1,min,max,mean
0,1,0.540391,1.0,0.905743
1,2,0.077081,1.0,0.191831
2,3,0.411862,1.0,0.781457
3,4,0.133152,1.0,0.378879
