In [133]:
import os
import sys
import numpy as np
import pandas as pd
import os.path
from scipy import stats
import statsmodels

from statsmodels.stats.multitest import \
     multipletests as mult_test

data_path="/Users/abry4213/data/fMRI_classification/"

from core_classification_functions import *
current_path = os.getcwd()
from mixed_sigmoid_normalisation import MixedSigmoidScaler
data_path="/Users/abry4213/data/fMRI_classification/"

%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [134]:
%%R
# Load the R packages used by the R cells in this notebook (tidyverse, cowplot,
# ggseg, patchwork, ...), with their startup messages suppressed
suppressPackageStartupMessages({
    library(broom)
    library(cowplot)
    library(ggseg)
    library(ggsegHO)
    library(patchwork)
    library(see)
    library(tidyverse)
    # Make the cowplot theme the default ggplot2 theme for all later plots
    theme_set(theme_cowplot())

    # Source my custom correctR code
    source("/Users/abry4213/github/correctR/R/repkfold_ttest.R")
})

In [135]:
# --- Sample metadata for both studies ---
UCLA_CNP_metadata = pd.read_feather(f"{data_path}/input_data/UCLA_CNP_sample_metadata_filtered.feather")
ABIDE_metadata = pd.read_feather(f"{data_path}/input_data/ABIDE_sample_metadata_filtered.feather")
# The ABIDE frame gets an explicit Study label before the two studies are stacked
ABIDE_metadata = ABIDE_metadata.assign(Study="ABIDE")
merged_metadata = pd.concat([UCLA_CNP_metadata, ABIDE_metadata])

# --- ABIDE brain region lookup table, ordered by region name ---
ABIDE_brain_region_info = pd.read_table(f"{data_path}/input_data/ABIDE_brain_region_info.txt", sep="\t")
ABIDE_brain_region_info = ABIDE_brain_region_info.sort_values("Brain Region")
# Column names: swap spaces for underscores (e.g. "Brain Region" -> "Brain_Region")
ABIDE_brain_region_info = ABIDE_brain_region_info.rename(columns=lambda col: col.replace(" ", "_"))
ABIDE_brain_regions = ABIDE_brain_region_info["Brain_Region"].values

# --- "Glocal" (local-global coupling) features, tagged with their feature type ---
glocal_features = pd.read_feather(f"{data_path}/time_series_features/All_glocal_coupling.feather")
glocal_features = glocal_features.assign(Feature_Type="glocal_coupling")

# --- Univariate classification results, restricted to the brain-region analysis ---
univariate_p_values = pd.read_feather(f'{data_path}/classification_results/univariate_p_values.feather')
univariate_p_values = (
    univariate_p_values
    .query("Analysis_Type=='Brain_Region'")
    # group_var holds the region identifier for this analysis type
    .rename(columns={"group_var": "Brain_Region"})
    .assign(Feature_Type="catch25_features")
)

In [169]:
%%R -i glocal_features

# One colour per diagnosis, shared by the fill and the outline scales
diagnosis_colours <- c("SCZ" = "#9d60a8", 
                       "BP" = "#2F77C0", 
                       "ADHD" = "#84b278", 
                       "ASD" = "#E28328",
                       "Control" = "grey80")

# Fix the display order of diagnoses and studies; every non-ABIDE row is labelled "UCLA CNP"
glocal_plot_data <- glocal_features %>% 
        mutate(Diagnosis = factor(Diagnosis, levels=c("SCZ", "BP", "ADHD", "ASD", "Control")),
               Study = factor(ifelse(Study=="ABIDE", "ABIDE", "UCLA CNP"),
                              levels=c("UCLA CNP", "ABIDE")))

# Per diagnosis: nudged half-violin, black crossbar at the mean, and jittered points
# (grouped by brain region), with one facet per study
ggplot(data=glocal_plot_data, 
       mapping=aes(x=Diagnosis, y=Pearson_Correlation, fill=Diagnosis, color=Diagnosis)) +
        geom_violinhalf(scale="width", position = position_nudge(x=0.2))  +
        stat_summary(color="black", fun="mean", geom="crossbar", width=0.45, linewidth=0.3, 
            show.legend=FALSE, position = position_nudge(x=0.42)) +
        geom_point(position = position_jitter(width = 0.1),
                    size = 1, alpha=0.7, stroke=0, aes(group=Brain_Region)) +
        ylab("Local-Global Coupling (Pearson's R)") +
        facet_grid(.~Study, scales="free", space="free") +
        scale_fill_manual(values = diagnosis_colours)  +
        scale_color_manual(values = diagnosis_colours) +
        # The legend carries the diagnosis labels, so the x axis is left blank
        theme(legend.position = "bottom", 
            axis.text.x = element_blank(),
            axis.ticks.x = element_blank(),
            axis.title.x = element_blank(),
            strip.background = element_blank(), 
            strip.text = element_text(face="bold", size=12),
            panel.spacing.y = unit(-0.5, "lines"))


# Write the most recent plot to disk
ggsave("../../plots/univariate_results/Brain_region_local_global_stats.png", width=6, height=4, units='in', dpi=300)

In [141]:
# --- Cross-validation settings ---
num_folds = 10
num_repeats = 10
num_null_iters = 0
num_jobs = 1

# Repeated stratified k-fold splitter; the fixed random_state makes the splits repeatable
RepeatedStratifiedKFold_splitter = RepeatedStratifiedKFold(
    n_splits=num_folds,
    n_repeats=num_repeats,
    random_state=127,
)

# --- Classifier: scaling step followed by a class-weight-balanced linear SVM ---
model = svm.SVC(kernel="linear", C=1, class_weight="balanced")
pipe = Pipeline([
    ("scaler", MixedSigmoidScaler(unit_variance=True)),
    ("model", model),
])

# --- Scoring: balanced accuracy, with the column name used for it in the results ---
scoring_names = ["Balanced_Accuracy"]
scorers = [make_scorer(balanced_accuracy_score)]

# --- Analysis labels ---
grouping_var = "covariance"
classifier_type = "Linear_SVM_sklearn"


In [150]:
# Case--control classification from local-global coupling, run separately for each
# disorder and each brain region. Results are cached on disk and only recomputed
# when the cache file is missing.
study_group_df = pd.DataFrame({"Disorder": ["SCZ", "BP", "ADHD", "ASD"],
                               "Study": ["UCLA_CNP", "UCLA_CNP", "UCLA_CNP", "ABIDE"]})

# Label written to the Analysis_Type column of the results. This cell previously read an
# undefined name (`Analysis_Type`), which raises NameError on a fresh kernel.
# NOTE(review): "Brain_Region" matches the Analysis_Type value used to filter
# univariate_p_values -- confirm it is the intended label.
analysis_type = "Brain_Region"

# Location of the cached fold-level classification results
glocal_results_path = f"{data_path}/classification_results/Brain_region_local_global_classification_results.feather"

# One results frame per (disorder, brain region), concatenated once at the end
brain_region_glocal_res_list = []

if not os.path.isfile(glocal_results_path):
    for disorder, study in zip(study_group_df["Disorder"], study_group_df["Study"]):

        # Keep only cases of the current disorder plus controls, within the current study
        glocal_features_filtered = glocal_features.query("Diagnosis in [@disorder, 'Control'] & Study == @study")

        # Iterate over brain regions
        for brain_region in glocal_features_filtered.Brain_Region.unique():
            print(f"Running classification for {disorder} in {brain_region} in {study}")
            glocal_features_brain_region = glocal_features_filtered.query("Brain_Region == @brain_region")

            # Single feature column, reshaped to (n_samples, 1) for the classifier
            glocal_data_only = glocal_features_brain_region['Pearson_Correlation'].values.reshape(-1, 1)

            # Binary labels (1 = current disorder, 0 = anything else, i.e. Control) and sample IDs
            class_labels = np.array([int(diagnosis == disorder)
                                     for diagnosis in glocal_features_brain_region["Diagnosis"].tolist()])
            sample_IDs = np.array(glocal_features_brain_region["Sample_ID"].tolist())

            # Run classification; only the first returned object is used here
            main_classification_res, _, _ = run_k_fold_classifier_for_feature(feature_data = glocal_data_only,
                                                                              pipe = pipe,
                                                                              CV_splitter = RepeatedStratifiedKFold_splitter,
                                                                              class_labels = class_labels,
                                                                              sample_IDs = sample_IDs,
                                                                              scorers = scorers,
                                                                              scoring_names = scoring_names,
                                                                              num_null_iters = num_null_iters,
                                                                              num_folds = num_folds,
                                                                              num_repeats = num_repeats,
                                                                              num_jobs = num_jobs)

            # Add 1 to Fold and Repeat (presumably 0-based as returned by the helper -- confirm)
            main_classification_res["Fold"] = main_classification_res["Fold"] + 1
            main_classification_res["Repeat"] = main_classification_res["Repeat"] + 1

            # Assign key details to the results
            main_classification_res["Disorder"] = disorder
            main_classification_res["Study"] = study
            main_classification_res["Analysis_Type"] = analysis_type
            main_classification_res["Brain_Region"] = brain_region
            main_classification_res["Classifier_Type"] = classifier_type

            # Append to list
            brain_region_glocal_res_list.append(main_classification_res)

    # Combine all results
    brain_region_glocal_res = pd.concat(brain_region_glocal_res_list)

    # Save results; the index is reset first because the concatenated frames repeat index values
    brain_region_glocal_res.reset_index().to_feather(glocal_results_path)
else:
    brain_region_glocal_res = pd.read_feather(glocal_results_path)

# Mean balanced accuracy across folds and repeats for each disorder / study / brain region
brain_region_glocal_res_mean = (brain_region_glocal_res
                                .groupby(["Disorder", "Study", "Brain_Region", "Analysis_Type", "Classifier_Type"])
                                .agg({"Balanced_Accuracy": "mean"})
                                .reset_index()
)

In [146]:
# Overwrite Brain_Region with the ABIDE region names on every ASD row, in both the
# glocal mean-accuracy table and the univariate results table.
# NOTE(review): the assignment is positional -- it assumes the ASD rows of each frame are
# already in the same order (and number) as ABIDE_brain_regions; confirm before relying on it.
asd_rows_glocal = brain_region_glocal_res_mean["Disorder"] == "ASD"
brain_region_glocal_res_mean.loc[asd_rows_glocal, "Brain_Region"] = ABIDE_brain_regions

asd_rows_univariate = univariate_p_values["Disorder"] == "ASD"
univariate_p_values.loc[asd_rows_univariate, "Brain_Region"] = ABIDE_brain_regions

In [164]:
%%R -i brain_region_glocal_res_mean,univariate_p_values

# Stack the glocal-coupling and catch25 per-region accuracies into one long table,
# with disorders in a fixed display order
accuracy_by_feature_type <- plyr::rbind.fill(
        mutate(brain_region_glocal_res_mean, Feature_Type = "glocal_coupling"),
        univariate_p_values) %>%
    mutate(Disorder = factor(Disorder, levels=c("SCZ", "BP", "ADHD", "ASD")))

# One point per brain region and feature type; thin grey lines join the two points
# belonging to the same brain region within each disorder facet
ggplot(data=accuracy_by_feature_type, 
       mapping=aes(x=Feature_Type, y=100*Balanced_Accuracy, color=Feature_Type)) +
    geom_point(size=2, alpha=0.8, stroke=0) +
    geom_line(aes(group = Brain_Region), color="gray50", alpha=0.3, linewidth=0.25) +
    facet_grid(.~Disorder) +
    labs(color="Feature Type", y="Balanced Accuracy (%)", x="Feature Type") +
    scale_color_manual(values = c("catch25_features" = "#67913c", "glocal_coupling" = "#a864d2")) +
    # The colour legend identifies the feature type, so x tick labels are hidden
    theme(axis.text.x = element_blank(), 
          axis.ticks.x = element_blank(),
          legend.position = "bottom",
          strip.text = element_text(face="bold", size=12),
          strip.background = element_blank())

# Write the most recent plot to disk
ggsave("../../plots/univariate_results/Brain_region_local_global_classification_results.svg", width=7, height=2.5, units='in', dpi=300)

In [85]:
%%R -i brain_region_glocal_res_mean,univariate_p_values -o region_wise_diffs

# Long table: glocal-coupling rows stacked on top of the catch25 rows
accuracy_long <- plyr::rbind.fill(
        mutate(brain_region_glocal_res_mean, Feature_Type = "glocal_coupling"),
        univariate_p_values) %>%
    mutate(Disorder = factor(Disorder, levels=c("SCZ", "BP", "ADHD", "ASD")))

# Wide table: one row per (Disorder, Study, Brain_Region), one accuracy column per feature type
accuracy_wide <- pivot_wider(accuracy_long,
                             id_cols=c("Disorder", "Study", "Brain_Region"),
                             names_from="Feature_Type",
                             values_from="Balanced_Accuracy")

# Difference in percentage points (glocal minus catch25); .keep="unused" drops the
# two accuracy columns that were consumed by the calculation
region_wise_diffs <- mutate(accuracy_wide,
                            diff = 100*(glocal_coupling - catch25_features),
                            .keep="unused")


In [113]:
# Range of the accuracy differences across all rows; passed to the R brain-map cell as the
# limits of the fill scale
diff_values = region_wise_diffs["diff"]
min_diff = np.min(diff_values)
max_diff = np.max(diff_values)

# UCLA CNP rows are used as they are
region_wise_diffs_UCLA_CNP = region_wise_diffs.query("Study=='UCLA_CNP'")

# ABIDE rows are joined to the brain region lookup table on Brain_Region
abide_diffs_with_info = (region_wise_diffs
                         .query("Study=='ABIDE'")
                         .merge(ABIDE_brain_region_info, on="Brain_Region"))

# Rows whose Brain_Region contains "Hesch" get a fixed `region` string; all others keep
# the value from the lookup table (presumably so the name matches the atlas -- confirm)
is_heschl = abide_diffs_with_info["Brain_Region"].str.contains("Hesch")
region_wise_diffs_ABIDE = abide_diffs_with_info.assign(
    region=np.where(is_heschl,
                    'Heschl s Gyrus includes H1 and H2 ',
                    abide_diffs_with_info["region"]))

In [157]:
%%R -i region_wise_diffs_ABIDE,ABIDE_brain_region_info,region_wise_diffs_UCLA_CNP,min_diff,max_diff

# Layers common to every brain map: blank theme, centred bold title, bottom legend, and a
# diverging fill scale centred on zero whose limits are shared across all disorders.
# Built fresh on each call so no plot shares scale objects with another.
brain_map_layers <- function(plot_title) {
    list(
        theme_void(),
        theme(plot.title = element_text(hjust=0.5, face="bold"), legend.position='bottom'),
        ggtitle(plot_title),
        scale_fill_gradient2(low="#67913c", mid="white", high="#a864d2", midpoint=0, na.value="white",
                            limits=c(min_diff,max_diff)),
        guides(col = guide_legend(override.aes = list(color="black")))
    )
}

diff_plots_list <- list()

# UCLA CNP disorders are drawn on the "dk" atlas
for (disorder in c("SCZ", "BP", "ADHD")) {
    diff_plots_list[[disorder]] <- region_wise_diffs_UCLA_CNP %>%
        filter(Disorder == disorder) %>%
        # Build the atlas join key: for "ctx-" regions, turn "-" into "_", then strip "ctx_"
        mutate(label = ifelse(str_detect(Brain_Region, "ctx-"),
                                gsub("-", "_", Brain_Region),
                                as.character(Brain_Region)),
               label = gsub("ctx_", "", label)) %>%
        ggseg(atlas="dk", mapping=aes(fill=diff), position="stacked", colour="darkgray") +
        brain_map_layers(disorder)
}

# ASD (ABIDE) is drawn on the "hoCort" atlas
diff_plots_list[["ASD"]] <- region_wise_diffs_ABIDE %>%
    ggseg(atlas = "hoCort", mapping = aes(fill = diff),
        position = "stacked", colour = "darkgray") + 
    brain_map_layers("ASD")

# All four maps in one row, with the legends collected into one at the bottom
wrap_plots(diff_plots_list, nrow=1) + 
    plot_layout(guides = "collect") & 
    theme(legend.position = 'bottom')
ggsave("../../plots/univariate_results/Brain_region_local_global_classification_results_brains.svg", width=10, height=4, units='in', dpi=300)
    

merging atlas and data by 'label'
merging atlas and data by 'label'
merging atlas and data by 'label'
merging atlas and data by 'region'
1: Some data not merged properly. Check for naming errors in data:
   atlas type  hemi  side  region label        roi   Disorder Study Brain_Region
   <chr> <chr> <chr> <chr> <chr>  <chr>        <chr> <fct>    <chr> <chr>       
 1 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Accumb… <NA>  SCZ      UCLA… Left-Accumb…
 2 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Amygda… <NA>  SCZ      UCLA… Left-Amygda…
 3 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Caudate <NA>  SCZ      UCLA… Left-Caudate
 4 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Hippoc… <NA>  SCZ      UCLA… Left-Hippoc…
 5 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Pallid… <NA>  SCZ      UCLA… Left-Pallid…
 6 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Putamen <NA>  SCZ      UCLA… Left-Putamen
 7 <NA>  <NA>  <NA>  <NA>  <NA>   Left-Thalam… <NA>  SCZ      UCLA… Left-Thalam…
 8 <NA>  <NA>  <NA>  <NA>  <NA>   Right-Accum… <NA>  SCZ      UCLA…