In [1]:
import os
import sys
import numpy as np
import nibabel as nib
import pandas as pd
from sklearn import svm
from sklearn.pipeline import Pipeline
import os.path
from sklearn.model_selection import RepeatedStratifiedKFold, cross_validate, permutation_test_score, StratifiedKFold, GridSearchCV, RepeatedKFold
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.metrics import balanced_accuracy_score,make_scorer
from copy import deepcopy
import argparse

from core_classification_functions import *
# Working directory at the moment this cell runs.
# NOTE(review): not referenced by any other cell visible in this notebook excerpt.
current_path = os.getcwd()
from mixed_sigmoid_normalisation import MixedSigmoidScaler

# Root directory holding the classification results and feature-info tables.
# The location can be overridden per machine via the FMRI_CLASSIFICATION_DATA_PATH
# environment variable; when it is unset, the original hard-coded path is used.
data_path = os.environ.get(
    "FMRI_CLASSIFICATION_DATA_PATH",
    "/Users/abry4213/data/fMRI_classification/",
)

# Load rpy2's IPython extension; the later cells that start with %%R depend on it.
%load_ext rpy2.ipython

In [26]:
%%R
# Attach the R packages used by the R cells (tidyverse, see, cowplot) and make
# cowplot's theme the default ggplot2 theme for this R session.
# suppressPackageStartupMessages() hides startup messages only; warnings
# (e.g. a package built under a different R version) are still printed.
suppressPackageStartupMessages({
    library(tidyverse)
    library(see)
    library(cowplot)
    theme_set(theme_cowplot())
})

package ‘see’ was built under R version 4.3.3 


In [23]:
# Build the input locations with os.path.join so the paths are well-formed
# whether or not data_path carries a trailing slash.
classification_results_dir = os.path.join(data_path, "classification_results")
feature_info_dir = os.path.join(data_path, "feature_info")

# Load main balanced accuracy results
univariate_p_values = pd.read_feather(
    os.path.join(classification_results_dir, "univariate_p_values.feather")
)
pairwise_p_values = pd.read_feather(
    os.path.join(classification_results_dir, "pairwise_p_values.feather")
)
combined_univariate_pairwise_p_values = pd.read_feather(
    os.path.join(classification_results_dir, "combined_univariate_pairwise_p_values.feather")
)

# Load SPI directionality info
pairwise_feature_info = pd.read_csv(os.path.join(feature_info_dir, "pairwise_feature_info.csv"))

# Load model size info
model_sizes = pd.read_csv(os.path.join(feature_info_dir, "model_sizes.csv"))

# Stack the three result tables and annotate each row with directionality and
# model size. The whole derivation is one chain starting from the freshly
# loaded frames, so re-running the cell always yields the same table.
all_p_value_results = (
    pd.concat([univariate_p_values, pairwise_p_values, combined_univariate_pairwise_p_values])
    # group_var doubles as the lookup key into the pairwise feature table.
    .assign(pyspi_name=lambda x: x['group_var'])
    .merge(pairwise_feature_info, on='pyspi_name', how='left')
    .drop(columns=['Figure_name', 'Literature_Category', 'Feature_description', 'Module'])
    # Rows with no match in pairwise_feature_info have NaN here after the left
    # merge; they are labelled 'Undirected', same as an explicit 'No'.
    .assign(Directed=lambda x: (x['Directed']
                                .replace({'Yes': 'Directed', 'No': 'Undirected'})
                                .fillna('Undirected')))
    # No `on=` is given, so pandas joins on every column name the two frames share.
    # NOTE(review): consider naming the key column(s) explicitly once the
    # model_sizes schema is confirmed.
    .merge(model_sizes, how='left')
)

In [24]:
# Preview the first rows of the merged results table.
all_p_value_results.head()

Unnamed: 0,index,group_var,Classifier_Type,Analysis_Type,Disorder,Balanced_Accuracy,Balanced_Accuracy_SD,Study,p_value,p_value_HolmBonferroni,p_value_BenjaminiHochberg,pyspi_name,Directed,Number_Variables
0,1,Angular_Gyrus,Linear_SVM_sklearn,Brain_Region,ASD,0.464773,0.047356,ABIDE,0.983754,1.0,0.983754,Angular_Gyrus,Undirected,25
1,2,Central_Opercular_Cortex,Linear_SVM_sklearn,Brain_Region,ASD,0.516732,0.04397,ABIDE,0.174462,1.0,0.261694,Central_Opercular_Cortex,Undirected,25
2,3,Cingulate_Gyrus_anterior_division,Linear_SVM_sklearn,Brain_Region,ASD,0.522574,0.047888,ABIDE,0.106961,1.0,0.200791,Cingulate_Gyrus_anterior_division,Undirected,25
3,4,Cingulate_Gyrus_posterior_division,Linear_SVM_sklearn,Brain_Region,ASD,0.522027,0.043988,ABIDE,0.099296,1.0,0.200791,Cingulate_Gyrus_posterior_division,Undirected,25
4,5,Cuneal_Cortex,Linear_SVM_sklearn,Brain_Region,ASD,0.51972,0.040899,ABIDE,0.126958,1.0,0.225704,Cuneal_Cortex,Undirected,25


In [40]:
%%R -i all_p_value_results

# Balanced accuracy (%) against log10(number of variables in the model),
# one facet per disorder. Disorder is turned into a factor to fix the facet
# order; any value outside the listed levels would become NA.
model_size_plot <- all_p_value_results %>% 
    mutate(Disorder = factor(Disorder, levels=c("SCZ", "BP", "ADHD", "ASD"))) %>% 
    ggplot(data=., mapping=aes(x=log10(Number_Variables), y=100*Balanced_Accuracy)) +
    geom_point(aes(color=Disorder), alpha=0.8) +
    facet_wrap(Disorder ~ ., scales='fixed', nrow=1) +
    theme(legend.position="none",
          strip.background = element_blank(),
          panel.spacing = unit(2, "lines"),
          strip.text = element_text(face="bold")) +
    ylab("Balanced Accuracy (%)") +
    scale_color_manual(values = c("SCZ" = "#9d60a8", 
                                    "BP" = "#2F77C0", 
                                    "ADHD" = "#84b278", 
                                    "ASD" = "#E28328",
                                    "Control" = "grey80")) +
    xlab("Log10(Number of variables)") 

# Print explicitly: the plot expression is not the last statement of the cell,
# so it should not be left to auto-printing to render the figure.
print(model_size_plot)

# Pass the plot object explicitly instead of relying on ggsave()'s implicit
# last_plot() default, so the saved file is always this figure.
ggsave("../../plots/final_figures/model_size_vs_balanced_accuracy.png",
       plot=model_size_plot, width=8, height=3)