In [1]:
from junifer.storage import HDF5FeatureStorage
from julearn.api import run_cross_validation
from julearn.pipeline import PipelineCreator
from julearn.viz import plot_scores
from julearn.stats.corrected_ttest import corrected_ttest
import pandas as pd
import seaborn as sns
from sklearn.svm import LinearSVC
import warnings


It is highly recommended to configure Git before using DataLad. Set both 'user.name' and 'user.email' configuration variables.


In [2]:
# Handle to the junifer HDF5 feature store that the feature tables are read from.
storage = HDF5FeatureStorage(
    uri="./data/AOMIC_Histograms_Parcels.hdf5",
)

In [3]:
# Two feature tables read from the HDF5 store, addressed by their feature names.
df_parcellations = storage.read_df("VBM_GM_Schaefer100x17_mean_aggregation")
df_histograms = storage.read_df("VBM_GM_Histogram_100bins_IXI_hist")

# Participant table (tab-separated). The ID column is renamed to "subject"
# because the feature tables are later merged with it on that key.
df_demographics = pd.read_csv("./data/participants.tsv", sep="\t").rename(
    columns={"participant_id": "subject"}
)

In [10]:
# Cast feature column labels to strings.
# NOTE(review): presumably required because the labels are later passed by
# name as the X columns for cross-validation — confirm.
df_parcellations.columns = df_parcellations.columns.astype(str)
df_histograms.columns = df_histograms.columns.astype(str)

# Feature column names of each table (captured before demographics are added).
X_parcellations = df_parcellations.columns.tolist()
X_histograms = df_histograms.columns.tolist()

# For each feature table: attach demographics via the "subject" key, encode
# sex numerically (F -> 1, M -> 2; any other value becomes NaN), then drop
# every row that contains a NaN in any column.
df_full_parcellations = (
    df_parcellations
    .merge(df_demographics, on="subject")
    .assign(sex=lambda frame: frame["sex"].map({"F": 1, "M": 2}))
    .dropna()
)
df_full_histograms = (
    df_histograms
    .merge(df_demographics, on="subject")
    .assign(sex=lambda frame: frame["sex"].map({"F": 1, "M": 2}))
    .dropna()
)

# Class sizes after cleaning, shown as the cell output.
sex_counts = df_full_parcellations["sex"].value_counts()
sex_counts


sex
1.0    128
2.0     96
Name: count, dtype: int64

In [11]:
# Build sex-balanced versions of both merged tables by downsampling females
# to the number of males.
#
# Sex is encoded as F -> 1 and M -> 2 when the merged frames are created, so
# males must be selected with `== 2`. (The previous `== -1` matched no rows,
# which silently produced empty "balanced" frames.)
df_females_parcellations = df_full_parcellations[df_full_parcellations['sex'] == 1]
df_males_parcellations = df_full_parcellations[df_full_parcellations['sex'] == 2]

df_females_histograms = df_full_histograms[df_full_histograms['sex'] == 1]
df_males_histograms = df_full_histograms[df_full_histograms['sex'] == 2]

# Fail loudly if a class is missing instead of producing empty balanced frames.
assert not df_males_parcellations.empty and not df_females_parcellations.empty, \
    "parcellation table is missing one sex class; check the 'sex' encoding"
assert not df_males_histograms.empty and not df_females_histograms.empty, \
    "histogram table is missing one sex class; check the 'sex' encoding"

# Randomly select as many female subjects as there are male subjects.
# `sample` raises ValueError if there are fewer females than males.
# NOTE(review): the two tables are sampled independently; the same seed only
# picks the same subjects if both tables list subjects in the same order — confirm.
df_females_parcellations_reduced = df_females_parcellations.sample(n=len(df_males_parcellations), random_state=123)
df_females_histograms_reduced = df_females_histograms.sample(n=len(df_males_histograms), random_state=123)

# Combine the male subjects with the selected female subjects
df_balanced_parcellations = pd.concat([df_females_parcellations_reduced, df_males_parcellations])
df_balanced_histograms = pd.concat([df_females_histograms_reduced, df_males_histograms])

# Shuffle rows with a fixed seed so the classes are interleaved, and reset the index.
# NOTE(review): the cross-validation cell passes df_full_*, not these balanced
# frames — confirm which dataset is intended.
df_balanced_parcellations = df_balanced_parcellations.sample(frac=1, random_state=42).reset_index(drop=True)
df_balanced_histograms = df_balanced_histograms.sample(frac=1, random_state=42).reset_index(drop=True)

In [15]:
# Classification pipeline: a "zscore" step followed by an "svm" step, both
# added with their default settings (no hyperparameters are specified here).
creator = PipelineCreator(problem_type="classification")
creator.add("zscore")
creator.add("svm")

<julearn.pipeline.pipeline_creator.PipelineCreator at 0x7f0722c69890>

In [16]:
# Hyperparameter-search settings forwarded to julearn.
# NOTE(review): the pipeline defines no hyperparameter grid, so there may be
# nothing for the search to tune; the warnings recorded under this cell may be
# related — confirm.
search_params = {"kind": "optuna", "cv": 4}

# Metrics reported per fold.
scoring = ["balanced_accuracy", "accuracy"]

# Arguments shared by both runs, so the two feature sets are evaluated under
# exactly the same settings (4-fold CV, same pipeline, same metrics).
cv_kwargs = dict(
    y="sex",
    search_params=search_params,
    model=creator,
    return_train_score=True,
    return_inspector=True,
    cv=4,
    scoring=scoring,
)

# NOTE(review): both runs use the unbalanced df_full_* frames rather than the
# df_balanced_* frames built earlier — confirm this is intended.

# Sex classification from the histogram features.
scores_hists, model_hists, inspector_hists = run_cross_validation(
    X=X_histograms,
    data=df_full_histograms,
    **cv_kwargs,
)

# Sex classification from the Schaefer parcellation features.
scores_schaefer, model_schaefer, inspector_schaefer = run_cross_validation(
    X=X_parcellations,
    data=df_full_parcellations,
    **cv_kwargs,
)

  warn_with_log(

  warn_with_log(



In [17]:
# Per-fold cross-validation scores for the Schaefer parcellation features.
scores_schaefer

Unnamed: 0,fit_time,score_time,estimator,test_balanced_accuracy,train_balanced_accuracy,test_accuracy,train_accuracy,n_train,n_test,repeat,fold,cv_mdsum
0,0.005141,0.003434,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.494792,0.751736,0.553571,0.785714,168,56,0,0,bc7087515161a73a5a6aff57863f3803
1,0.005174,0.003498,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.464286,0.695882,0.464286,0.75,168,56,0,1,bc7087515161a73a5a6aff57863f3803
2,0.004956,0.003382,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.507742,0.762451,0.553571,0.797619,168,56,0,2,bc7087515161a73a5a6aff57863f3803
3,0.005096,0.003425,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.497866,0.835165,0.607143,0.845238,168,56,0,3,bc7087515161a73a5a6aff57863f3803


In [18]:
# Per-fold cross-validation scores for the histogram features.
scores_hists

Unnamed: 0,fit_time,score_time,estimator,test_balanced_accuracy,train_balanced_accuracy,test_accuracy,train_accuracy,n_train,n_test,repeat,fold,cv_mdsum
0,0.005829,0.003575,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.5,0.539931,0.571429,0.60119,168,56,0,0,bc7087515161a73a5a6aff57863f3803
1,0.00507,0.003407,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.428571,0.543529,0.428571,0.619048,168,56,0,1,bc7087515161a73a5a6aff57863f3803
2,0.005203,0.003595,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.467742,0.528169,0.517857,0.60119,168,56,0,2,bc7087515161a73a5a6aff57863f3803
3,0.00502,0.003486,"(SetColumnTypes(X_types={}), StandardScaler(),...",0.486486,0.519481,0.642857,0.559524,168,56,0,3,bc7087515161a73a5a6aff57863f3803


In [9]:
# Tag each score table with a model label, then plot the two side by side.
scores_hists = scores_hists.assign(model="AOMIC_Histograms")
scores_schaefer = scores_schaefer.assign(model="AOMIC_Schaefer")
plot_scores(scores_schaefer, scores_hists)


BokehModel(combine_events=True, render_bundle={'docs_json': {'2defe295-7a83-412d-b799-97f545dbdde3': {'version…

In [14]:
# Corrected t-test comparing the two models' cross-validation scores, metric by metric.
# NOTE(review): the recorded output shows NaN t-stat/p-val for both test
# metrics — investigate before interpreting the comparison.
stats_df = corrected_ttest(scores_schaefer, scores_hists)

# Bare last expression instead of print(): the notebook renders the frame as
# a single table rather than text wrapped across several column chunks.
stats_df

  warn_with_log(

  warn_with_log(



                    metric    t-stat     p-val         model_1  \
0   test_balanced_accuracy       NaN       NaN  AOMIC_Schaefer   
1  train_balanced_accuracy  0.631186  0.592435  AOMIC_Schaefer   
2            test_accuracy       NaN       NaN  AOMIC_Schaefer   
3           train_accuracy  0.631186  0.592435  AOMIC_Schaefer   

            model_2  p-val-corrected  
0  AOMIC_Histograms              NaN  
1  AOMIC_Histograms         0.592435  
2  AOMIC_Histograms              NaN  
3  AOMIC_Histograms         0.592435  
