In [22]:
from junifer.storage import HDF5FeatureStorage
from julearn.api import run_cross_validation
from julearn.pipeline import PipelineCreator
from julearn.viz import plot_scores
from julearn.stats.corrected_ttest import corrected_ttest
import pandas as pd
import seaborn as sns
from sklearn.svm import LinearSVC
import warnings


In [23]:
storage = HDF5FeatureStorage(uri='./data/AOMIC_Histograms_Parcels.hdf5')

In [24]:
storage.list_features()

{'a619bd2605f3c1038383188f6c118f98': {'datagrabber': {'class': 'DataladAOMICID1000',
   'native_t1w': False,
   'replacements': ['subject'],
   'patterns': {'BOLD': {'pattern': 'derivatives/fmriprep/{subject}/func/{subject}_task-moviewatching_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz',
     'space': 'MNI152NLin2009cAsym',
     'mask': {'pattern': 'derivatives/fmriprep/{subject}/func/{subject}_task-moviewatching_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz',
      'space': 'MNI152NLin2009cAsym'},
     'confounds': {'pattern': 'derivatives/fmriprep/{subject}/func/{subject}_task-moviewatching_desc-confounds_regressors.tsv',
      'format': 'fmriprep'}},
    'T1w': {'pattern': 'derivatives/fmriprep/{subject}/anat/{subject}_space-MNI152NLin2009cAsym_desc-preproc_T1w.nii.gz',
     'space': 'MNI152NLin2009cAsym',
     'mask': {'pattern': 'derivatives/fmriprep/{subject}/anat/{subject}_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz',
      'space': 'MNI152NLin2009cAsym'}},
    'V

In [29]:
df_parcellations = storage.read_df('VBM_GM_Schaefer100x17_mean_aggregation')
df_histograms= storage.read_df('VBM_GM_Histogram_100bins_IXI_hist')
df_demographics = pd.read_csv('./data/participants.tsv',sep='\t')
df_demographics.rename(columns={"participant_id": "subject"}, inplace=True)

In [30]:
df_parcellations.columns = df_parcellations.columns.astype(str)
df_histograms.columns = df_histograms.columns.astype(str)

# List of columns for each dataframe
X_parcellations = list(df_parcellations.columns)
X_histograms = list(df_histograms.columns)

# Merge with df_demographics on 'subject'
df_full_parcellations = df_parcellations.merge(df_demographics, on="subject")
df_full_histograms = df_histograms.merge(df_demographics, on="subject")

# Map 'sex' column
#df_full_parcellations['sex'] = df_full_parcellations['sex'].map({'F': 1, 'M': 2})
#df_full_histograms['sex'] = df_full_histograms['sex'].map({'F': 1, 'M': 2})

# Drop rows with NaN values
df_full_parcellations = df_full_parcellations.dropna()
df_full_histograms = df_full_histograms.dropna()



In [31]:
creator = PipelineCreator(problem_type="classification")
creator.add("zscore")
creator.add(
    "svm",
    C=(0.001, 100, "log-uniform"),
)

<julearn.pipeline.pipeline_creator.PipelineCreator at 0x7f3253d69ed0>

In [32]:
search_params = {
    "kind": "optuna",
    "cv":5
}

scoring = ["balanced_accuracy", "accuracy"]

scores_hists, model_hists, inspector_hists = run_cross_validation(
    X=X_histograms,
    y='sex',
    data=df_full_histograms,
    search_params=search_params,
    model=creator,
    return_train_score=True,
    return_inspector=True,
    cv=4,
    scoring = scoring,

)


scores_schaefer, model_schaefer, inspector_schaefer = run_cross_validation(
    X=X_parcellations,
    y='sex',
    data=df_full_parcellations,
    search_params=search_params,
    model=creator,
    return_train_score=True,
    return_inspector=True,
    cv=5,
    scoring = scoring,
)

  warn_with_log(

  pipeline = search(  # type: ignore

  new_object = klass(**new_object_params)

[I 2024-09-23 14:58:23,562] A new study created in memory with name: no-name-86c3fae7-17f5-4ce0-a431-942810e405db
[I 2024-09-23 14:58:23,599] Trial 0 finished with value: 0.5705882352941176 and parameters: {'svm__C': 8.33587804952097}. Best is trial 0 with value: 0.5705882352941176.
[I 2024-09-23 14:58:23,635] Trial 1 finished with value: 0.43986928104575157 and parameters: {'svm__C': 0.005183724974519355}. Best is trial 0 with value: 0.5705882352941176.
[I 2024-09-23 14:58:23,671] Trial 2 finished with value: 0.43986928104575157 and parameters: {'svm__C': 0.0012118448263389754}. Best is trial 0 with value: 0.5705882352941176.
[I 2024-09-23 14:58:23,708] Trial 3 finished with value: 0.4901960784313725 and parameters: {'svm__C': 61.75479619555315}. Best is trial 0 with value: 0.5705882352941176.
[I 2024-09-23 14:58:23,744] Trial 4 finished with value: 0.43986928104575157 and parameters: {'

In [33]:
scores_schaefer

Unnamed: 0,fit_time,score_time,estimator,test_balanced_accuracy,train_balanced_accuracy,test_accuracy,train_accuracy,n_train,n_test,repeat,fold,cv_mdsum
0,0.362427,0.00307,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.823077,1.0,0.826087,1.0,92,23,0,0,b10eef89b4192178d482d7a1587a248a
1,0.360552,0.003016,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.871212,0.989362,0.869565,0.98913,92,23,0,1,b10eef89b4192178d482d7a1587a248a
2,0.360982,0.002963,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.846154,0.923913,0.826087,0.923913,92,23,0,2,b10eef89b4192178d482d7a1587a248a
3,0.355429,0.002966,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.643939,1.0,0.652174,1.0,92,23,0,3,b10eef89b4192178d482d7a1587a248a
4,0.36055,0.003048,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.837302,1.0,0.826087,1.0,92,23,0,4,b10eef89b4192178d482d7a1587a248a


In [34]:
scores_hists

Unnamed: 0,fit_time,score_time,estimator,test_balanced_accuracy,train_balanced_accuracy,test_accuracy,train_accuracy,n_train,n_test,repeat,fold,cv_mdsum
0,0.367971,0.003209,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.610577,0.732558,0.586207,0.732558,86,29,0,0,bc7087515161a73a5a6aff57863f3803
1,0.365895,0.003033,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.62381,0.733766,0.62069,0.732558,86,29,0,1,bc7087515161a73a5a6aff57863f3803
2,0.360433,0.002999,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.612981,1.0,0.62069,1.0,86,29,0,2,bc7087515161a73a5a6aff57863f3803
3,0.363556,0.003118,"OptunaSearchCV(cv=KFold(n_splits=5, random_sta...",0.708333,0.683511,0.714286,0.678161,87,28,0,3,bc7087515161a73a5a6aff57863f3803


In [35]:
scores_hists['model'] = 'AOMIC_Histograms'
scores_schaefer['model'] = 'AOMIC_Schaefer'
plot_scores(scores_schaefer,scores_hists)


BokehModel(combine_events=True, render_bundle={'docs_json': {'2809633c-67bf-447b-964a-b8667fbb5477': {'versionâ€¦