In [1]:
from junifer.storage import HDF5FeatureStorage
from julearn.api import run_cross_validation
from julearn.pipeline import PipelineCreator
from julearn.viz import plot_scores
from julearn.stats.corrected_ttest import corrected_ttest
import pandas as pd
import seaborn as sns
from sklearn.svm import LinearSVC
import warnings


It is highly recommended to configure Git before using DataLad. Set both 'user.name' and 'user.email' configuration variables.


In [2]:
# Handle to the HDF5 feature file; feature tables are read from it by name with `read_df`.
storage = HDF5FeatureStorage(uri='./data/AOMIC_Parcels_sch100x7.hdf5')

In [22]:
# Two feature tables read by name from the HDF5 storage.
df_parcellations = storage.read_df("VBM_GM_Schaefer100x7_mean_aggregation")
df_histograms = storage.read_df("VBM_GM_Histogram_100bins_IXI_hist")

# Participant table (tab-separated). Its ID column is renamed to "subject",
# which is the key used when merging with the feature tables.
df_demographics = pd.read_csv("./data/participants.tsv", sep="\t").rename(
    columns={"participant_id": "subject"}
)

In [23]:
# Discard every row that contains at least one missing value.
df_parcellations = df_parcellations.dropna()
df_histograms = df_histograms.dropna()


In [24]:
# Make every column label a string in both feature tables.
df_parcellations.columns = df_parcellations.columns.astype(str)
df_histograms.columns = df_histograms.columns.astype(str)

# Feature names: all columns of each table, captured before the demographic
# columns are merged in.
X_parcellations = df_parcellations.columns.tolist()
X_histograms = df_histograms.columns.tolist()

# Attach the demographic columns to each feature table, matching on "subject".
df_full_parcellations = pd.merge(df_parcellations, df_demographics, on="subject")
df_full_histograms = pd.merge(df_histograms, df_demographics, on="subject")


In [25]:
# Classification pipeline: a "zscore" step followed by an "svm" step.
creator = PipelineCreator(problem_type="classification")
creator.add("zscore")

# C is given as a (low, high, distribution) tuple rather than a single value,
# so it is left for the hyperparameter search to tune.
svm_c_range = (0.001, 100, "log-uniform")
creator.add("svm", C=svm_c_range)

<julearn.pipeline.pipeline_creator.PipelineCreator at 0x7f8ba31a2a10>

In [30]:
# Number of folds, used for both the outer evaluation CV and the inner
# hyperparameter-search CV.
N_SPLITS = 4
# Fixed seed: the Optuna search samples C values at random, so without a seed
# the scores change on every run.
# NOTE(review): relies on julearn's documented `seed` argument seeding the
# search as well — confirm that two consecutive runs give identical scores.
SEED = 42

search_params = {
    "kind": "optuna",
    "cv": N_SPLITS,
}

scoring = ["balanced_accuracy", "accuracy"]

# Arguments shared by both runs; only the feature set (X) and the matching
# dataframe differ between them.
shared_cv_kwargs = dict(
    y="sex",
    model=creator,
    search_params=search_params,
    return_train_score=True,
    return_inspector=True,
    cv=N_SPLITS,
    scoring=scoring,
    seed=SEED,
)

# Histogram features.
scores_hists, model_hists, inspector_hists = run_cross_validation(
    X=X_histograms,
    data=df_full_histograms,
    **shared_cv_kwargs,
)

# Schaefer parcellation features.
scores_schaefer, model_schaefer, inspector_schaefer = run_cross_validation(
    X=X_parcellations,
    data=df_full_parcellations,
    **shared_cv_kwargs,
)

  warn_with_log(

  pipeline = search(  # type: ignore

  new_object = klass(**new_object_params)

[I 2024-09-24 01:50:20,732] A new study created in memory with name: no-name-40a9a2cd-5151-4d05-940d-4a0a3c7e863c
[I 2024-09-24 01:50:20,809] Trial 0 finished with value: 0.6982248520710059 and parameters: {'svm__C': 2.480889790314321}. Best is trial 0 with value: 0.6982248520710059.
[I 2024-09-24 01:50:20,885] Trial 1 finished with value: 0.6952662721893491 and parameters: {'svm__C': 2.1984889335981284}. Best is trial 0 with value: 0.6982248520710059.
[I 2024-09-24 01:50:20,969] Trial 2 finished with value: 0.6183431952662722 and parameters: {'svm__C': 0.030568773339542808}. Best is trial 0 with value: 0.6982248520710059.
[I 2024-09-24 01:50:21,053] Trial 3 finished with value: 0.47633136094674555 and parameters: {'svm__C': 0.001846006568116291}. Best is trial 0 with value: 0.6982248520710059.
[I 2024-09-24 01:50:21,137] Trial 4 finished with value: 0.47633136094674555 and parameters: {'

In [31]:
# Display the per-fold score table returned for the parcellation features.
scores_schaefer

Unnamed: 0,fit_time,score_time,estimator,test_balanced_accuracy,train_balanced_accuracy,test_accuracy,train_accuracy,n_train,n_test,repeat,fold,cv_mdsum
0,0.839818,0.008981,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.840558,0.964941,0.836283,0.964497,676,226,0,0,bc7087515161a73a5a6aff57863f3803
1,0.811612,0.009305,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.809943,0.99565,0.80531,0.995562,676,226,0,1,bc7087515161a73a5a6aff57863f3803
2,0.883196,0.009518,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.844477,0.935275,0.84,0.935007,677,225,0,2,bc7087515161a73a5a6aff57863f3803
3,0.817973,0.009235,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.799818,0.915409,0.8,0.914328,677,225,0,3,bc7087515161a73a5a6aff57863f3803


In [32]:
# Display the per-fold score table returned for the histogram features.
scores_hists

Unnamed: 0,fit_time,score_time,estimator,test_balanced_accuracy,train_balanced_accuracy,test_accuracy,train_accuracy,n_train,n_test,repeat,fold,cv_mdsum
0,0.823943,0.009692,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.758276,0.792859,0.756637,0.794379,676,226,0,0,bc7087515161a73a5a6aff57863f3803
1,0.818658,0.00912,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.768868,0.824863,0.761062,0.828402,676,226,0,1,bc7087515161a73a5a6aff57863f3803
2,0.925513,0.009872,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.738978,0.789465,0.751111,0.790251,677,225,0,2,bc7087515161a73a5a6aff57863f3803
3,0.855256,0.0093,"OptunaSearchCV(cv=KFold(n_splits=4, random_sta...",0.679678,0.824127,0.68,0.827179,677,225,0,3,bc7087515161a73a5a6aff57863f3803


In [33]:
# Tag each score table with a model label before passing both to the plot.
for score_table, model_label in (
    (scores_hists, 'AOMIC_Histograms'),
    (scores_schaefer, 'AOMIC_Schaefer'),
):
    score_table['model'] = model_label

plot_scores(scores_schaefer, scores_hists)


BokehModel(combine_events=True, render_bundle={'docs_json': {'e36c1753-fb8e-4be3-8ebd-18159853c0a8': {'version…

  warn_with_log(

  warn_with_log(

