In [25]:
from julearn.viz import plot_scores
from seaborn import load_dataset
from julearn import run_cross_validation, PipelineCreator
from julearn.utils import configure_logging
from sklearn.model_selection import StratifiedKFold
from junifer.storage import HDF5FeatureStorage
import pandas as pd
import seaborn as sns

hdf5 = HDF5FeatureStorage('features/ds003097_ReHo.hdf5')
features_df = hdf5.read_df('BOLD_ReHo-Power2013-10mm')
features_df.dropna(inplace=True)

demographics = pd.read_csv('features/participants.tsv', sep='\t')
demographics.rename(columns={'participant_id': 'subject'}, inplace=True)
hemi_lh = pd.read_csv('features/data-cortical_type-aparc_measure-volume_hemi-lh.tsv', sep='\t')
hemi_lh.rename(columns={'lh.aparc.volume': 'subject'}, inplace=True)

def create_pipeline(problem_type, model):
    return PipelineCreator(problem_type).add('zscore').add(**model)

def run_cv(features, y, data_df, cv, pipelines, scoring=None):
    return run_cross_validation(X=features,
                                y=y,
                                data=data_df,
                                model=pipelines,
                                cv=cv, 
                                scoring=scoring, 
                                return_train_score=True,
                                return_inspector=True)

In [26]:
features_df.columns = features_df.columns.astype(str)
data_df = pd.merge(features_df, demographics, on='subject')
data_df = pd.merge(data_df, hemi_lh, on='subject')

In [27]:
features = list(features_df.columns)
y = 'sex'
problem_type = 'classification'
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [28]:
svm_linear = create_pipeline(problem_type, {'step': 'svm', 'kernel': 'linear', 'C': [0.1, 1, 10]})
svm_nolinear = create_pipeline(problem_type, {'step': 'svm', 'kernel': ['rbf', 'sigmoid'], 'C': [0.1, 1, 10], 'gamma': [1.e-09, 1.e-02, 1.e+03]})
svm_scores, svm, svm_inspector = run_cv(features, y, data_df, cv, [svm_linear, svm_nolinear])
# rf = run_cv(features, y, df_penguins, problem_type, cv, {'step': 'rf'}, scoring)
# dummy = run_cv(features, y, df_penguins, problem_type, cv, {'step': 'dummy'}, scoring)
svm_scores['model'] = 'svm'

The following columns are not defined in X_types: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', 

In [4]:
svm_scores

Unnamed: 0,fit_time,score_time,estimator,test_score,train_score,n_train,n_test,repeat,fold,cv_mdsum,model
0,10.818428,0.015052,"GridSearchCV(cv=KFold(n_splits=5, random_state...",0.843023,0.981077,687,172,0,0,4bbff3df81cf8854822a1b557f37a710,svm
1,10.128311,0.014928,"GridSearchCV(cv=KFold(n_splits=5, random_state...",0.767442,0.978166,687,172,0,1,4bbff3df81cf8854822a1b557f37a710,svm
2,9.800977,0.012676,"GridSearchCV(cv=KFold(n_splits=5, random_state...",0.77907,1.0,687,172,0,2,4bbff3df81cf8854822a1b557f37a710,svm
3,9.84559,0.015255,"GridSearchCV(cv=KFold(n_splits=5, random_state...",0.796512,0.983988,687,172,0,3,4bbff3df81cf8854822a1b557f37a710,svm
4,9.362524,0.013824,"GridSearchCV(cv=KFold(n_splits=5, random_state...",0.859649,0.975291,688,171,0,4,4bbff3df81cf8854822a1b557f37a710,svm


In [5]:
import pandas as pd
pd.DataFrame(svm_scores.estimator.apply(lambda x: x.best_params_).tolist())

Unnamed: 0,set_column_types,svm,svm__C,zscore
0,SetColumnTypes(X_types={}),SVC(kernel='linear'),0.1,StandardScaler()
1,SetColumnTypes(X_types={}),SVC(kernel='linear'),0.1,StandardScaler()
2,SetColumnTypes(X_types={}),SVC(kernel='linear'),1.0,StandardScaler()
3,SetColumnTypes(X_types={}),SVC(kernel='linear'),0.1,StandardScaler()
4,SetColumnTypes(X_types={}),SVC(kernel='linear'),0.1,StandardScaler()
