In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import tqdm
import mofax as mfx
import warnings
import random

from skbio.stats.distance import permanova
from gemelli.rpca import joint_rpca, rpca
from helper_functions import simple_blocks
from sklearn.model_selection import train_test_split
from skbio.stats.composition import clr
from biom import Table

from sklearn.ensemble import RandomForestClassifier
from skbio import DistanceMatrix
from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (average_precision_score, roc_auc_score)
from tqdm.notebook import tqdm

%matplotlib inline
warnings.filterwarnings('ignore', category=RuntimeWarning)

### Data simulation 

In [None]:
# Simulate three omics tables (100, 150 and 200 features) for every number of
# signal-carrying features in `n_induced_lst`, then add Poisson-distributed
# counts to the selected features in the first half of the samples.
# Outputs:
#   tables_save_joint : {("fsignal_<n_induced>", "f_<n_features>"): biom Table}
#   feat_induced_dict : {n_induced: [feature-id list for each of the three tables]}
random.seed(42)
n_samples = 50
tables_save_joint = {}
feat_induced_dict = {}
n_induced_lst = [5, 10, 20, 30, 40, 50, 60]

# Row positions of the samples that receive the induced signal (first half of
# the samples). Loop-invariant, so computed once.
# NOTE(review): assumes simple_blocks orders samples so that the first half is
# one group -- confirm against helper_functions.simple_blocks.
controls_idx = np.arange(0, int(n_samples/2))
# Rate of the Poisson draw that is added on top of the simulated counts.
poisson_lambda = 1.0

for n_induced in n_induced_lst:

    feat_induced_lst = []

    for i_, n_features in enumerate([100, 150, 200]):

        # pick which feature positions carry the induced signal in this table
        feat_signal_idx = random.sample(range(0, n_features), n_induced)
        feat_ids = ['omic%s_f%i' % (i_+1, x) for x in feat_signal_idx]
        feat_induced_lst.append(feat_ids)

        # NOTE: the original created an unused 1x3 figure here on every
        # iteration (21 empty figures left open); it has been removed.
        overlap_ = int(0.5*n_features) #ensure there's no underlying signal

        # build the base table with simple_blocks (n_blocks=2); the numpy seed
        # is reset first so every table starts from the same random state
        np.random.seed(42)
        bt_base_tmp, bt_sim_tmp, mf_sim_tmp = simple_blocks(n_samples, n_features, omic_id="omic%s" %(i_+1),
                                                            overlap=overlap_, n_blocks=2, percent_missing=2)
        # transpose so that rows index samples and columns index features
        table_array = bt_sim_tmp.matrix_data.toarray().T

        # features that get signal
        signal_features = feat_signal_idx.copy()

        # sample the induced signal from a Poisson distribution
        poisson_signal = np.random.poisson(lam=poisson_lambda,
                                           size=(len(controls_idx), len(signal_features)))

        # add signal to the selected samples/features
        # (controls_idx[:, None] broadcasts against signal_features)
        table_array[controls_idx[:, None], signal_features] += poisson_signal

        # convert back to biom (features x samples)
        table_biom = Table(table_array.T,
                           bt_sim_tmp.ids('observation'),
                           bt_sim_tmp.ids('sample'))

        tables_save_joint[("fsignal_%s" %n_induced, "f_%i" %n_features)] = table_biom.copy()
        print("Done simulating data...")

    # save ids of the features with signal
    feat_induced_dict[n_induced] = feat_induced_lst

In [None]:
#check densities
#tables_save_joint

In [None]:
# #save tables
# #with open('../../data/simulations/lowrank/tables_three_microb_fsignal.pkl', 'wb') as file:
# #    pickle.dump(tables_save_joint, file)

# #save IDs of features with induced signal
# with open('../../data/simulations/lowrank/microbs_with_signals.pkl', 'wb') as file:
#     pickle.dump(feat_induced_dict, file)
# #changes in v2: poisson signal and number of induced features

In [None]:
# Create train-test splits.
# The block below is deliberately disabled by wrapping it in a bare string
# literal (it is evaluated as a no-op expression). When enabled it would copy
# the simulated metadata (`mf_sim_tmp`), add ten stratified 75/25 split columns
# named 'train_test_1' ... 'train_test_10', and write the result to the CSV that
# is loaded at the bottom of this cell.
'''np.random.seed(42)
metadata = mf_sim_tmp.copy()

for f in range(10):
    f += 1
    train_, test_ = train_test_split(metadata, shuffle=True,
                                     stratify=metadata['groups'],
                                     test_size=0.25)
    metadata['train_test_%i' % (f)] = 'train'
    metadata.loc[test_.index, 'train_test_%i' % (f)] = 'test'

save metadata
metadata.to_csv('../../data/simulations/lowrank/metadata_diff_sparsities.csv')'''

# Load the previously saved metadata; the first CSV column becomes the index.
# Later cells read its 'groups' and 'train_test_<fold>' columns.
metadata = pd.read_csv('../../data/simulations/lowrank/metadata_diff_sparsities.csv', index_col=0)

### MOFA+ Stacked Tables

In [9]:
# For every signal level (largest first) list the three keys of
# `tables_save_joint` that are analysed jointly.
tables_joint = [
    [("fsignal_%s" % n_signal, view_) for view_ in ('f_100', 'f_150', 'f_200')]
    for n_signal in reversed(n_induced_lst)
]
# signal levels in the same (descending) order as `tables_joint`
n_induced_idx = list(reversed(n_induced_lst))
print(tables_joint)
print(n_induced_idx)

[[('fsignal_60', 'f_100'), ('fsignal_60', 'f_150'), ('fsignal_60', 'f_200')], [('fsignal_50', 'f_100'), ('fsignal_50', 'f_150'), ('fsignal_50', 'f_200')], [('fsignal_40', 'f_100'), ('fsignal_40', 'f_150'), ('fsignal_40', 'f_200')], [('fsignal_30', 'f_100'), ('fsignal_30', 'f_150'), ('fsignal_30', 'f_200')], [('fsignal_20', 'f_100'), ('fsignal_20', 'f_150'), ('fsignal_20', 'f_200')], [('fsignal_10', 'f_100'), ('fsignal_10', 'f_150'), ('fsignal_10', 'f_200')], [('fsignal_5', 'f_100'), ('fsignal_5', 'f_150'), ('fsignal_5', 'f_200')], [('fsignal_2', 'f_100'), ('fsignal_2', 'f_150'), ('fsignal_2', 'f_200')]]
[60, 50, 40, 30, 20, 10, 5, 2]


In [None]:
# Build the long-format (sample, feature, value, view) CLR tables used as MOFA+
# input: one table per signal level, fold and train/test subset.
# The export at the bottom is commented out, so nothing is written to disk.
for idx, tables_ in enumerate(tables_joint):
    print(tables_)
    for f_ in range(1, 11):
        tt_col = 'train_test_%i' % (f_)
        fold_labels = metadata[tt_col]
        train_ = metadata[fold_labels == 'train'].index
        test_ = metadata[fold_labels == 'test'].index

        for lbl_out_, use_sub_ in (('train', train_), ('test', test_)):
            per_view = {}
            for t_ in tables_:
                biom_tbl = tables_save_joint[t_]
                # dense features x samples frame with a 0.1 pseudocount
                dense = pd.DataFrame(biom_tbl.matrix_data.toarray() + 0.1,
                                     biom_tbl.ids('observation'),
                                     biom_tbl.ids())
                # CLR-transform each sample column of the subset, then melt to long form
                per_view[t_] = dense.loc[:, use_sub_].apply(clr).stack().reset_index()

            # concatenating a dict keyed by tuples puts the key parts in index
            # levels 0 and 1; level 1 ('f_<n_features>') is kept as the view name
            stacked_clr_tables = pd.concat(per_view)
            stacked_clr_tables.columns = ["feature","sample","value"]
            stacked_clr_tables = stacked_clr_tables.reset_index()
            stacked_clr_tables = stacked_clr_tables.drop(['level_0'], axis=1)
            stacked_clr_tables = stacked_clr_tables.rename({'level_1':'view'}, axis=1)
            stacked_clr_tables = stacked_clr_tables[["sample","feature","value","view"]]
            #stacked_clr_tables.to_csv('../../data/simulations/lowrank/mofa_tables_fsignal/fold-%i-subset-%s-fsignal%s.tsv.gz' % 
            #                         (f_, lbl_out_, n_induced_idx[idx]), sep='\t', compression='gzip')

[('fsignal_60', 'f_100'), ('fsignal_60', 'f_150'), ('fsignal_60', 'f_200')]
[('fsignal_50', 'f_100'), ('fsignal_50', 'f_150'), ('fsignal_50', 'f_200')]
[('fsignal_40', 'f_100'), ('fsignal_40', 'f_150'), ('fsignal_40', 'f_200')]
[('fsignal_30', 'f_100'), ('fsignal_30', 'f_150'), ('fsignal_30', 'f_200')]
[('fsignal_20', 'f_100'), ('fsignal_20', 'f_150'), ('fsignal_20', 'f_200')]
[('fsignal_10', 'f_100'), ('fsignal_10', 'f_150'), ('fsignal_10', 'f_200')]
[('fsignal_5', 'f_100'), ('fsignal_5', 'f_150'), ('fsignal_5', 'f_200')]
[('fsignal_2', 'f_100'), ('fsignal_2', 'f_150'), ('fsignal_2', 'f_200')]


### Joint-RPCA

In [None]:
# Run Joint-RPCA on the three tables of every signal level for each of the ten
# train/test folds. Results are stored in
#   joint_ord_results[(n_induced, fold)] = (sample ordination + metadata,
#                                           feature ordination, cv_)
# and a PC1/PC2 scatter of the samples is drawn per run.
joint_ord_results = {}

for idx, tables_depth in enumerate(tables_joint):
    print(tables_depth)
    for f in range(1, 11):

        # run Joint-RPCA with a fixed number of iterations on this fold
        ord_, _, cv_ = joint_rpca([tables_save_joint[joint_use_x].copy()
                                   for joint_use_x in tables_depth],
                                  max_iterations=50,
                                  sample_metadata=metadata,
                                  train_test_column='train_test_%i' % (f))
        ord_plt = pd.concat([ord_.samples, metadata], axis=1)
        joint_ord_results[n_induced_idx[idx], f] = (ord_plt, ord_.features, cv_)

        # features with the largest / smallest PC1 loadings
        features_by_pc1 = ord_.features.sort_values(by='PC1', ascending=False)
        display(features_by_pc1.head(5))
        display(features_by_pc1.tail(5))

        # NOTE(review): two panels are allocated but only ax[0] is drawn on;
        # the layout is kept as in the original.
        fig, ax = plt.subplots(1, 2, figsize=(10, 4))
        sns.scatterplot(x='PC1', y='PC2', hue='groups', s=60, data=ord_plt, ax=ax[0])
        ax[0].set_title('Joint-RPCA: {}'.format(n_induced_idx[idx]),
                        color='black', weight='bold', fontsize=18)
        ax[0].set_ylabel('PC2', color='black', fontsize=15)
        ax[0].set_xlabel('PC1', color='black', fontsize=15)
        ax[0].set_facecolor('white')
        ax[0].set_axisbelow(True)
        sns.despine(ax=ax[0])

        for child in ax[0].get_children():
            if isinstance(child, matplotlib.spines.Spine):
                child.set_color('black')
        # identical styling for the x and y tick labels
        for axis_labels in (ax[0].get_xticklabels(), ax[0].get_yticklabels()):
            for tick in axis_labels:
                tick.set_fontproperties('arial')
                tick.set_color("black")
                tick.set_weight("bold")
                tick.set_fontsize(12)

        # Render and release the figure inside the loop: up to 70 figures are
        # produced here and were previously all kept open until the cell ended.
        plt.show()
        plt.close(fig)

In [None]:
# with open('../../data/simulations/lowrank/joint-rpca_three_microb_fsignal.pkl', 'wb') as file:
#   pickle.dump(joint_ord_results, file)

### Run all other methods with scripts 1.3-1.6

### MOFA+ Projections

In [None]:
# Load the MOFA+ training factors for every signal level and fold, project the
# held-out test samples onto the first three factors through each view in turn,
# and store/save the combined (train + projected test) ordination per view.
path_to_mofa = '../../data/simulations/lowrank/mofa_fsignal/'
ord_res_mofa = {}
rename_cols = {'Factor1': 'PC1', 'Factor2': 'PC2', 'Factor3': 'PC3'}

for d in n_induced_idx:
    for f in range(1,11):
        # training ordination: long (sample, factor, value) -> samples x factors
        df_ = pd.read_csv('%s%i.factors.model.fsignal%s.csv' % (path_to_mofa, f, d), index_col=0)
        df_ = pd.pivot_table(columns='factor', values='value', index='sample', data=df_)
        df_ = df_.rename(columns=rename_cols)
        # standardise factors (zero mean, unit standard deviation)
        df_ -= df_.mean()
        df_ /= df_.std()

        model = mfx.mofa_model("%s%i.model.fsignal%s.hdf5TRUE" % (path_to_mofa, f, d))
        # try/finally so the model handle is released even if a read or a
        # projection below raises
        try:
            test_data_project = pd.read_csv("../../data/simulations/lowrank/mofa_tables_fsignal/fold-%i-subset-test-fsignal%s.tsv.gz" % (f, d), 
                                            index_col=0, sep='\t', compression='gzip')
            # project new data
            # MOFAx projects on a single view, they vary greatly so we project on each one to be fair.
            for view_use in model.views:
                test_data_project_view = test_data_project[test_data_project.view == view_use]
                test_data_project_view_X = pd.pivot_table(columns='sample',
                                                          index='feature',
                                                          values='value', data=test_data_project_view)
                # ensure projection data is ordered the same as the input data
                test_data_project_view_X = test_data_project_view_X.loc[list(model.features[view_use]), :]
                # project the test samples on each of the first three factors
                new_values = np.stack([model.project_data(test_data_project_view_X.values.T,
                                                          view=view_use, factors=i) for i in range(3)]).T
                projected_factors = pd.DataFrame(new_values,
                                                 test_data_project_view_X.columns,
                                                 ['PC1','PC2','PC3'])
                # standardise projections the same way as the training factors
                projected_factors -= projected_factors.mean()
                projected_factors /= projected_factors.std()
                df_projected_ = pd.concat([df_, projected_factors], axis=0)
                df_projected_.to_csv("%sprojections/fold-%i-fsignal%s-projected-on-fsignal%s.csv"
                                     % (path_to_mofa, f, d, view_use))
                # add metadata and keep in memory
                df_projected_ = pd.concat([df_projected_, metadata], axis=1)
                ord_res_mofa[(d, f, view_use)] = df_projected_
        finally:
            # close model now that we are done
            model.close()

### RF Classification

In [44]:
# Reload the saved Joint-RPCA results so the notebook can be resumed from here.
with open('../../data/simulations/lowrank/joint-rpca_three_microb_fsignal.pkl', 'rb') as file:
    joint_ord_results = pickle.load(file)

# Reload the MOFA+ ordinations (training factors plus projected test samples),
# one CSV per signal level, fold and projection view.
path_to_mofa = '../../data/simulations/lowrank/mofa_fsignal/'
n_induced_idx = [5, 10, 20, 30, 40, 50, 60]
ord_res_mofa = {}

for d in n_induced_idx:
    for f in range(1, 11):
        for view_use in ['f_100', 'f_150', 'f_200']:
            projection_csv = ("%sprojections/fold-%i-fsignal%s-projected-on-fsignal%s.csv"
                              % (path_to_mofa, f, d, view_use))
            df_projected_ = pd.read_csv(projection_csv, index_col=0)
            # attach the sample metadata column-wise and store
            df_projected_ = pd.concat([df_projected_, metadata], axis=1)
            ord_res_mofa[(d, f, view_use)] = df_projected_

In [34]:
# Load the mixOmics components for every signal level and fold. Each file holds
# all views stacked; rows are split on the '.id' column and stored per view.
ord_res_mixomics = {}
mixomics_csv = '../../data/simulations/lowrank/mixomics_fsignal/%s.factors.model.fsignal%s.csv'
mixomics_to_pc = {'comp1': 'PC1', 'comp2': 'PC2', 'comp3': 'PC3'}

for d in n_induced_idx:
    for f in range(1, 11):
        df_ = pd.read_csv(mixomics_csv % (f, d), index_col=0)

        for view_use, view_df in df_.groupby('.id'):
            # index by subject, keep the three components and name them like the other methods
            view_df = view_df.set_index('subject')[['comp1','comp2','comp3']]
            view_df = view_df.rename(columns=mixomics_to_pc)
            # attach the sample metadata column-wise
            view_df = pd.concat([view_df, metadata], axis=1)
            ord_res_mixomics[(d, f, view_use)] = view_df

In [35]:
# Load the iCluster ordination for every signal level and fold, label its
# three columns PC1-PC3 and attach the sample metadata.
ord_res_icluster = {}
icluster_csv = '../../data/simulations/lowrank/icluster_fsignal/%s.factors.model.fsignal%s.csv'

for d in n_induced_idx:
    for f in range(1, 11):
        view_df = pd.read_csv(icluster_csv % (f, d), index_col=0)
        view_df.columns = ['PC1','PC2','PC3']
        ord_res_icluster[(d, f)] = view_df = pd.concat([view_df, metadata], axis=1)

In [36]:
# Load the intNMF ordination for every signal level and fold, label its
# three columns PC1-PC3 and attach the sample metadata.
ord_res_intNMF = {}
intnmf_csv = '../../data/simulations/lowrank/intNMF_fsignal/%s.factors.model.fsignal%s.csv'

for d in n_induced_idx:
    for f in range(1, 11):
        view_df = pd.read_csv(intnmf_csv % (f, d), index_col=0)
        view_df.columns = ['PC1','PC2','PC3']
        ord_res_intNMF[(d, f)] = view_df = pd.concat([view_df, metadata], axis=1)

In [42]:
# Evaluate the methods that return a single ordination per run (Joint-RPCA,
# iCluster, intNMF). For every fold and signal level:
#   * PERMANOVA pseudo-F of the groups on the test-sample coordinates
#   * random-forest average precision and ROC AUC (train on the training
#     samples' PCs, score the test samples)
# Scores are collected in lists keyed by (n_induced, method).
ord_res_all = {**{(k[0], k[1], 'Joint-RPCA'):v for k, v in joint_ord_results.items()},
               **{(k[0], k[1], 'iCluster'):v for k, v in ord_res_icluster.items()},
               **{(k[0], k[1], 'intNMF'):v for k, v in ord_res_intNMF.items()},}

permanova_scores = {}
apr_scores = {}
auc_roc_scores = {}

cols_learn = ['PC1','PC2','PC3']
# 'g0' is encoded as the positive class
covert_map = {'g0': 1, 'g1': 0}
classifier = RandomForestClassifier(n_estimators=500, random_state=1010)

for fold in tqdm(range(1, 11)):
    tt_col = 'train_test_%i' % (fold)
    # for all signal levels
    for depth_ in n_induced_idx:
        for metric_ in ['iCluster','intNMF','Joint-RPCA']:
            # make sure a result list exists for this (level, method)
            score_key = (depth_, metric_)
            permanova_scores.setdefault(score_key, [])
            apr_scores.setdefault(score_key, [])
            auc_roc_scores.setdefault(score_key, [])

            # get the ordination data (Joint-RPCA entries are tuples whose
            # first element is the sample ordination)
            ord_entry = ord_res_all[(depth_, fold, metric_)]
            if metric_ == 'Joint-RPCA':
                tbl = ord_entry[0].copy()
            else:
                tbl = ord_entry.copy()
            tbl = tbl[cols_learn].dropna(subset=cols_learn)

            # Split the samples that have coordinates into train and test,
            # keeping the metadata row order. The previous set-intersection
            # ordering depended on set iteration order, which is not stable
            # between kernel sessions for string ids, so the seeded
            # RandomForest saw its rows in a different order on each run.
            has_coords = metadata.index.isin(tbl.index)
            metadata_train = metadata[(metadata[tt_col] == 'train') & has_coords]
            metadata_test = metadata[(metadata[tt_col] == 'test') & has_coords]
            y_train = [covert_map[g] for g in metadata_train['groups'].values]
            y_test = [covert_map[g] for g in metadata_test['groups'].values]
            test_coords = tbl.loc[metadata_test.index, :]
            X_train = tbl.loc[metadata_train.index, :].values
            X_test = test_coords.values

            # permanova on test data (projection); only the test statistic is
            # kept, so a single permutation is requested
            dist_tmp = DistanceMatrix(distance.cdist(test_coords, test_coords),
                                      test_coords.index)
            permanova_scores[score_key].append(
                permanova(dist_tmp, metadata_test.loc[dist_tmp.ids, ['groups']].iloc[:, 0], permutations=1)['test statistic'])

            # ML: scale on the training samples only, then fit and score
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            classifier.fit(X_train, y_train)
            y_score = classifier.predict_proba(X_test)[:, 1]
            apr_scores[score_key].append(average_precision_score(y_test, y_score))
            auc_roc_scores[score_key].append(roc_auc_score(y_test, y_score))

  0%|          | 0/10 [00:00<?, ?it/s]

In [45]:
# Evaluate the methods that return one ordination per projection view (MOFA,
# mixOmics). For every fold, signal level and view:
#   * PERMANOVA pseudo-F of the groups on the test-sample coordinates
#   * random-forest average precision and ROC AUC
# Scores are collected in lists keyed by (n_induced, view, method).
ord_res_all = {**{(k[0], k[1], k[2], 'MOFA'):v for k, v in ord_res_mofa.items()},
               **{(k[0], k[1], k[2], 'mixOmics'):v for k, v in ord_res_mixomics.items()}}

permanova_mofa = {}
apr_mofa = {}
auc_mofa = {}

cols_learn = ['PC1','PC2','PC3']
# 'g0' is encoded as the positive class
covert_map = {'g0': 1, 'g1': 0}
classifier = RandomForestClassifier(n_estimators=500, random_state=1010)

for fold in tqdm(range(1, 11)):
    tt_col = 'train_test_%i' % (fold)
    # iterate through each signal level
    for depth_ in n_induced_idx:
        for metric_ in ['MOFA', 'mixOmics']:
            # and then each projection
            for other_tbl_ in ['f_100', 'f_150', 'f_200']:
                # make sure a result list exists for this (level, view, method)
                score_key = (depth_, other_tbl_, metric_)
                permanova_mofa.setdefault(score_key, [])
                apr_mofa.setdefault(score_key, [])
                auc_mofa.setdefault(score_key, [])

                # get the ordination data
                tbl = ord_res_all[(depth_, fold, other_tbl_, metric_)].copy()
                # there might be iterations where only 2PCs were returned:
                # drop every column containing NaN, then keep the PCs that remain
                tbl = tbl.dropna(axis=1)
                cols_learn_ = [col for col in cols_learn if col in tbl.columns]
                tbl = tbl[cols_learn_].dropna(subset=cols_learn_)

                # Split the samples that have coordinates into train and test,
                # keeping the metadata row order. The previous set-intersection
                # ordering depended on set iteration order, which is not stable
                # between kernel sessions for string ids, so the seeded
                # RandomForest saw its rows in a different order on each run.
                has_coords = metadata.index.isin(tbl.index)
                metadata_train = metadata[(metadata[tt_col] == 'train') & has_coords]
                metadata_test = metadata[(metadata[tt_col] == 'test') & has_coords]
                y_train = [covert_map[g] for g in metadata_train['groups'].values]
                y_test = [covert_map[g] for g in metadata_test['groups'].values]
                test_coords = tbl.loc[metadata_test.index, :]
                X_train = tbl.loc[metadata_train.index, :].values
                X_test = test_coords.values

                # permanova on test data (projection); only the test statistic
                # is kept, so a single permutation is requested
                dist_tmp = DistanceMatrix(distance.cdist(test_coords, test_coords),
                                          test_coords.index)
                permanova_mofa[score_key].append(
                    permanova(dist_tmp, metadata_test.loc[dist_tmp.ids, ['groups']].iloc[:, 0], permutations=1)['test statistic'])

                # ML: scale on the training samples only, then fit and score
                scaler = StandardScaler()
                X_train = scaler.fit_transform(X_train)
                X_test = scaler.transform(X_test)
                classifier.fit(X_train, y_train)
                y_score = classifier.predict_proba(X_test)[:, 1]
                apr_mofa[score_key].append(average_precision_score(y_test, y_score))
                auc_mofa[score_key].append(roc_auc_score(y_test, y_score))

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
#joint-rpca
joint_permanova_all_df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in permanova_scores.items() ])).T.stack().reset_index().dropna(axis=1)
joint_permanova_all_df.columns = ['ninduced','method','fold','f_stat']
joint_permanova_all_df['projection'] = 'All'
#mofa
mofa_permanova_all_df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in permanova_mofa.items() ])).T.stack().reset_index().dropna(axis=1)
mofa_permanova_all_df.columns = ['ninduced','projection','method','fold','f_stat']
mofa_permanova_all_df = mofa_permanova_all_df[['ninduced','method','fold','f_stat','projection']]
#append
permanova_all_df = pd.concat([joint_permanova_all_df, mofa_permanova_all_df], axis=0)
print(permanova_all_df.method.value_counts())
permanova_all_df.to_csv('../../data/simulations/lowrank/permanova-sample-level-fsignal-all.csv')
display(permanova_all_df.head())
#display(permanova_all_df.tail())

mixOmics      210
MOFA          210
iCluster       70
Joint-RPCA     70
intNMF         70
Name: method, dtype: int64


Unnamed: 0,ninduced,method,fold,f_stat,projection
0,5,iCluster,0,0.639686,All
1,5,iCluster,1,0.814637,All
2,5,iCluster,2,0.715067,All
3,5,iCluster,3,0.45315,All
4,5,iCluster,4,0.369297,All


In [58]:
#joint-rpca
joint_apr_all_df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in apr_scores.items() ])).T.stack().reset_index().dropna(axis=1)
joint_apr_all_df.columns = ['ninduced','method','fold','apr']
joint_apr_all_df['projection'] = 'All'
#mofa
mofa_apr_all_df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in apr_mofa.items() ])).T.stack().reset_index().dropna(axis=1)
mofa_apr_all_df.columns = ['ninduced','projection','method','fold','apr']
mofa_apr_all_df = mofa_apr_all_df[['ninduced','method','fold','apr','projection']]
#append
apr_all_df = pd.concat([joint_apr_all_df, mofa_apr_all_df], axis=0)
apr_all_df['apr_error'] = 1 - apr_all_df.apr
print(apr_all_df.method.value_counts())
apr_all_df.to_csv('../../data/simulations/lowrank/apr-sample-level-fsignal-all.csv')
display(apr_all_df.head())
#display(apr_all_df.tail())

mixOmics      210
MOFA          210
iCluster       70
Joint-RPCA     70
intNMF         70
Name: method, dtype: int64


Unnamed: 0,ninduced,method,fold,apr,projection,apr_error
0,5,iCluster,0,0.661565,All,0.338435
1,5,iCluster,1,0.615843,All,0.384157
2,5,iCluster,2,0.538258,All,0.461742
3,5,iCluster,3,0.700168,All,0.299832
4,5,iCluster,4,0.805639,All,0.194361


In [59]:
#joint-rpca
joint_auc_all_df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in auc_roc_scores.items() ])).T.stack().reset_index().dropna(axis=1)
joint_auc_all_df.columns = ['ninduced','method','fold','roc_auc']
joint_auc_all_df['projection'] = 'All'
#mofa
mofa_auc_all_df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in auc_mofa.items() ])).T.stack().reset_index().dropna(axis=1)
mofa_auc_all_df.columns = ['ninduced','projection','method','fold','roc_auc']
mofa_auc_all_df = mofa_auc_all_df[['ninduced','method','fold','roc_auc','projection']]
#append
roc_all_df = pd.concat([joint_auc_all_df, mofa_auc_all_df], axis=0)
roc_all_df['roc_error'] = 1 - roc_all_df.roc_auc
print(roc_all_df.method.value_counts())
roc_all_df.to_csv('../../data/simulations/lowrank/roc-sample-level-fsignal-all.csv')
display(roc_all_df.head())
#display(roc_all_df.tail())

mixOmics      210
MOFA          210
iCluster       70
Joint-RPCA     70
intNMF         70
Name: method, dtype: int64


Unnamed: 0,ninduced,method,fold,roc_auc,projection,roc_error
0,5,iCluster,0,0.666667,All,0.333333
1,5,iCluster,1,0.47619,All,0.52381
2,5,iCluster,2,0.404762,All,0.595238
3,5,iCluster,3,0.654762,All,0.345238
4,5,iCluster,4,0.654762,All,0.345238
