## Libraries

In [None]:
from sklearn.preprocessing import FunctionTransformer

from reskit.norms import binar_norm, wbysqdist
from reskit.norms import spectral_norm

from reskit.features import degrees,  pagerank

from reskit.core import Transformer, Pipeliner

from sklearn.feature_selection import VarianceThreshold

from sklearn.preprocessing import MinMaxScaler

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier 
from xgboost import XGBClassifier

from sklearn.model_selection import StratifiedKFold

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

import os
import pandas as pd
import numpy as np

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

def orig(x):
    return x

In [2]:
import copy

## Function

In [3]:
def get_autism(path_to_read='Data/dti/', distances=True):
    def get_autism_distances(loc_name):
        with open(loc_name, 'r') as f:
            read_data = f.readlines()

        read_data = pd.DataFrame(
            np.array([np.array(item[:-1].split()).astype(int) for item in read_data]))

        return read_data

    def get_distance_matrix(coords):
        if type(coords) == pd.core.frame.DataFrame:
            coords = coords.values
        elif type(coords) != np.ndarray:
            print('Provide either pandas df or numpy array!')
            return -1

        shape = len(coords)
        dist_matrix = np.zeros((shape, shape))
        del shape
        for i in range(len(coords)):
            for j in range(i + 1, len(coords)):
                dist_matrix[i, j] = np.linalg.norm(coords[i, :] - coords[j, :])
                dist_matrix[j, i] = dist_matrix[i, j]
        return dist_matrix

    target_vector = []  # this will be a target vector (diagnosis)
    matrices = []  # this will be a list of connectomes
    all_files = sorted(os.listdir(path_to_read))
    matrix_files = [
        item for item in all_files if 'DTI_connectivity' in item and 'All' not in item]
    distance_files = [
        item for item in all_files if 'DTI_region_xyz_centers' in item and 'All' not in item]

    # for each file in a sorted (!) list of files:
    for filename in matrix_files:

        A_dataframe = pd.read_csv(
            path_to_read + filename, sep='   ', header=None, engine='python')
        A = A_dataframe.values  # we will use a list of numpy arrays, NOT pandas dataframes
        matrices.append(A)# append a matrix to our list
        if "ASD" in filename:
            target_vector.append(1)
        elif "TD" in filename:
            target_vector.append(0)
    asd_dict = {}
    asd_dict['X'] = np.array(matrices)
    asd_dict['y'] = np.array(target_vector)
    if distances:
        dist_matrix_list = []
        for item in distance_files:
            # print(item)
            cur_coord = get_autism_distances(path_to_read + item)
            cur_dist_mtx = get_distance_matrix(cur_coord)
            dist_matrix_list += [cur_dist_mtx]

        asd_dict['dist'] = np.array(dist_matrix_list)

    return asd_dict

In [4]:
import matrix_eig as me

## Train monster

In [5]:
list_of_files = ["results_1.csv", "results_2.csv", "results_3.csv", "results_4.csv", "results_5.csv", 
                 "results_6.csv", "results_7.csv", "results_8.csv", "results_9.csv", "results_10.csv",
                 "results_11.csv", "results_12.csv", "results_13.csv", "results_14.csv", "results_15.csv",
                 "results_16.csv", "results_17.csv", "results_18.csv", "results_19.csv", "results_20.csv"]

### Log Reg

### Constant parameters

In [8]:
data = [('UCLAsource', Transformer(get_autism))]

weighters = [('origW', Transformer(orig)),
             ('binar', Transformer(orig))]

normalizers = [('origN', Transformer(orig)),
               ('spectral', Transformer(spectral_norm))]

featurizers = [('origF', Transformer(me.orig_vec, collect=['X_vec'])),
               ('low0', Transformer(me.matrix_eig_0, collect=['X_vec'])),
               ('low5', Transformer(me.matrix_eig_5, collect=['X_vec'])),
               ('low10', Transformer(me.matrix_eig_10, collect=['X_vec'])),
               ('low20', Transformer(me.matrix_eig_20, collect=['X_vec'])),
               ('low30', Transformer(me.matrix_eig_30, collect=['X_vec'])),
               ('low40', Transformer(me.matrix_eig_40, collect=['X_vec'])),
               ('low50', Transformer(me.matrix_eig_50, collect=['X_vec'])),
               ('low60', Transformer(me.matrix_eig_60, collect=['X_vec'])),
               ('low70', Transformer(me.matrix_eig_70, collect=['X_vec'])),
               ('low80', Transformer(me.matrix_eig_80, collect=['X_vec'])),
               ('low90', Transformer(me.matrix_eig_90, collect=['X_vec'])),
               ('low100', Transformer(me.matrix_eig_100, collect=['X_vec'])),
               ('low110', Transformer(me.matrix_eig_110, collect=['X_vec'])),
               ('low120', Transformer(me.matrix_eig_120, collect=['X_vec'])),
               ('low130', Transformer(me.matrix_eig_130, collect=['X_vec'])),
               ('low140', Transformer(me.matrix_eig_140, collect=['X_vec'])),
               ('low150', Transformer(me.matrix_eig_150, collect=['X_vec'])),
               ('low160', Transformer(me.matrix_eig_160, collect=['X_vec'])),
               ('low170', Transformer(me.matrix_eig_170, collect=['X_vec'])),
               ('low180', Transformer(me.matrix_eig_180, collect=['X_vec'])),
               ('low190', Transformer(me.matrix_eig_190, collect=['X_vec'])),
               ('low200', Transformer(me.matrix_eig_200, collect=['X_vec'])),
               ('low210', Transformer(me.matrix_eig_210, collect=['X_vec'])),
               ('low220', Transformer(me.matrix_eig_220, collect=['X_vec'])),
               ('low230', Transformer(me.matrix_eig_230, collect=['X_vec'])),
               ('low240', Transformer(me.matrix_eig_240, collect=['X_vec'])),
               ('low250', Transformer(me.matrix_eig_250, collect=['X_vec'])),
               ('low260', Transformer(me.matrix_eig_260, collect=['X_vec'])),
               ('low263', Transformer(me.matrix_eig_263, collect=['X_vec']))
              ]

selectors = [('var_threshold', VarianceThreshold())]

scalers = [('minmax', MinMaxScaler())]

classifiers = [('LR', LogisticRegression())]

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

param_grid = dict(
     LR=dict(
        C=[0.1*i for i in range(1, 11)],
        max_iter=[50, 100, 500],
        penalty=['l1', 'l2']
    )
    )

### Variable parameters

In [99]:
banned_combos = [('LR', 'origS'),
                 ('LR', 'origN'),
                 ('binar', 'LR')]

list_control = "result_control.csv"



In [14]:
grid_cv = StratifiedKFold(n_splits=10,
                          shuffle=True,
                          random_state=0)

eval_cv = StratifiedKFold(n_splits=10,
                             shuffle=True,
                            random_state=1)

pipe = Pipeliner(steps, eval_cv=eval_cv, grid_cv=grid_cv, param_grid=param_grid, banned_combos=banned_combos)
pipe.plan_table

Unnamed: 0,Data,Weighters,Normalizers,Featurizers,Selectors,Scalers,Classifiers
0,UCLAsource,origW,spectral,origF,var_threshold,minmax,LR
1,UCLAsource,origW,spectral,low0,var_threshold,minmax,LR
2,UCLAsource,origW,spectral,low5,var_threshold,minmax,LR
3,UCLAsource,origW,spectral,low10,var_threshold,minmax,LR
4,UCLAsource,origW,spectral,low20,var_threshold,minmax,LR
5,UCLAsource,origW,spectral,low30,var_threshold,minmax,LR
6,UCLAsource,origW,spectral,low40,var_threshold,minmax,LR
7,UCLAsource,origW,spectral,low50,var_threshold,minmax,LR
8,UCLAsource,origW,spectral,low60,var_threshold,minmax,LR
9,UCLAsource,origW,spectral,low70,var_threshold,minmax,LR


In [15]:
pipe.get_results('Data/dti/', caching_steps=['Data', 'Weighters', 'Normalizers', 'Featurizers'],
                 scoring=['roc_auc'], results_file = "Orig-Spectral_ofTheBest/" + list_control)

No previous results found.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30


Unnamed: 0,Data,Weighters,Normalizers,Featurizers,Selectors,Scalers,Classifiers,grid_roc_auc_mean,grid_roc_auc_std,grid_roc_auc_best_params,eval_roc_auc_mean,eval_roc_auc_std,eval_roc_auc_scores
0,UCLAsource,origW,spectral,origF,var_threshold,minmax,LR,0.579433,0.187257,"{'penalty': 'l2', 'C': 0.1, 'max_iter': 50}",0.557667,0.175297,[ 0.56666667 0.56 0.6 0.75 ...
1,UCLAsource,origW,spectral,low0,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.539333,0.176609,[ 0.63333333 0.36 0.8 0.45 ...
2,UCLAsource,origW,spectral,low5,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.539333,0.176609,[ 0.63333333 0.36 0.8 0.45 ...
3,UCLAsource,origW,spectral,low10,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.534333,0.183872,[ 0.63333333 0.36 0.8 0.45 ...
4,UCLAsource,origW,spectral,low20,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.534333,0.183872,[ 0.63333333 0.36 0.8 0.45 ...
5,UCLAsource,origW,spectral,low30,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.539333,0.176609,[ 0.63333333 0.36 0.8 0.45 ...
6,UCLAsource,origW,spectral,low40,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.539333,0.176609,[ 0.63333333 0.36 0.8 0.45 ...
7,UCLAsource,origW,spectral,low50,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.529333,0.192035,[ 0.63333333 0.36 0.8 0.45 ...
8,UCLAsource,origW,spectral,low60,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.539333,0.176609,[ 0.63333333 0.36 0.8 0.45 ...
9,UCLAsource,origW,spectral,low70,var_threshold,minmax,LR,0.595035,0.191158,"{'penalty': 'l1', 'C': 0.2, 'max_iter': 50}",0.539333,0.176609,[ 0.63333333 0.36 0.8 0.45 ...


In [100]:
num_epoch = 20

weighters = [('origW', Transformer(orig))]

normalizers = [('spectral', Transformer(spectral_norm))]

classifiers = [('LR_orig', LogisticRegression()),
               ('LR_0', LogisticRegression()),
               ('LR_5', LogisticRegression()),
               ('LR_10', LogisticRegression()),
               ('LR_20', LogisticRegression()),
               ('LR_30', LogisticRegression()),
               ('LR_40', LogisticRegression()),
               ('LR_50', LogisticRegression()),
               ('LR_60', LogisticRegression()),
               ('LR_70', LogisticRegression()),
               ('LR_80', LogisticRegression()),
               ('LR_90', LogisticRegression()),
               ('LR_100', LogisticRegression()),
               ('LR_110', LogisticRegression()),
               ('LR_120', LogisticRegression()),
               ('LR_130', LogisticRegression()),
               ('LR_140', LogisticRegression()),
               ('LR_150', LogisticRegression()),
               ('LR_160', LogisticRegression()),
               ('LR_170', LogisticRegression()),
               ('LR_180', LogisticRegression()),
               ('LR_190', LogisticRegression()),
               ('LR_200', LogisticRegression()),
               ('LR_210', LogisticRegression()),
               ('LR_220', LogisticRegression()),
               ('LR_230', LogisticRegression()),
               ('LR_240', LogisticRegression()),
               ('LR_250', LogisticRegression()),
               ('LR_260', LogisticRegression()),
               ('LR_263', LogisticRegression()),
              ]


param_grid_LR = dict(
    LR_orig = dict(penalty= ['l2'], C= [0.1], max_iter= [50]),
    LR_0    = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_5    = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_10   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_20   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_30   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_40   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_50   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_60   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_70   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_80   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_90   = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_100  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_110  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_120  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_130  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_140  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_150  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_160  = dict(penalty= ['l1'], C= [0.9], max_iter= [500]),
    LR_170  = dict(penalty= ['l1'], C= [0.9], max_iter= [100]),
    LR_180  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_190  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_200  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_210  = dict(penalty= ['l1'], C= [0.2], max_iter= [50]),
    LR_220  = dict(penalty= ['l1'], C= [0.4], max_iter= [50]),
    LR_230  = dict(penalty= ['l1'], C= [0.4], max_iter= [500]),
    LR_240  = dict(penalty= ['l1'], C= [0.4], max_iter= [50]),
    LR_250  = dict(penalty= ['l1'], C= [0.4], max_iter= [50]),
    LR_260  = dict(penalty= ['l1'], C= [0.3], max_iter= [50]),
    LR_263  = dict(penalty= ['l1'], C= [0.3], max_iter= [50])
    )

In [101]:
k = [0, 5]+ [10*i for i in range(1, 27)] + [263]
col = pd.read_csv("Orig-Spectral_ofTheBest/" + list_control).as_matrix(["Featurizers"])
col_f = np.delete(np.hstack(col), 0)

In [102]:
banned_combos = []

for n,fet in enumerate(col_f):
    k_tmp = copy.copy(k)
    k_tmp.remove(k[n])
    for i in k_tmp: banned_combos.append((fet, 'LR_' + str(i)))
        
for i in k: banned_combos.append(('origF', 'LR_' + str(i)))
    
for fet in col_f: banned_combos.append((fet, 'LR_orig'))

In [103]:
steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

In [104]:
for epoch in range(num_epoch):

    grid_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=0)

    eval_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=1)

    
    
    pipe = Pipeliner(steps, eval_cv=eval_cv, grid_cv=grid_cv, param_grid=param_grid_LR, banned_combos=banned_combos)
    #pipe.plan_table
    
    pipe.get_results('Data/dti/', caching_steps=['Data', 'Weighters', 'Normalizers', 'Featurizers'],
                 scoring=['roc_auc'], results_file = "Orig-Spectral_ofTheBest/" + list_of_files[epoch])

Removed previous results file -- Orig-Spectral_ofTheBest/results_1.csv.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30
Removed previous results file -- Orig-Spectral_ofTheBest/results_2.csv.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30
Removed previous results file -- Orig-Spectral_ofTheBest/results_3.csv.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line:

In [109]:
data = [('UCLAsource', Transformer(get_autism))]

weighters = [('origW', Transformer(orig)),
             ('binar', Transformer(orig))]

normalizers = [('origN', Transformer(orig)),
               ('spectral', Transformer(spectral_norm))]

featurizers = [('origF', Transformer(me.orig_vec, collect=['X_vec'])),
               ('low0', Transformer(me.matrix_eig_0, collect=['X_vec'])),
               ('low5', Transformer(me.matrix_eig_5, collect=['X_vec'])),
               ('low10', Transformer(me.matrix_eig_10, collect=['X_vec'])),
               ('low20', Transformer(me.matrix_eig_20, collect=['X_vec'])),
               ('low30', Transformer(me.matrix_eig_30, collect=['X_vec'])),
               ('low40', Transformer(me.matrix_eig_40, collect=['X_vec'])),
               ('low50', Transformer(me.matrix_eig_50, collect=['X_vec'])),
               ('low60', Transformer(me.matrix_eig_60, collect=['X_vec'])),
               ('low70', Transformer(me.matrix_eig_70, collect=['X_vec'])),
               ('low80', Transformer(me.matrix_eig_80, collect=['X_vec'])),
               ('low90', Transformer(me.matrix_eig_90, collect=['X_vec'])),
               ('low100', Transformer(me.matrix_eig_100, collect=['X_vec'])),
               ('low110', Transformer(me.matrix_eig_110, collect=['X_vec'])),
               ('low120', Transformer(me.matrix_eig_120, collect=['X_vec'])),
               ('low130', Transformer(me.matrix_eig_130, collect=['X_vec'])),
               ('low140', Transformer(me.matrix_eig_140, collect=['X_vec'])),
               ('low150', Transformer(me.matrix_eig_150, collect=['X_vec'])),
               ('low160', Transformer(me.matrix_eig_160, collect=['X_vec'])),
               ('low170', Transformer(me.matrix_eig_170, collect=['X_vec'])),
               ('low180', Transformer(me.matrix_eig_180, collect=['X_vec'])),
               ('low190', Transformer(me.matrix_eig_190, collect=['X_vec'])),
               ('low200', Transformer(me.matrix_eig_200, collect=['X_vec'])),
               ('low210', Transformer(me.matrix_eig_210, collect=['X_vec'])),
               ('low220', Transformer(me.matrix_eig_220, collect=['X_vec'])),
               ('low230', Transformer(me.matrix_eig_230, collect=['X_vec'])),
               ('low240', Transformer(me.matrix_eig_240, collect=['X_vec'])),
               ('low250', Transformer(me.matrix_eig_250, collect=['X_vec'])),
               ('low260', Transformer(me.matrix_eig_260, collect=['X_vec'])),
               ('low263', Transformer(me.matrix_eig_263, collect=['X_vec']))
              ]

selectors = [('var_threshold', VarianceThreshold())]

scalers = [('minmax', MinMaxScaler())]

classifiers = [('LR', LogisticRegression())]

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

param_grid = dict(
     LR=dict(
        C=[0.1*i for i in range(1, 11)],
        max_iter=[50, 100, 500],
        penalty=['l1', 'l2']
    )
    )

banned_combos = [('LR', 'origS'),
                 ('origW', 'LR'),
                 ('spectral', 'LR')]

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

grid_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=0)

eval_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=1)

pipe = Pipeliner(steps, eval_cv=eval_cv, grid_cv=grid_cv, param_grid=param_grid, banned_combos=banned_combos)
pipe.plan_table

Unnamed: 0,Data,Weighters,Normalizers,Featurizers,Selectors,Scalers,Classifiers
0,UCLAsource,binar,origN,origF,var_threshold,minmax,LR
1,UCLAsource,binar,origN,low0,var_threshold,minmax,LR
2,UCLAsource,binar,origN,low5,var_threshold,minmax,LR
3,UCLAsource,binar,origN,low10,var_threshold,minmax,LR
4,UCLAsource,binar,origN,low20,var_threshold,minmax,LR
5,UCLAsource,binar,origN,low30,var_threshold,minmax,LR
6,UCLAsource,binar,origN,low40,var_threshold,minmax,LR
7,UCLAsource,binar,origN,low50,var_threshold,minmax,LR
8,UCLAsource,binar,origN,low60,var_threshold,minmax,LR
9,UCLAsource,binar,origN,low70,var_threshold,minmax,LR


In [110]:
pipe.get_results('Data/dti/', caching_steps=['Data', 'Weighters', 'Normalizers', 'Featurizers'],
                 scoring=['roc_auc'], results_file = "Binar-Orig_ofTheBest/" + list_control)

Removed previous results file -- Binar-Orig_ofTheBest/result_control.csv.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30


Unnamed: 0,Data,Weighters,Normalizers,Featurizers,Selectors,Scalers,Classifiers,grid_roc_auc_mean,grid_roc_auc_std,grid_roc_auc_best_params,eval_roc_auc_mean,eval_roc_auc_std,eval_roc_auc_scores
0,UCLAsource,binar,origN,origF,var_threshold,minmax,LR,0.607801,0.157715,"{'penalty': 'l2', 'C': 0.1, 'max_iter': 50}",0.593333,0.159332,[ 0.63333333 0.56 0.64 0.85 ...
1,UCLAsource,binar,origN,low0,var_threshold,minmax,LR,0.564539,0.136666,"{'penalty': 'l1', 'C': 0.7000000000000001, 'ma...",0.527333,0.147752,[ 0.63333333 0.76 0.48 0.65 ...
2,UCLAsource,binar,origN,low5,var_threshold,minmax,LR,0.593972,0.115015,"{'penalty': 'l1', 'C': 1.0, 'max_iter': 50}",0.505667,0.163531,[ 0.56666667 0.72 0.32 0.8 ...
3,UCLAsource,binar,origN,low10,var_threshold,minmax,LR,0.594504,0.11594,"{'penalty': 'l1', 'C': 1.0, 'max_iter': 50}",0.52,0.136308,[ 0.6 0.68 0.32 0.75 0.5 0.6 0.5 0....
4,UCLAsource,binar,origN,low20,var_threshold,minmax,LR,0.625887,0.129411,"{'penalty': 'l1', 'C': 0.9, 'max_iter': 50}",0.578,0.115482,[ 0.5 0.64 0.64 0.85 0.5 0.6 0.5 0....
5,UCLAsource,binar,origN,low30,var_threshold,minmax,LR,0.58617,0.165254,"{'penalty': 'l1', 'C': 1.0, 'max_iter': 50}",0.555,0.136033,[ 0.5 0.68 0.52 0.75 0.5 0.75 0.35 0....
6,UCLAsource,binar,origN,low40,var_threshold,minmax,LR,0.62234,0.182567,"{'penalty': 'l1', 'C': 0.9, 'max_iter': 100}",0.578,0.170341,[ 0.7 0.72 0.76 0.7 0.5 0.7 0.3 0....
7,UCLAsource,binar,origN,low50,var_threshold,minmax,LR,0.610461,0.163509,"{'penalty': 'l1', 'C': 0.8, 'max_iter': 500}",0.56,0.137768,[ 0.6 0.68 0.72 0.65 0.6 0.6 0.5 0....
8,UCLAsource,binar,origN,low60,var_threshold,minmax,LR,0.622695,0.161008,"{'penalty': 'l1', 'C': 1.0, 'max_iter': 500}",0.587333,0.151495,[ 0.63333333 0.56 0.88 0.75 ...
9,UCLAsource,binar,origN,low70,var_threshold,minmax,LR,0.648936,0.1679,"{'penalty': 'l1', 'C': 0.9, 'max_iter': 100}",0.591667,0.176423,[ 0.66666667 0.6 1. 0.7 ...


In [None]:
data = [('UCLAsource', Transformer(get_autism))]

weighters = [('binar', Transformer(binar_no))]

normalizers = [('origN', Transformer(orig)),
               ('spectral', Transformer(spectral_norm))]

featurizers = [('origF', Transformer(me.orig_vec, collect=['X_vec'])),
               ('low230', Transformer(me.matrix_eig_230, collect=['X_vec'])),
               ('low240', Transformer(me.matrix_eig_240, collect=['X_vec'])),
               ('low250', Transformer(me.matrix_eig_250, collect=['X_vec'])),
               ('low260', Transformer(me.matrix_eig_260, collect=['X_vec']))]

selectors = [('var_threshold', VarianceThreshold())]

scalers = [('minmax', MinMaxScaler())]

classifiers = [('LR_orig' , LogisticRegression()),
               ('LR_binar', LogisticRegression())]

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

param_grid = dict(
     LR_orig =dict(penalty=['l1'], C=[0.4], max_iter=[50]),
     LR_binar=dict(penalty=['l2'], C=[0.6], max_iter=[50])
    )

banned_combos = [('LR_orig', 'origN'),
                 ('LR_binar', 'spectral'),
                 ('LR_orig', 'low250'),
                 ('LR_orig', 'low260'),
                 ('LR_binar', 'low230'),
                 ('LR_binar', 'low240')]


## SVC

In [10]:
data = [('UCLAsource', Transformer(get_autism))]

weighters = [('binar', Transformer(orig))]

normalizers = [('origN', Transformer(orig))]

featurizers = [('origF', Transformer(me.orig_vec, collect=['X_vec'])),
               ('low0', Transformer(me.matrix_eig_0, collect=['X_vec'])),
               ('low5', Transformer(me.matrix_eig_5, collect=['X_vec'])),
               ('low10', Transformer(me.matrix_eig_10, collect=['X_vec'])),
               ('low20', Transformer(me.matrix_eig_20, collect=['X_vec'])),
               ('low30', Transformer(me.matrix_eig_30, collect=['X_vec'])),
               ('low40', Transformer(me.matrix_eig_40, collect=['X_vec'])),
               ('low50', Transformer(me.matrix_eig_50, collect=['X_vec'])),
               ('low60', Transformer(me.matrix_eig_60, collect=['X_vec'])),
               ('low70', Transformer(me.matrix_eig_70, collect=['X_vec'])),
               ('low80', Transformer(me.matrix_eig_80, collect=['X_vec'])),
               ('low90', Transformer(me.matrix_eig_90, collect=['X_vec'])),
               ('low100', Transformer(me.matrix_eig_100, collect=['X_vec'])),
               ('low110', Transformer(me.matrix_eig_110, collect=['X_vec'])),
               ('low120', Transformer(me.matrix_eig_120, collect=['X_vec'])),
               ('low130', Transformer(me.matrix_eig_130, collect=['X_vec'])),
               ('low140', Transformer(me.matrix_eig_140, collect=['X_vec'])),
               ('low150', Transformer(me.matrix_eig_150, collect=['X_vec'])),
               ('low160', Transformer(me.matrix_eig_160, collect=['X_vec'])),
               ('low170', Transformer(me.matrix_eig_170, collect=['X_vec'])),
               ('low180', Transformer(me.matrix_eig_180, collect=['X_vec'])),
               ('low190', Transformer(me.matrix_eig_190, collect=['X_vec'])),
               ('low200', Transformer(me.matrix_eig_200, collect=['X_vec'])),
               ('low210', Transformer(me.matrix_eig_210, collect=['X_vec'])),
               ('low220', Transformer(me.matrix_eig_220, collect=['X_vec'])),
               ('low230', Transformer(me.matrix_eig_230, collect=['X_vec'])),
               ('low240', Transformer(me.matrix_eig_240, collect=['X_vec'])),
               ('low250', Transformer(me.matrix_eig_250, collect=['X_vec'])),
               ('low260', Transformer(me.matrix_eig_260, collect=['X_vec'])),
               ('low263', Transformer(me.matrix_eig_263, collect=['X_vec']))
              ]

selectors = [('var_threshold', VarianceThreshold())]

scalers = [('minmax', MinMaxScaler())]

classifiers = [('SVC', SVC())]

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

param_grid = dict(
     SVC=dict(
        C=[0.0005, 0.001, 0.005, 0.01] + [i*0.05 for i in range(1,11)],
        degree=[2, 3, 4],
        kernel=['linear', 'poly', 'rbf', 'sigmoid'],
        max_iter=[50, 100, 150],
    )
    )

banned_combos = []

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

In [None]:
num_epoch = 10
for epoch in range(num_epoch):

    grid_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=0)

    eval_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=1)

    
    pipe = Pipeliner(steps, eval_cv=eval_cv, grid_cv=grid_cv, param_grid=param_grid, banned_combos=banned_combos)
    #pipe.plan_table
    
    pipe.get_results('Data/dti/', caching_steps=['Data', 'Weighters', 'Normalizers', 'Featurizers'],
                 scoring=['roc_auc'], results_file = "SVC_Binar/" + list_of_files[epoch])

Removed previous results file -- SVC_Binar/results_1.csv.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30
No previous results found.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30
No previous results found.
Line: 1/30


In [10]:
data = [('UCLAsource', Transformer(get_autism))]

weighters = [('binar', Transformer(binar_norm))]

normalizers = [('origN', Transformer(orig))]

featurizers = [('origF', Transformer(me.orig_vec, collect=['X_vec'])),
               ('low0', Transformer(me.matrix_eig_0, collect=['X_vec'])),
               ('low5', Transformer(me.matrix_eig_5, collect=['X_vec'])),
               ('low10', Transformer(me.matrix_eig_10, collect=['X_vec'])),
               ('low20', Transformer(me.matrix_eig_20, collect=['X_vec'])),
               ('low30', Transformer(me.matrix_eig_30, collect=['X_vec'])),
               ('low40', Transformer(me.matrix_eig_40, collect=['X_vec'])),
               ('low50', Transformer(me.matrix_eig_50, collect=['X_vec'])),
               ('low60', Transformer(me.matrix_eig_60, collect=['X_vec'])),
               ('low70', Transformer(me.matrix_eig_70, collect=['X_vec'])),
               ('low80', Transformer(me.matrix_eig_80, collect=['X_vec'])),
               ('low90', Transformer(me.matrix_eig_90, collect=['X_vec'])),
               ('low100', Transformer(me.matrix_eig_100, collect=['X_vec'])),
               ('low110', Transformer(me.matrix_eig_110, collect=['X_vec'])),
               ('low120', Transformer(me.matrix_eig_120, collect=['X_vec'])),
               ('low130', Transformer(me.matrix_eig_130, collect=['X_vec'])),
               ('low140', Transformer(me.matrix_eig_140, collect=['X_vec'])),
               ('low150', Transformer(me.matrix_eig_150, collect=['X_vec'])),
               ('low160', Transformer(me.matrix_eig_160, collect=['X_vec'])),
               ('low170', Transformer(me.matrix_eig_170, collect=['X_vec'])),
               ('low180', Transformer(me.matrix_eig_180, collect=['X_vec'])),
               ('low190', Transformer(me.matrix_eig_190, collect=['X_vec'])),
               ('low200', Transformer(me.matrix_eig_200, collect=['X_vec'])),
               ('low210', Transformer(me.matrix_eig_210, collect=['X_vec'])),
               ('low220', Transformer(me.matrix_eig_220, collect=['X_vec'])),
               ('low230', Transformer(me.matrix_eig_230, collect=['X_vec'])),
               ('low240', Transformer(me.matrix_eig_240, collect=['X_vec'])),
               ('low250', Transformer(me.matrix_eig_250, collect=['X_vec'])),
               ('low260', Transformer(me.matrix_eig_260, collect=['X_vec'])),
               ('low263', Transformer(me.matrix_eig_263, collect=['X_vec']))
              ]

selectors = [('var_threshold', VarianceThreshold())]

scalers = [('minmax', MinMaxScaler())]

classifiers = [('SVC', SVC())]

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

param_grid = dict(
     SVC=dict(
        C=[0.001, 0.01] + [i*0.1 for i in range(1,6)],
        degree=[2, 3, 4],
        kernel=['linear', 'poly', 'rbf', 'sigmoid'],
        max_iter=[50, 100, 150],
    )
    )

banned_combos = []

steps = [('Data', data),
         ('Weighters', weighters),
         ('Normalizers', normalizers),
         ('Featurizers', featurizers),
         ('Selectors', selectors),
         ('Scalers', scalers),
         ('Classifiers', classifiers)]

In [None]:
num_epoch = 10
for epoch in range(num_epoch):

    grid_cv = StratifiedKFold(n_splits=10,
                              shuffle=True)

    eval_cv = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=1)

    
    pipe = Pipeliner(steps, eval_cv=eval_cv, grid_cv=grid_cv, param_grid=param_grid, banned_combos=banned_combos)
    #pipe.plan_table
    
    pipe.get_results('Data/dti/', caching_steps=['Data', 'Weighters', 'Normalizers', 'Featurizers'],
                 scoring=['roc_auc'], results_file = "SVC_Binar/" + list_of_files[epoch])

Removed previous results file -- SVC_Binar/results_1.csv.
Line: 1/30
Line: 2/30
Line: 3/30
Line: 4/30
Line: 5/30
Line: 6/30
Line: 7/30
Line: 8/30
Line: 9/30
Line: 10/30
Line: 11/30
Line: 12/30
Line: 13/30
Line: 14/30
Line: 15/30
Line: 16/30
Line: 17/30
Line: 18/30
Line: 19/30
Line: 20/30
Line: 21/30
Line: 22/30
Line: 23/30
Line: 24/30
Line: 25/30
Line: 26/30
Line: 27/30
Line: 28/30
Line: 29/30
Line: 30/30
Removed previous results file -- SVC_Binar/results_2.csv.
Line: 1/30
Line: 2/30
Line: 3/30


Process PoolWorker-1038:
Traceback (most recent call last):
  File "/home/ubuntu/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/anaconda2/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    task = get()
  File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py", line 362, in get
KeyboardInterrupt
    return recv()


KeyboardInterrupt: 