### Подключим необходимые библиотеки

In [None]:
from sklearn.preprocessing import FunctionTransformer

from reskit.norms import binar_norm, wbysqdist
from reskit.norms import spectral_norm

from reskit.features import degrees,  pagerank

from sklearn.feature_selection import VarianceThreshold

from sklearn.preprocessing import MinMaxScaler

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier 
from xgboost import XGBClassifier

from sklearn.model_selection import StratifiedKFold

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

from reskit.core import Transformer, Pipeliner

import os
import pandas as pd
import numpy as np
import copy

def warn(*args, **kwargs):
    """Accept any arguments and do nothing.

    Installed below as a replacement for ``warnings.warn`` so that every
    call made through ``warnings.warn`` becomes a silent no-op.
    """
    return None
import warnings
# Monkeypatch: from here on, warnings.warn(...) discards its arguments.
warnings.warn = warn

def orig(x):
    """Identity function: return ``x`` itself, unchanged and uncopied."""
    return x

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.io
import scipy.sparse
import scipy.stats
from scipy import interp
import time
import networkx as nx
import sys
import igraph as ig

### Функция считывания данных

In [None]:
def get_autism(path_to_read='Data/dti/', distances=True):
    """Load connectivity matrices, diagnosis labels and (optionally) distances.

    Directory entries are processed in sorted order. Entries whose name
    contains ``'DTI_connectivity'`` (and not ``'All'``) are read as
    connectivity matrices; entries whose name contains
    ``'DTI_region_xyz_centers'`` (and not ``'All'``) are read as region
    coordinates. A matrix file is labelled 1 if its name contains ``'ASD'``
    and 0 if it contains ``'TD'``.

    Parameters
    ----------
    path_to_read : str
        Directory holding the data files (a trailing separator is optional).
    distances : bool
        When true, also build one pairwise Euclidean distance matrix per
        coordinate file.

    Returns
    -------
    dict
        ``'X'``: array of connectivity matrices, ``'y'``: array of labels,
        and, when ``distances`` is true, ``'dist'``: array of distance
        matrices.

    Raises
    ------
    ValueError
        If a connectivity file name contains neither ``'ASD'`` nor ``'TD'``;
        keeping such a matrix without a label would misalign ``X`` and ``y``.
    """
    def get_autism_distances(loc_name):
        # Parse a whitespace-separated table of integer coordinates.
        # str.split() already ignores the line terminator, so the last line
        # is read correctly whether or not the file ends with a newline.
        with open(loc_name, 'r') as f:
            rows = [line.split() for line in f]
        rows = [row for row in rows if row]  # ignore blank lines
        return pd.DataFrame(np.array(rows).astype(int))

    def get_distance_matrix(coords):
        # Pairwise Euclidean distances between the rows of `coords`.
        if isinstance(coords, pd.DataFrame):
            coords = coords.values
        elif not isinstance(coords, np.ndarray):
            raise TypeError('Provide either pandas df or numpy array!')

        coords = coords.astype(float)
        # Broadcasting yields every row-pair difference at once.
        deltas = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
        return np.sqrt(np.sum(deltas ** 2, axis=-1))

    target_vector = []  # diagnosis labels, aligned with `matrices`
    matrices = []  # connectomes as numpy arrays (not pandas dataframes)
    all_files = sorted(os.listdir(path_to_read))
    matrix_files = [
        item for item in all_files if 'DTI_connectivity' in item and 'All' not in item]
    distance_files = [
        item for item in all_files if 'DTI_region_xyz_centers' in item and 'All' not in item]

    # The file list is sorted, so the output order is deterministic.
    for filename in matrix_files:
        if "ASD" in filename:
            label = 1
        elif "TD" in filename:
            label = 0
        else:
            raise ValueError(
                'Cannot infer diagnosis (ASD/TD) from file name: ' + filename)

        A_dataframe = pd.read_csv(
            os.path.join(path_to_read, filename),
            sep='   ', header=None, engine='python')
        matrices.append(A_dataframe.values)
        target_vector.append(label)

    asd_dict = {
        'X': np.array(matrices),
        'y': np.array(target_vector),
    }
    if distances:
        asd_dict['dist'] = np.array([
            get_distance_matrix(
                get_autism_distances(os.path.join(path_to_read, item)))
            for item in distance_files
        ])

    return asd_dict


## Сделаем один пайплайн

### Функция понижения ранга матрицы

In [None]:
def matrix_eig(data, k = 30):
    """Add a reduced-rank copy of every matrix in ``data['X']``.

    Each matrix is eigendecomposed with ``np.linalg.eig``; the ``k``
    eigenvalues of smallest absolute value are dropped together with their
    eigenvectors, and the matrix is rebuilt from what remains.

    Parameters
    ----------
    data : dict
        Must hold an array of square matrices under ``'X'``, indexed along
        the first axis. The dict is modified in place.
    k : int
        Number of smallest-magnitude eigenvalues to discard per matrix.

    Returns
    -------
    dict
        The same ``data`` object, with the reconstructions stored under
        ``'X_low'`` as a float array of the same shape as ``data['X']``.
    """
    X = data['X']
    # Allocate as float explicitly: np.zeros_like would inherit an integer
    # dtype from X and silently truncate the reconstructed values.
    X_low = np.zeros(X.shape, dtype=float)
    for i in range(X.shape[0]):
        eigenvalues, eigenvectors = np.linalg.eig(X[i])
        drop = np.abs(eigenvalues).argsort()[:k]
        kept_vectors = np.delete(eigenvectors, drop, axis=1)
        kept_values = np.delete(eigenvalues, drop)
        kept_inverse = np.delete(np.linalg.inv(eigenvectors), drop, axis=0)
        reconstruction = kept_vectors.dot(np.diag(kept_values)).dot(kept_inverse)
        # eig may return complex output; only the real part is kept.
        X_low[i] = np.real(reconstruction)
    data['X_low'] = X_low
    return data

In [None]:
def symm_metrics(list_of_matrices, #accepts list of matrices
                 list_of_shortest_path_length_matrices = None, #accepts list of the respective SPL matrices

                 #node metrics:
                 degree=True, #weighted degrees
                 neighborhood_degree=True, #average neighbor degrees (weighted)
                 closeness_centrality=True, #inverse of the characteristic path length
                 betweenness_centrality=True,
                 eigenvector_centrality=True,
                 clustering_coefficient=True,
                 triangles=True, #weighted number of triangles
                 eccentricity = True, #max shortest path between this vertex and other vertices
                 characteristic_path_length = True, #mean of distances to other vertices
                 efficiency = True, #mean of inverse distances to other vertices
                 local_efficiency = True, #inverse of the shortest path in the neighborhood

                 #graph metrics
                 graph_characteristic_path_length = True, #mean of the local CPLs
                 graph_global_efficiency = True, #mean of the efficiencies
                 graph_local_efficiency = True, #mean of the local efficiencies
                 graph_clustering_coefficient = True, #mean clustering coefficient
                 graph_density=True, #weighted, normed by n*(n-1)
                 graph_assortativity=True, #weighted degrees are used within a regular formula
                 graph_assortativity_sporns=True, #weights are used as coefficients in addition to weighted
                                                  #degrees, as in Rubinov & Sporns (2010)
                 graph_max_clique=True, #two features: maximal and mean sum of weights over the largest cliques
                 graph_transitivity=True, #differs from the graph clustering coefficient,
                                          #see Rubinov & Sporns (2010) for formulas
                 graph_diameter=True, #maximal eccentricity
                 graph_radius=True, #three features: radius, number of central vertices and
                                    #the index of the central vertex (if one) or NaN (if several)
                 graph_alg_connectivity=True, #second-smallest eigenvalue of the Laplacian matrix
                 graph_freeman_degree=True, #Freeman degree centralization
                 graph_freeman_betweenness=True, #Freeman betweenness centralization
                 graph_freeman_closeness=True, #Freeman closeness centralization
                 graph_freeman_eigenvector=True #Freeman eigenvector centralization
                 ):
    """Compute node-level and graph-level metrics for weighted adjacency matrices.

    Parameters
    ----------
    list_of_matrices : sequence of 2-D numpy arrays
        Symmetric weighted adjacency matrices; all are assumed to have the
        size of the first one. Weights are assumed to be proportional to
        connection strength (inversely proportional to distance).
    list_of_shortest_path_length_matrices : sequence of 2-D arrays, optional
        Shortest-path-length matrices in the matching order. When ``None``
        they are computed with Dijkstra's algorithm on the element-wise
        inverse of each adjacency matrix.
    degree ... graph_freeman_eigenvector : bool
        Switches selecting which metrics appear in the output. A quantity
        needed by an enabled metric is computed internally even when its own
        switch is off (for example ``graph_transitivity`` works with
        ``triangles=False``).

    Returns
    -------
    (pandas.DataFrame, list of str)
        A frame with one row per input matrix and one column per feature:
        graph-level features first, then node-level features, both in the
        order of the parameters above. With every switch on this is
        ``20 + 13 * n_nodes`` columns. The list holds a long description of
        each column, in column order.

    Notes
    -----
    * Summaries of node metrics (percentiles, stds, ...) are not computed,
      except for graph metrics that are plain means of node metrics
      (characteristic path length, global efficiency, both local
      efficiencies, clustering coefficient). Centralities are summarised
      with the Freeman centralization formula (degree, closeness,
      betweenness, eigenvector).
    * Inputs are treated as weighted graphs; nothing checks for this, and
      no faster path exists for binary matrices.
    * Zero weights become ``inf`` in the inverted matrix (``1./A``).
    """
    # The file only does `import scipy.sparse`; import the submodules used
    # below explicitly so the attribute access cannot fail.
    import scipy.sparse.csgraph
    import scipy.sparse.linalg

    degree_rows = [] # node degrees
    neighb_deg = [] # average degree of the neighborhood of each node as in Barrat (2004)
    neighb_deg_w = [] # average degree of the neighborhood of each node as in our ITaS paper
    closen = [] # closeness centrality of each node
    betw = [] # betweenness centralities
    eigen_c = [] # eigenvector centralities
    triang = [] # number of triangles for each node
    clust = [] # clustering coefficients
    ecc = [] # eccentricity
    cpl = [] # characteristic path length
    eff = [] # node-level global efficiency
    leff = [] # node-level local efficiency
    leff2 = [] # node-level local efficiency by Rubinov & Sporns (2010)

    gcpl = [] # graph CPL
    ggeff = [] # graph global efficiency
    gleff = [] # graph local efficiency
    gleff2 = [] # graph local efficiency by Rubinov & Sporns (2010)
    gcc = [] # graph clustering coefficient
    gdens = [] # graph density
    gassort = [] # graph assortativity
    gassortsp = [] # full graph assortativity (Sporns)
    gmaxmax_cl = [] # maximal sum of weights of maximal clique
    gmeanmax_cl = [] # mean sum of weights of all maximal cliques
    gtrans = [] # transitivity
    gdiam = [] # diameter
    grad = [] # radius
    gn_rad = [] # number of central nodes
    garg_rad = [] # index of central node (if the only one)
    galg_con = [] # graph algebraic connectivity
    gfrdeg = [] # Freeman degree centralization
    gfrbetw = [] # Freeman betweenness centralization
    gfrclos = [] # Freeman closeness centralization
    gfreig = [] # Freeman eigenvector centralization

    N = len(list_of_matrices)
    n_nodes = list_of_matrices[0].shape[0]

    # Normalising constants of the Freeman centralization indices.
    star_degree = float((n_nodes - 1)*(n_nodes - 2))
    star_closeness = star_degree/float(2*n_nodes - 3)
    star_betweenness = float(n_nodes - 1)
    single_eigenvector = float(n_nodes - 2)

    # An intermediate quantity is needed when its own metric or any metric
    # derived from it is requested.
    need_closeness = closeness_centrality or graph_freeman_closeness
    need_betweenness = betweenness_centrality or graph_freeman_betweenness
    need_eigenvector = eigenvector_centrality or graph_freeman_eigenvector
    need_triangles = triangles or graph_transitivity
    need_clustering = (clustering_coefficient or graph_clustering_coefficient
                       or need_triangles)
    need_eccentricity = eccentricity or graph_diameter or graph_radius
    need_cpl = characteristic_path_length or graph_characteristic_path_length
    need_efficiency = efficiency or graph_global_efficiency
    need_local_efficiency = local_efficiency or graph_local_efficiency

    # networkx 3.0 removed from_numpy_matrix; keep using it where it exists
    # and fall back to from_numpy_array otherwise.
    graph_from_matrix = getattr(nx, 'from_numpy_matrix', None) or nx.from_numpy_array

    for index in range(N):
        A = list_of_matrices[index] # adjacency matrix
        A_inv = 1./A # inverse weights act as distances for shortest paths
        G = ig.Graph.Weighted_Adjacency(list(A), mode="UNDIRECTED", attr="weight", loops=False)
        G_inv = ig.Graph.Weighted_Adjacency(list(A_inv), mode="UNDIRECTED", attr="weight", loops=False)

        # `is None` (not `== None`): the argument may be a numpy array.
        if list_of_shortest_path_length_matrices is None:
            SPL = scipy.sparse.csgraph.dijkstra(A_inv, directed=False, unweighted=False)
        else:
            SPL = list_of_shortest_path_length_matrices[index]

        inv_SPL_with_inf = 1./SPL # inverse path lengths, inf on the main diagonal
        inv_SPL_with_nan = inv_SPL_with_inf.copy()
        # infs -> nans so that nanmean averages the off-diagonal entries only
        inv_SPL_with_nan[np.isinf(inv_SPL_with_inf)] = np.nan
        sum_distances_vector = np.sum(SPL, 1) # summed distance from each node to the others
        degrees_vector = np.sum(A, 1) # weighted node degrees
        non_weighted_degrees = np.array(G.degree())
        non_weighted_deg_by_deg_minus_one = np.multiply(non_weighted_degrees, (non_weighted_degrees - 1))

        # ---- node-level metrics ----
        if degree:
            degree_rows.append(list(degrees_vector))
        if neighborhood_degree:
            degrees_as_float = np.array(degrees_vector, dtype = float)
            neigh = np.divide(np.dot(non_weighted_degrees.reshape(1, n_nodes), A).reshape(n_nodes,),
                              degrees_as_float) # as in nx: Barrat et al. (2004)
            neigh_w = np.divide(np.dot(degrees_vector.reshape(1, n_nodes), A).reshape(n_nodes,),
                                degrees_as_float) # as in our ITaS paper
            neighb_deg.append(neigh)
            neighb_deg_w.append(neigh_w)
        if need_closeness:
            cl_c = float(n_nodes - 1)/sum_distances_vector # from the SPL matrix, normed by (n-1)
            if closeness_centrality:
                closen.append(list(cl_c))
        if need_betweenness:
            # normed by multiplying by 2/((n-1)(n-2))
            # (original note: may be problematic for very large graphs
            #  because of igraph's default nobigint=True option)
            btw = np.array(G_inv.betweenness(weights='weight', directed=False))*2./((n_nodes-1)*(n_nodes-2))
            if betweenness_centrality:
                betw.append(btw)
        if need_eigenvector:
            # igraph's default scale=True is used: the largest centrality is 1
            eigc = np.array(G.eigenvector_centrality(weights='weight', directed=False))
            if eigenvector_centrality:
                eigen_c.append(eigc)
        if need_clustering:
            Gnx = graph_from_matrix(A)
            # as in nx, Saramaki et al. (2007), also in Rubinov & Sporns (2010);
            # list(...) because dict.values() is a view on Python 3
            clst_geommean = list(nx.clustering(Gnx, weight='weight').values())
            if clustering_coefficient:
                clust.append(clst_geommean)
        if need_triangles:
            # weighted triangles restored from the clustering coefficients,
            # normed by the non-weighted k*(k-1)
            tr = np.multiply(np.array(clst_geommean),
                             np.array(non_weighted_deg_by_deg_minus_one, dtype = float))/2.
            if triangles:
                triang.append(list(tr))
        if need_eccentricity:
            eccentricities = np.max(SPL, 1) # also used for diameter and radius
            if eccentricity:
                ecc.append(list(eccentricities))
        if need_cpl:
            cpls = sum_distances_vector/float(n_nodes - 1) # inverse of the (n-1)-normed closeness
            if characteristic_path_length:
                cpl.append(list(cpls))
        if need_efficiency:
            efs = np.nanmean(inv_SPL_with_nan, 1) # nans on the diagonal are ignored: divides by (n-1)
            if efficiency:
                eff.append(list(efs))
        if need_local_efficiency: # might be NOT computationally efficient
            lefs_num = []
            lefs1 = []
            for node in range(n_nodes):
                neighb_list = G.neighbors(node)
                subgraph_size = len(neighb_list)
                if subgraph_size < 2:
                    lefs_num.append(float(0))
                    lefs1.append(float(0))
                    continue
                G_subgraph = G_inv.induced_subgraph(neighb_list)
                SPL_subgraph = np.array(G_subgraph.shortest_paths(weights='weight'))
                inv_SPL_subgraph = 1./SPL_subgraph # diagonal zeros become infs
                inv_SPL_subgraph[np.isinf(inv_SPL_subgraph)] = 0 # ...and must not add to the sums
                lefs1.append(np.sum(inv_SPL_subgraph)/(2*float(non_weighted_deg_by_deg_minus_one[node])))
                leff_values = []
                for i in range(subgraph_size):
                    for j in range(i+1, subgraph_size):
                        leff_value = np.power(A[node, neighb_list[i]]*A[node, neighb_list[j]]*inv_SPL_subgraph[i, j], 1./3.)
                        leff_values.append(leff_value)
                lefs_num.append(np.sum(leff_values))
            lefs = np.divide(lefs_num, non_weighted_deg_by_deg_minus_one)
            # nodes with a zero numerator (incl. degree < 2, where the
            # denominator is 0) get efficiency 0 instead of nan
            lefs[np.where(np.array(lefs_num) == 0)[0]] = 0.
            if local_efficiency:
                leff.append(lefs1)
                leff2.append(list(lefs))

        # ---- graph-level metrics ----
        if graph_characteristic_path_length:
            gcpl.append(np.mean(cpls))
        if graph_global_efficiency:
            ggeff.append(np.mean(efs))
        if graph_local_efficiency:
            gleff.append(np.mean(lefs1))
            gleff2.append(np.mean(lefs)) # means are intentional (looks like a mistake in Rubinov & Sporns, 2010)
        if graph_clustering_coefficient:
            gcc.append(np.mean(clst_geommean))
        if graph_density:
            # sum of all weights divided by the possible number of edges
            gdens.append(np.sum(A)/float(n_nodes*(n_nodes-1)))
        if graph_assortativity:
            assort = G.assortativity(types1 = degrees_vector, directed = False)
            gassort.append(0 if np.isnan(assort) else assort)
        if graph_assortativity_sporns:
            degrees_squared = np.power(degrees_vector, 2)
            max_w = np.max(A)
            max_w_cube = np.power(max_w, 3)
            max_w_sq = np.power(max_w, 2)
            inv_l = 2./len(np.nonzero(A)[0])
            deg_mult = np.zeros((n_nodes, n_nodes))
            deg_sum = np.zeros((n_nodes, n_nodes))
            deg_sq_sum = np.zeros((n_nodes, n_nodes))
            for i in range(n_nodes):
                for j in range(i+1, n_nodes):
                    deg_mult[i, j] = degrees_vector[i]*degrees_vector[j]
                    deg_mult[j, i] = deg_mult[i, j]
                    deg_sum[i, j] = degrees_vector[i] + degrees_vector[j]
                    deg_sum[j, i] = deg_sum[i, j]
                    deg_sq_sum[i, j] = degrees_squared[i] + degrees_squared[j]
                    deg_sq_sum[j, i] = deg_sq_sum[i, j]
            assortsp1 = inv_l * np.sum(np.multiply(A, deg_mult))/float(2.*max_w_cube)
            assortsp2 = np.power((inv_l*(1./(4.*max_w_sq)))*np.sum(np.multiply(A, deg_sum)), 2)
            assortsp3 = (inv_l*(1./(4.*max_w_cube)))*np.sum(np.multiply(A, deg_sq_sum))
            assortsp = (assortsp1 - assortsp2)/float(assortsp3 - assortsp2)
            gassortsp.append(assortsp)
        if graph_max_clique:
            weights_of_max_cl = [
                np.sum(G.induced_subgraph(clique).strength(weights='weight'))/2.
                for clique in G.largest_cliques()
            ]
            gmaxmax_cl.append(np.max(weights_of_max_cl))
            gmeanmax_cl.append(np.mean(weights_of_max_cl))
        if graph_transitivity:
            trans = float(2*np.sum(tr))/float(np.sum(non_weighted_deg_by_deg_minus_one))
            gtrans.append(trans)
        if graph_diameter:
            gdiam.append(np.max(eccentricities))
        if graph_radius:
            min_ecc = np.min(eccentricities)
            ind_rad = np.where(eccentricities == min_ecc)[0]
            n_rad = len(list(ind_rad))
            grad.append(min_ecc)
            gn_rad.append(n_rad)
            # the center index is reported only when it is unique
            garg_rad.append(ind_rad[0] if n_rad == 1 else np.nan)
        if graph_alg_connectivity:
            sparse_lapl = scipy.sparse.csr_matrix(np.array(G.laplacian(weights='weight')))
            second_sm_eigv = scipy.sparse.linalg.eigsh(sparse_lapl, k=2, which='SM', maxiter=100000, return_eigenvectors = False)[0]
            galg_con.append(second_sm_eigv)
        if graph_freeman_degree:
            gfrdeg.append(np.sum(np.max(degrees_vector) - degrees_vector)/star_degree)
        if graph_freeman_betweenness:
            gfrbetw.append(np.sum(np.max(btw) - btw)/star_betweenness)
        if graph_freeman_closeness:
            gfrclos.append(np.sum(np.max(cl_c) - cl_c)/star_closeness)
        if graph_freeman_eigenvector:
            gfreig.append(np.sum(np.max(eigc) - eigc)/single_eigenvector)

    # ---- assemble the output table ----
    # (switch, [(values per matrix, description, column name), ...])
    graph_spec = [
        (graph_characteristic_path_length, [
            (gcpl, 'graph characteristic path length (mean of the characteristic path lenghts)',
             'graph_characteristic_path_length')]),
        (graph_global_efficiency, [
            (ggeff, 'graph global efficiency (mean of the node-level global efficiencies)',
             'graph_global_efficiency')]),
        (graph_local_efficiency, [
            (gleff, 'graph local efficiency (mean of the node-level local efficiencies)',
             'graph_local_efficiency'),
            (gleff2, 'graph local efficiency (mean of the node-level local efficiencies by Sporns)',
             'graph_local_efficiency_sporns')]),
        (graph_clustering_coefficient, [
            (gcc, 'graph clustering coefficient (mean of the clustering coefficients)',
             'graph_clustering_coefficient')]),
        (graph_density, [
            (gdens, 'graph weighted density (normed by maximal possible density of unweighted graph of the same size)',
             'graph_density')]),
        (graph_assortativity, [
            (gassort, 'graph assortativity by weighted degree', 'graph_assortativity')]),
        (graph_assortativity_sporns, [
            (gassortsp, 'graph weighted assortativity as described in Rubinov & Sporns (2010)',
             'graph_assortativity_sporns')]),
        (graph_max_clique, [
            (gmaxmax_cl, 'maximal sum of weights of the cliques of the largest size',
             'max_weights_sum_largest_cliques'),
            (gmeanmax_cl, 'mean sum of weights of the cliques of the largest size',
             'mean_weights_sum_largest_cliques')]),
        (graph_transitivity, [
            (gtrans, 'graph transitivity', 'graph_transitivity')]),
        (graph_diameter, [
            (gdiam, 'graph weighted diameter', 'graph_diameter')]),
        (graph_radius, [
            (grad, 'graph weighted radius', 'graph_radius'),
            (gn_rad, 'graph number of centers', 'graph_number_of_centers'),
            (garg_rad, 'graph center (if a single vertex)', 'graph_center')]),
        (graph_alg_connectivity, [
            (galg_con, 'graph algebraic connectivity (second-smallest eigenvalue of the Laplacian matrix)',
             'graph_algebraic_connectivity')]),
        (graph_freeman_degree, [
            (gfrdeg, 'graph Freeman degree centralization (normed by unweighted star centralization)',
             'graph_degree_centralization')]),
        (graph_freeman_betweenness, [
            (gfrbetw, 'graph Freeman betweenness centralization (normed by unweighted star centralization)',
             'graph_betweenness_centralization')]),
        (graph_freeman_closeness, [
            (gfrclos, 'graph Freeman closeness centralization (normed by unweighted star centralization)',
             'graph_closeness_centralization')]),
        (graph_freeman_eigenvector, [
            (gfreig, 'graph Freeman eigenvector centralization (normed by unweighted single-edge graph centralization)',
             'graph_eigenvector_centralization')]),
    ]
    # (switch, [(rows per matrix, description prefix, column-name prefix), ...])
    node_spec = [
        (degree, [
            (degree_rows, 'weighted degree node ', 'degree_node_')]),
        (neighborhood_degree, [
            (neighb_deg, 'Barrat average neighborhood weighted degree node ',
             'barrat_neighborhood_degree_node_'),
            (neighb_deg_w, 'Our average neighborhood weighted degree node ',
             'neighborhood_degree_node_')]),
        (closeness_centrality, [
            (closen, 'non-normed closeness centrality node ', 'closeness_node_')]),
        (betweenness_centrality, [
            (betw, 'non-normed betweenness centrality node ', 'betweenness_node_')]),
        (eigenvector_centrality, [
            (eigen_c, 'normed (max 1) eigenvector centrality node ',
             'eigenvector_centrality_node_')]),
        (clustering_coefficient, [
            (clust, 'clustering coefficient node ', 'clustering_coefficient_node_')]),
        (triangles, [
            (triang, 'weighted triangles node ', 'triangles_node_')]),
        (eccentricity, [
            (ecc, 'eccentricity node ', 'eccentricity_node_')]),
        (characteristic_path_length, [
            (cpl, 'characteristic path length node ', 'characteristic_path_length_node_')]),
        (efficiency, [
            (eff, 'efficiency node ', 'efficiency_node_')]),
        (local_efficiency, [
            (leff, 'local efficiency node ', 'local_efficiency_node_'),
            (leff2, 'local efficiency by Rubinov & Sporns node ',
             'local_efficiency_sporns_node_')]),
    ]

    descr_features = []
    names_features = []
    blocks = [] # 2-D arrays with N rows each, concatenated column-wise at the end

    for enabled, entries in graph_spec:
        if not enabled:
            continue
        for values, description, name in entries:
            blocks.append(np.asarray(values, dtype=float).reshape(N, 1))
            descr_features.append(description)
            names_features.append(name)

    for enabled, entries in node_spec:
        if not enabled:
            continue
        for rows, description_prefix, name_prefix in entries:
            blocks.append(np.asarray(rows, dtype=float).reshape(N, n_nodes))
            for n in range(n_nodes):
                descr_features.append(description_prefix + str(n))
                names_features.append(name_prefix + str(n))

    # An explicit empty table keeps hstack from failing when nothing is enabled.
    fin_features = np.hstack(blocks) if blocks else np.empty((N, 0))
    fin_features_dataframe = pd.DataFrame(fin_features, columns = names_features)

    return (fin_features_dataframe, descr_features)

### Подготовим данные и обучим модель

In [None]:
# Read the dataset from disk, then attach the reduced-rank matrices
# (the 30 smallest-magnitude eigenvalues are dropped per matrix).
path = 'Data/dti/'
data = matrix_eig(get_autism(path), k=30)

In [None]:
# Unpack the original matrices, their reduced-rank versions and the labels.
X_new = data['X_low']
X = data['X']
y = data['y']
# A formatted string prints the same text under Python 2 and Python 3
# (the bare `print a, b, c` statement is a SyntaxError on Python 3).
print('{} {} {}'.format(X.shape, X_new.shape, y.shape))

In [None]:
# Graph metrics of the original matrices: feature table and column descriptions.
fin_feat, descr_feat = symm_metrics(X)

In [None]:
# Zero out negative entries of the reduced-rank matrices, in place.
X_new[X_new < 0] = 0

In [None]:
# Graph metrics of the reduced-rank matrices.
fin_feat_new, descr_feat_new = symm_metrics(X_new)

In [None]:
# Save the metrics of the reduced-rank matrices.
fin_feat_new.to_csv("metric-low.csv")

In [None]:
# Save the metrics of the original matrices.
fin_feat.to_csv("metric-orig.csv")