In [1]:
## run LASSO on network metrics

# so far this has the highest accuracy on 0back vs 2back prediction (around 71% to 79%)

import pandas as pd
import numpy as np
import os
import re
import glob

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import cross_val_score

input_dir  = 'hcp_out03_network_analysis_0back/'
input_dir2 = 'hcp_out03_network_analysis_2back/'

output_dir = 'hcp_out05_lasso_network_metrics/'

## subject info:


In [9]:
# data_files = glob.glob(input_dir + '/adni_out04_entropy_freq_*.csv')
metrics = ['betweenness', 'clustering', 'degree', 'eigenvector', 'closeness']
data_files = [input_dir + m + '.csv' for m in metrics]
data_files.sort()

metrics = ['betweenness', 'clustering', 'degree', 'eigenvector', 'closeness']
data_files2 = [input_dir2 + m + '.csv' for m in metrics]
data_files2.sort()

all_result = []
print_group_size = True

result_accuracy = []
result_coefs = []

for f1, f2 in zip(data_files, data_files2):
    
    print(f1)
    data = pd.read_csv(f1, index_col = 0)
    data2= pd.read_csv(f2, index_col= 0)
    # break
    num1 = data.shape[0]
    num2 = data2.shape[0]
    
    if print_group_size:
        print(num1)
        print(num2)
        print_group_size = False
    
    X = pd.concat([data, data2], axis = 0).values
    y = np.hstack((np.ones((num1)), np.zeros((num2))))
    
    scaler = StandardScaler()
    scaler.fit(X)
    X = scaler.transform(X)
    C_values = np.logspace(-2, 1, 10)
    # C_values = np.logspace(5, 10, 10)
    
    # build lassoCV that tune parameters with inner cv:
    reg = LogisticRegressionCV(cv = 5, random_state = 0, Cs = C_values, n_jobs = 4,
                               penalty = 'l1', solver='liblinear', 
                               refit = False).fit(X, y)
    # note: .fit(X, y) is only for coefs report (reg.coef_.). cross_val_score only need the estimator.

    print(reg.C_)
    # run outer cv:
    cv_result = cross_val_score(reg, X, y, cv = 5)
    print('cross validaton result, mean %3f, std: %3f' % (cv_result.mean(), cv_result.std()))
    result_accuracy.append(cv_result)
    result_coefs.append(reg.coef_.reshape(-1))
        
    #     break
    # break
    
print('finished')


hcp_out03_network_analysis_0back/betweenness.csv
959
959
[0.14617739]
cross validaton result, mean 0.725247, std: 0.011539
hcp_out03_network_analysis_0back/closeness.csv
[0.11237187]
cross validaton result, mean 0.779473, std: 0.018892
hcp_out03_network_analysis_0back/clustering.csv
[0.31546057]
cross validaton result, mean 0.713272, std: 0.024997
hcp_out03_network_analysis_0back/degree.csv
[0.11237187]
cross validaton result, mean 0.784172, std: 0.021866
hcp_out03_network_analysis_0back/eigenvector.csv
[0.07856636]
cross validaton result, mean 0.785209, std: 0.022973
finished


In [3]:
## save results:
if not os.path.exists(output_dir):
    os.mkdir(output_dir)


res = pd.DataFrame(result_accuracy)
res.columns = ['cv' + str(i) for i in range(1, 6)]
res.index = metrics

res.to_csv(output_dir + '/lasso_accuracy.csv')

res = pd.DataFrame(result_coefs)
res.columns = ['roi' + str(i) for i in range(1, 270)]
res.index = metrics

res.to_csv(output_dir + '/lasso_coefs.csv')


In [4]:
res

Unnamed: 0,roi1,roi2,roi3,roi4,roi5,roi6,roi7,roi8,roi9,roi10,...,roi260,roi261,roi262,roi263,roi264,roi265,roi266,roi267,roi268,roi269
betweenness,0.037368,0.100628,-0.004499,-0.023439,0.00111,-0.009019,-0.034921,0.259085,0.230482,0.134239,...,0.023883,0.005862,0.08928,-0.072129,-0.064212,-0.0809,0.026465,0.094212,-0.099988,-0.059244
clustering,0.065784,-0.000725,0.011246,-0.250416,-0.01277,-0.149636,-0.132087,0.35222,-0.103697,0.292751,...,0.023773,0.046594,0.043041,-0.053157,0.037208,0.039302,-0.021911,0.036465,-0.006783,-0.055917
degree,0.03884,-0.053933,0.024924,-0.230839,-0.035203,-0.126839,0.053418,0.065482,-0.171936,0.165632,...,-0.137687,0.035601,-0.278367,-0.018436,0.295621,0.111261,-0.064869,-0.208626,0.068601,-0.075404
eigenvector,0.072803,0.015649,0.013073,-0.255716,-0.027915,-0.159047,-0.145595,0.363569,-0.123278,0.322841,...,-0.003363,0.025726,0.037948,-0.097697,0.07723,0.06997,-0.022663,0.04861,-0.014885,-0.079203
closeness,0.068948,0.018628,0.005931,-0.327757,-0.001649,-0.107064,-0.101766,0.334804,-0.111824,0.281065,...,-0.008821,0.028293,0.049919,-0.053204,0.097772,0.116346,-0.034466,0.010502,0.022357,-0.07859
