In [17]:
## Compare the prediction accuracy of the LASSO (or other machine-learning) results.

import pandas as pd
import os
import re
import numpy as np
import glob
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

# Folders holding the per-comparison accuracy tables that are compared
# against each other further down in the notebook.
input_dir1 = 'adni_out05_lasso/'
input_dir2 = 'adni_out05_lasso_network_metrics/'

# Folder that receives the generated comparison figures.
output_dir = 'adni_out06_compare_lasso_results/'

# makedirs(..., exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race, also creates missing parent folders, and keeps the
# cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)


In [23]:
sns.set(rc = {'figure.figsize':(15,8)})
sns.set_style("whitegrid", {'axes.grid': False})

# glob returns paths in an arbitrary, filesystem-dependent order, so both
# lists are sorted before being paired positionally; without this, zip()
# could match a file from one folder with an unrelated file from the other.
# NOTE(review): the pairing still assumes both folders contain the same set
# of 'lasso_accuracy_*.csv' files -- confirm.
files1 = sorted(glob.glob(input_dir1 + 'lasso_accuracy_*.csv'))
files2 = sorted(glob.glob(input_dir2 + 'lasso_accuracy_*.csv'))

# Every fitted mixed model, keyed by the comparison title taken from the file
# name, so that earlier fits are not lost when `mdf` is reassigned.
mixedlm_fits = {}

for f1, f2 in zip(files1, files2):

    accuracy1 = pd.read_csv(f1)
    accuracy2 = pd.read_csv(f2)

    # The text between '_accuracy_' and the '.csv' extension names the comparison.
    title = re.search(r'(.*)_accuracy_(.*)\.csv$', f1).group(2)
    figure_name = 'accuracy_comparision_' + title + '.png'

    # Stack both tables; the concat keys become a 'group' column and the
    # unnamed first CSV column becomes 'metric'.
    accuracy = (
        pd.concat([accuracy1, accuracy2], axis = 0, keys = ['harmonic', 'network'])
        .reset_index(level = 0)
        .rename(columns = {'Unnamed: 0': 'metric', 'level_0': 'group'})
    )
    # Long format: one row per (metric, group, original column) accuracy value.
    plot_data = pd.melt(accuracy, id_vars = ['metric', 'group'], value_name = 'accuracy')

    # Mixed model with 'group' as fixed effect and 'metric' as grouping factor.
    md = smf.mixedlm("accuracy ~ group", plot_data, groups=plot_data["metric"])
    mdf = md.fit()  # `mdf` keeps pointing at the latest fit for the following cell
    mixedlm_fits[title] = mdf

    # Explicit figure/axes instead of the implicit pyplot "current figure".
    fig, ax = plt.subplots()
    sns.swarmplot(data = plot_data, x = 'metric', y = 'accuracy', hue = 'group', ax = ax)
    ax.tick_params(axis = 'x', labelrotation = 45)
    ax.set_title(title.replace('_', ' vs. '))

    fig.savefig(output_dir + figure_name)
    # Close each figure so a long file list does not accumulate open figures.
    plt.close(fig)





0,1,2,3
Model:,MixedLM,Dependent Variable:,accuracy
No. Observations:,75,Method:,REML
No. Groups:,15,Scale:,0.0039
Min. group size:,5,Log-Likelihood:,95.7718
Max. group size:,5,Converged:,Yes
Mean group size:,5.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,0.732,0.009,83.357,0.000,0.714,0.749
group[T.network],-0.045,0.015,-2.970,0.003,-0.075,-0.015
Group Var,0.000,0.006,,,,


In [24]:
# Display the summary of the most recent mixed-model fit. `mdf` is reassigned
# on every pass of the comparison loop, so this shows the last file pair only.
mdf.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,accuracy
No. Observations:,75,Method:,REML
No. Groups:,15,Scale:,0.0039
Min. group size:,5,Log-Likelihood:,95.7718
Max. group size:,5,Converged:,Yes
Mean group size:,5.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,0.732,0.009,83.357,0.000,0.714,0.749
group[T.network],-0.045,0.015,-2.970,0.003,-0.075,-0.015
Group Var,0.000,0.006,,,,
