In [12]:
## Compare the prediction accuracy of lasso or other machine-learning results.

import pandas as pd
import os
import re
import numpy as np
import glob
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf


# Directories that are globbed for rf_accuracy_*.csv files in the comparison cell.
input_dir1 = 'ocd_out05_randomforest/'
input_dir2 = 'ocd_out05_randomforest_network_metrics/'

# Directory that receives the comparison figures.
output_dir = 'ocd_out06_compare_rf_results/'

# exist_ok=True removes the check-then-create race and keeps the cell
# idempotent when the notebook is re-run.
os.makedirs(output_dir, exist_ok=True)


In [13]:
# Compare the accuracy tables found in input_dir1 and input_dir2, one pair per
# file name: fit a mixed model on the pooled accuracies, print its summary and
# save a swarm plot to output_dir.
sns.set(rc = {'figure.figsize':(15,8)})
sns.set_style("whitegrid", {'axes.grid': False})

# glob returns paths in arbitrary order, so sort them for a reproducible run
# and pair the two directories by file name instead of by list position.
files1 = sorted(glob.glob(input_dir1 + 'rf_accuracy_*.csv'))
files2 = sorted(glob.glob(input_dir2 + 'rf_accuracy_*.csv'))

paths1_by_name = {os.path.basename(path): path for path in files1}
paths2_by_name = {os.path.basename(path): path for path in files2}
shared_names = sorted(paths1_by_name.keys() & paths2_by_name.keys())
unpaired_names = sorted(paths1_by_name.keys() ^ paths2_by_name.keys())

# Make mismatches visible instead of silently dropping them as zip() would.
if unpaired_names:
    print('Skipping accuracy files without a counterpart in the other directory:', unpaired_names)
if not shared_names:
    print('No matching rf_accuracy_*.csv pairs found; nothing to compare.')

for name in shared_names:

    f1 = paths1_by_name[name]
    f2 = paths2_by_name[name]

    accuracy1 = pd.read_csv(f1)
    accuracy2 = pd.read_csv(f2)

    # Text between '_accuracy_' and the '.csv' suffix; matched on the file name
    # only so directory names cannot interfere, with the dot escaped.
    title = re.search(r'(.*)_accuracy_(.*)\.csv$', name).group(2)
    # Spelling of 'comparision' kept so existing output file names do not change.
    figure_name = 'accuracy_comparision_' + title + '.png'

    # Stack both tables; the concat key ('harmonic' for input_dir1, 'network'
    # for input_dir2) becomes the 'group' column and the CSV's unnamed first
    # column becomes 'metric'.
    accuracy = (
        pd.concat([accuracy1, accuracy2], axis = 0, keys = ['harmonic', 'network'])
        .reset_index(level = 0)
        .rename(columns = {'Unnamed: 0': 'metric', 'level_0': 'group'})
    )
    # Long format: every remaining column is melted into an 'accuracy' value.
    plot_data = pd.melt(accuracy, id_vars = ['metric', 'group'], value_name = 'accuracy')

    # Fixed effect of group, with observations grouped by metric.
    md = smf.mixedlm("accuracy ~ group", plot_data, groups=plot_data["metric"])
    mdf = md.fit()  # after the loop, mdf holds the fit of the last pair processed
    print(title)
    print(mdf.summary())

    # Explicit figure/axes so each saved plot is independent of pyplot state.
    fig, ax = plt.subplots()
    sns.swarmplot(data = plot_data, x = 'metric', y = 'accuracy', hue = 'group', ax = ax)
    plt.xticks(rotation = 45)
    ax.set_title(title.replace('_', ' vs. '))

    fig.savefig(output_dir + figure_name)
    plt.close(fig)  # release the figure; many are created in this loop





hc_ocd
          Mixed Linear Model Regression Results
Model:              MixedLM  Dependent Variable:  accuracy
No. Observations:   75       Method:              REML    
No. Groups:         15       Scale:               0.0140  
Min. group size:    5        Log-Likelihood:      48.7394 
Max. group size:    5        Converged:           Yes     
Mean group size:    5.0                                   
----------------------------------------------------------
                 Coef. Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------
Intercept        0.556    0.017 33.279 0.000  0.523  0.589
group[T.network] 0.116    0.029  4.015 0.000  0.060  0.173
Group Var        0.000    0.010                           



In [14]:
# Rich display of the mixed-model summary; mdf is the fit left over from the
# final iteration of the comparison loop.
mdf.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,accuracy
No. Observations:,75,Method:,REML
No. Groups:,15,Scale:,0.0140
Min. group size:,5,Log-Likelihood:,48.7394
Max. group size:,5,Converged:,Yes
Mean group size:,5.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,0.556,0.017,33.279,0.000,0.523,0.589
group[T.network],0.116,0.029,4.015,0.000,0.060,0.173
Group Var,0.000,0.010,,,,


In [11]:
# Inspect the list of accuracy files globbed from input_dir2.
# NOTE(review): this cell's execution count (11) is lower than that of the
# cells above it, so the output shown may be stale; confirm after a
# Restart & Run All.
files2

[]