In [46]:
import plotly.express as px
from glob import glob
import os
import pandas as pd
import csv
import numpy as np

from collections import defaultdict

# DFIV Results

In [77]:
# import plotly.io as pio
# pio.renderers.default = 'iframe' # or 'notebook' or 'colab' or 'jupyterlab'
# Fixed colour per experiment label so every figure uses the same palette.
# Keys have to match the values of the `case` column being plotted; labels
# that are not listed here are coloured from plotly's default sequence.
color_discrete_map = {
    'dfiv': '#7149C6',
    'dfiv_original': '#0F6292',
    'dfiv_hetero1': '#FF8400',
    'dfiv_hetero1.5': '#379237',
    'dfiv_hetero0.5': '#FF6464',
    # The experiments selected in `oi` are labelled hetero1.8 / hetero0.2.
    # They reuse the colours of the hetero1.5 / hetero0.5 entries, so pick
    # different colours if both generations ever share one figure.
    'dfiv_hetero1.8': '#379237',
    'dfiv_hetero0.2': '#FF6464',
}

In [78]:
# Root folder under which each experiment has its own directory of dumps.
DUMP_DIR = '../dumps'

# Experiments of interest: label used in the plots -> directory name under
# DUMP_DIR. Labels and directory names coincide, so both come from one list.
oi = {
    name: name
    for name in (
        'dfiv',
        'dfiv_original',
        'dfiv_hetero1',
        'dfiv_hetero1.8',
        'dfiv_hetero0.2',
    )
}

In [79]:
def get_results(oi, dump_dir=None):
    """Collect the dumped MSE values of several experiments into one DataFrame.

    For every ``label -> directory`` pair in ``oi`` the function scans
    ``<dump_dir>/<directory>/data_size*`` and reads ``result.csv`` from each
    matching sub-directory that contains one.  The sub-directory name is
    parsed as ``name:value`` tokens joined by ``-`` (for example
    ``data_size:1000-rho:0.5``); each value is converted to ``float`` and
    repeated on every row read from that file.

    Parameters
    ----------
    oi : dict
        Maps the label written to the ``case`` column to the directory name
        (relative to the dump root) that holds the runs.
    dump_dir : str, optional
        Root directory of the dumps.  When omitted, the module-level
        ``DUMP_DIR`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per non-blank line of every ``result.csv``, with one float
        column per parameter found in the directory names, ``mse`` (first
        CSV field as float) and ``case``.  The most recently read file comes
        first; rows keep their file order within a file.  A parameter that
        is missing from some directory names is NaN on those rows.  An empty
        frame is returned when no result was found.

    Raises
    ------
    ValueError
        If a parameter value or the first CSV field is not a valid float.
    IndexError
        If a ``-``-separated token of a directory name contains no ``:``.
    """
    root = DUMP_DIR if dump_dir is None else dump_dir

    frames = []
    for case, case_dir in oi.items():
        for subdir in glob(os.path.join(root, case_dir, 'data_size*')):
            result_path = os.path.join(subdir, 'result.csv')
            if not os.path.exists(result_path):
                continue

            with open(result_path, 'r', newline='') as f:
                # Blank lines come back from csv.reader as empty rows; skip them.
                mse = [float(row[0]) for row in csv.reader(f) if row]
            if not mse:
                continue

            # basename instead of split('/') so the parsing does not depend
            # on the platform's path separator.
            run_name = os.path.basename(os.path.normpath(subdir))
            params = {}
            for token in run_name.split('-'):
                parts = token.split(':')
                params[parts[0]] = float(parts[1])

            # Scalars are broadcast to the length of `mse`.
            frames.append(pd.DataFrame({**params, 'mse': mse, 'case': case}))

    if not frames:
        return pd.DataFrame()

    # One concat at the end aligns columns by name, so runs with different
    # parameter sets no longer raise KeyError or lose columns.  The list is
    # reversed to keep the newest-first row order of the previous version.
    return pd.concat(frames[::-1], ignore_index=True)

In [80]:
# Gather every run of the selected experiments into one long-format frame
# (one row per MSE value) and display it.
df = get_results(oi)
df

Unnamed: 0,data_size,rho,mse,case
0,1000.0,0.5,927.434082,dfiv_hetero0.2
1,1000.0,0.5,815.457520,dfiv_hetero0.2
2,1000.0,0.5,1076.244141,dfiv_hetero0.2
3,1000.0,0.5,1122.224243,dfiv_hetero0.2
4,1000.0,0.5,2527.824707,dfiv_hetero0.2
...,...,...,...,...
195,1000.0,0.1,1114.532593,dfiv
196,1000.0,0.1,736.397644,dfiv
197,1000.0,0.1,1416.458740,dfiv
198,1000.0,0.1,991.781067,dfiv


In [51]:
# Box plot of the MSE values per data size, coloured by experiment and
# split into one facet per value of rho (three facets per row).
# The y axis is logarithmic and every individual point is drawn.
fig = px.box(
    df,
    x='data_size',
    y='mse',
    color='case',
    log_y=True,
    points="all",
    facet_col='rho',
    facet_col_wrap=3,
    color_discrete_map=color_discrete_map,
)
fig.show()

In [52]:
# Mean of the remaining columns for each (data_size, rho, case) combination;
# the group keys are turned back into ordinary columns for plotting.
df_avg = (
    df.groupby(['data_size', 'rho', 'case'])
    .mean()
    .reset_index()
)
# df_avg

In [53]:
# Grouped bars of the averaged MSE, one facet per value of rho.
fig = px.bar(
    df_avg,
    x='data_size',
    y='mse',
    color='case',
    barmode='group',
    facet_col='rho',
    facet_col_wrap=3,
    color_discrete_map=color_discrete_map,
)
# Treat data sizes as labels so the bar groups are evenly spaced.
fig.update_xaxes(type='category')
fig.show()

# DFIV vs SparseIV

In [82]:
# Scenario compared in this section; used both as the CSV file stem and as
# the dump directory name in the cells that follow.
case: str = 'case_vanilla'

In [83]:
# Load the comparison results: one `<case>.csv` per scenario in `res_dir`.
cases = [case]
res_dir = '../../spaceIV/results/comp/'
dfs = {
    case: pd.read_csv(os.path.join(res_dir, f'{case}.csv'))
    for case in cases
}

# Tag the rows with their model name (this modifies the frame held in `dfs`).
df_sparseiv = dfs[case]
df_sparseiv['model'] = 'sparseiv'

In [84]:
# Read the DFIV dumps of the same scenario and tag the rows with the model name.
# Note that `oi` is rebound here to the single scenario.
oi = {case: case}
df_dfiv = get_results(oi).assign(model='dfiv')

In [85]:
# Stack both result sets row-wise; columns present in only one of them are
# filled with NaN for the other.
df_merge = pd.concat([df_sparseiv, df_dfiv])
df_merge

Unnamed: 0.1,Unnamed: 0,case,data_size,test_size,mse,model,val_size
0,0.0,case_vanilla,1600.0,180.0,10.494213,sparseiv,
1,1.0,case_vanilla,1600.0,180.0,9.783889,sparseiv,
2,2.0,case_vanilla,1600.0,180.0,12.687496,sparseiv,
3,3.0,case_vanilla,1600.0,180.0,15.497702,sparseiv,
4,4.0,case_vanilla,1600.0,180.0,6.417971,sparseiv,
5,5.0,case_vanilla,1600.0,180.0,9.23792,sparseiv,
6,6.0,case_vanilla,1600.0,180.0,18.877758,sparseiv,
7,7.0,case_vanilla,1600.0,180.0,14.849074,sparseiv,
8,8.0,case_vanilla,1600.0,180.0,14.391256,sparseiv,
9,9.0,case_vanilla,1600.0,180.0,12.277851,sparseiv,


In [87]:
# MSE distribution of each model side by side, with every point drawn.
fig = px.box(
    df_merge,
    x='model',
    y='mse',
    color='model',
    points="all",
)
fig.show()