In [None]:
from pathlib import Path
import plotly.express as px
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import pandas as pd
import numpy as np
import wandb
import torch
from torch import nn
import sys
sys.path.append(str(Path.cwd().parent))
from viz_utils import load_dfiv_model, predict_dfiv_model

In [None]:
# Folder (under the notebook's current working directory) that receives saved figures.
IMAGE_DIR = Path.cwd().joinpath('images')

# Unidentifiable

## spaceIV

In [None]:
# Experiment case whose results and data are inspected in this section.
case = 'case:vanilla'

# Locations inside the spaceIV checkout (relative to the working directory).
spaceiv_dir = Path('../../spaceIV')
spaceiv_res_root_dir = spaceiv_dir.joinpath('results', 'comp')
spaceiv_data_root_dir = spaceiv_dir.joinpath('data')

In [None]:
# Read one result table per case (currently only `case`), keyed by case name.
cases = [case]
dfs = {
    case_name: pd.read_csv(spaceiv_res_root_dir.joinpath(f'{case_name}.csv'))
    for case_name in cases
}

# Tag every row with the producing method. `df_spaceiv` is the same object as
# `dfs[case]`, so the new column is visible through both names.
df_spaceiv = dfs[case]
df_spaceiv['model'] = 'spaceIV'
df_spaceiv

In [None]:
# Mean and standard deviation (pandas default) of the MSE column for spaceIV.
spaceiv_mse = df_spaceiv['mse']
spaceiv_mean_mse, spaceiv_std_mse = spaceiv_mse.mean(), spaceiv_mse.std()
print(f'spaceIV mean mse: {spaceiv_mean_mse}, std mse: {spaceiv_std_mse}')

In [None]:
# Find the run with the lowest MSE.
# `idxmin()` returns an index *label*, so the row has to be fetched with the
# label-based `.loc`. Positional `.iloc` only gives the same row while the
# frame still carries the default 0..n-1 index (e.g. it breaks after filtering
# or sorting the frame).
best_sample = df_spaceiv.loc[df_spaceiv['mse'].idxmin()]
best_sample_name = best_sample['sample_dir']
best_sample_train_size = best_sample['data_size']
best_sample_test_size = best_sample['test_size']

# Train and test data of one sample live in sibling `data_size:<n>` folders.
best_sample_dir = spaceiv_data_root_dir / case / best_sample_name
best_sample_train_dir = best_sample_dir / f'data_size:{best_sample_train_size}'
best_sample_test_dir = best_sample_dir / f'data_size:{best_sample_test_size}'
assert best_sample_train_dir.exists(), f'error: best sample dir {best_sample_train_dir} not exist'
assert best_sample_test_dir.exists(), f'error: best sample dir {best_sample_test_dir} not exist'

# Load the estimated / true coefficient vectors (stored with the training
# data) and the test design matrix of the best sample.
best_beta_hat = np.loadtxt(best_sample_train_dir / 'beta_hat.csv', delimiter=',')
best_beta_star = np.loadtxt(best_sample_train_dir / 'beta_star.csv', delimiter=',')
test_X = np.loadtxt(best_sample_test_dir / 'X.csv', delimiter=',')


In [None]:
num_point = 20  # number of grid points per feature

# The estimate must have exactly the same support as the true coefficients.
assert np.array_equal(np.nonzero(best_beta_star),np.nonzero(best_beta_hat)), 'error: non-zero indices not equal'
nonzero_idx = np.nonzero(best_beta_star)[0]
# The code below hard-wires two active features (nonzero_idx[0] and nonzero_idx[1]).
assert nonzero_idx.size == 2, f'error: expected exactly 2 non-zero coefficients, got {nonzero_idx.size}'

# Per-feature statistics of the test design matrix.
cols_min = np.min(test_X, axis=0)
cols_max = np.max(test_X, axis=0)
cols_mean = np.mean(test_X, axis=0)

# Evaluation grid of shape (num_point, n_features): column j sweeps feature j
# linearly from its minimum to its maximum. The list comprehension yields one
# *row* per feature, hence the transpose -- without it `cols_samples[:, j]`
# would pick grid point number j of every feature instead of the sweep of
# feature j (and only "work" when n_features happens to equal num_point).
cols_samples = np.asarray(
    [np.linspace(cols_min[i], cols_max[i], num_point) for i in range(cols_min.shape[0])]
).T
col0, col1 = cols_samples[:, nonzero_idx[0]], cols_samples[:, nonzero_idx[1]]

col0_min, col0_max = np.min(col0), np.max(col0)
col1_min, col1_max = np.min(col1), np.max(col1)
# Column vectors holding the sweep of each of the two active features.
col0_vals = np.asarray(np.linspace(col0_min, col0_max, num_point)).reshape((-1, 1))
col1_vals = np.asarray(np.linspace(col1_min, col1_max, num_point)).reshape((-1, 1))
cols = np.hstack([col0_vals, col1_vals])

# Linear structural function restricted to the active features, evaluated
# with the true and with the estimated coefficients.
true_structual = cols @ best_beta_star[nonzero_idx]
est_structual = cols @ best_beta_hat[nonzero_idx]

In [None]:
# NOTE(review): disabled interactive Plotly version of the 3-D line plot
# (estimated vs. true structural function); kept for reference only.
# df = pd.DataFrame(dict(
#     x = np.tile(col0_vals.squeeze(), 2),
#     y = np.tile(col1_vals.squeeze(), 2),
#     z = np.concatenate((est_structual, true_structual)),
#     type = np.array(['estimation'] * num_point + ['truth'] * num_point)
# ))
# fig = px.line_3d(df, x="x", y="y", z='z', color='type', title="estimation") 
# fig.show()

In [None]:
# Display the indices of the non-zero coefficients.
nonzero_idx

In [None]:
# 3-D line plot comparing the true structural function with the spaceIV
# estimate along the two active features.
fig = plt.figure()
ax = plt.axes(projection='3d')
# Make the background panes fully transparent. Use `xaxis`/`yaxis`/`zaxis`:
# the `w_xaxis`/`w_yaxis`/`w_zaxis` aliases were deprecated and later removed
# from Matplotlib, so they raise AttributeError on current releases.
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))

# The truth is drawn thicker so the estimate stays visible on top of it.
ax.plot(col0_vals.squeeze(), col1_vals.squeeze(), true_structual, 'orange', label='truth', linewidth=5)
ax.plot(col0_vals.squeeze(), col1_vals.squeeze(), est_structual, 'blue', label='estimation')
ax.set_zlabel('$Y$')
ax.set_xlabel('$X_8$')
ax.set_ylabel('$X_{14}$')
ax.legend();  # trailing ';' hides the Legend repr in the cell output

## DFIV

In [None]:
# Identifier of the run whose DFIV model is loaded below; looks like a
# Weights & Biases "entity/project/run_id" path -- TODO confirm.
run_path = 'jasmineqy0/spaceIV-final/3hvmbt5r'

In [None]:
# Load the DFIV model belonging to `run_path` using the viz_utils helper.
dfiv_model = load_dfiv_model(run_path)

In [None]:
# Build the DFIV inputs: `num_point` stacked copies of the per-column means,
# with the non-zero-coefficient columns then overwritten by the matching
# columns of `cols_samples`.
# Note: this rebinds `test_X`, which previously held the loaded test matrix.
test_X = np.repeat(cols_mean[np.newaxis, :], num_point, axis=0)
test_X[:, nonzero_idx] = cols_samples[:, nonzero_idx]

# Query the model on those inputs; no covariate is passed (third argument is None).
dfiv_pred = predict_dfiv_model(dfiv_model, test_X, None)

In [None]:
# 3-D line plot comparing the true structural function with the spaceIV
# estimate and the DFIV prediction; the figure is saved as a PDF.
fig = plt.figure()
ax = plt.axes(projection='3d')
# Make the background panes fully transparent. Use `xaxis`/`yaxis`/`zaxis`:
# the `w_xaxis`/`w_yaxis`/`w_zaxis` aliases were deprecated and later removed
# from Matplotlib, so they raise AttributeError on current releases.
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))

# The truth is drawn thicker so the two estimates stay visible on top of it.
ax.plot(col0_vals.squeeze(), col1_vals.squeeze(), true_structual, 'orange', label='Truth', linewidth=5)
ax.plot(col0_vals.squeeze(), col1_vals.squeeze(), est_structual, 'blue', label='SpaceIV')
ax.plot(col0_vals.squeeze(), col1_vals.squeeze(), dfiv_pred.squeeze(), 'green', label='DFIV')
ax.set_zlabel('$Y$')
ax.set_xlabel('$X_8$')
ax.set_ylabel('$X_{14}$')
ax.legend()

# savefig does not create missing directories, so make sure the target exists.
IMAGE_DIR.mkdir(parents=True, exist_ok=True)
plt.savefig(IMAGE_DIR / 'data:spaceiv_alg:spaceiv_and_dfiv.pdf')

## Varying SpaceIV Dataset Size

In [None]:
from viz_utils import load_dfiv_runs

In [None]:
# Entity / project pair whose DFIV runs are fetched through the viz_utils helper.
entity = "jasmineqy0"
project = "spaceIV-final"
runs_df = load_dfiv_runs(entity, project)
runs_df

In [None]:
# Training-set size of every run, read from its nested config dict.
_run_data_size = runs_df['config'].apply(lambda cfg: cfg['data_configs']['data_size'])

# Split the runs by training-set size.
df_size_3200 = runs_df[_run_data_size == 3200]
df_size_1600 = runs_df[_run_data_size == 1600]

In [None]:
# Test loss of each run in the 3200-sample group, then its mean / std.
test_loss_3200 = df_size_3200['summary'].apply(lambda summary: summary['test loss'])
mean_3200, std_3200 = test_loss_3200.mean(), test_loss_3200.std()
print(f"mean: {mean_3200}, std: {std_3200}")

In [None]:
# Test loss of each run in the 1600-sample group, then its mean / std.
test_loss_1600 = df_size_1600['summary'].apply(lambda summary: summary['test loss'])
mean_1600, std_1600 = test_loss_1600.mean(), test_loss_1600.std()
print(f"mean: {mean_1600}, std: {std_1600}")

# Identifiable