In [83]:
from pathlib import Path
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import pandas as pd
import numpy as np
import wandb
import torch
from torch import nn
import sys
sys.path.append(str(Path.cwd().parent))
from viz_utils import load_dfiv_model, predict_dfiv_model, load_dfiv_runs
import plotly.express as px
from src.data.demand_design_parcs_revise import psi, f

In [84]:

from IPython.display import display, HTML
import plotly
plotly.offline.init_notebook_mode(connected=True)

# The polling here is to ensure that plotly.js has already been loaded before
# setting display alignment in order to avoid a race condition.
display(HTML(
    '<script>'
        'var waitForPlotly = setInterval( function() {'
            'if( typeof(window.Plotly) !== "undefined" ){'
                'MathJax.Hub.Config({ SVG: { font: "STIX-Web" }, displayAlign: "center" });'
                'MathJax.Hub.Queue(["setRenderer", MathJax.Hub, "SVG"]);'
                'clearInterval(waitForPlotly);'
            '}}, 250 );'
    '</script>'
))

In [85]:
entity, project = "jasmineqy0", "formal_3"  
dup = 40
rho = 0.5

# Original

In [86]:
group = 'dfiv_low_dim_parcs_revise'
runs_df = load_dfiv_runs(entity, project, filters={'group' : group})
runs_df = runs_df[runs_df['state'].apply(lambda x: x == 'finished')]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['data_size'] == 10000)]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['rho'] == rho)]
print('Number of runs: ', len(runs_df))

Number of runs:  40


In [87]:
test_loss_idx = runs_df['summary'].apply(lambda x: True if 'test loss' in x else False)
test_loss_df = runs_df[test_loss_idx]
print('Number of runs with test loss: ', len(test_loss_df))

original_test_loss = test_loss_df['summary'].apply(lambda x: x['min_test_loss'])

min_idx, min_test_loss = np.argmin(original_test_loss), np.min(original_test_loss)
print(f"min test loss: {min_test_loss}\n")

original_mean_test_loss = np.mean(original_test_loss)
print('mean test loss: ', original_mean_test_loss)

original_std_test_loss = np.std(original_test_loss)
print('std test loss: ', original_std_test_loss)

Number of runs with test loss:  40
min test loss: 473.4062805175781

mean test loss:  23899.876779937746
std test loss:  44501.56865893741


In [88]:
best_run = runs_df.iloc[min_idx]
assert best_run['summary']['min_test_loss'] == min_test_loss, 'min test loss not equal to min test loss in test loss df'
best_run_name, best_run_id = best_run['name'], best_run['id']
print(f'best run name: {best_run_name}, best run id: {best_run_id}')

run_path_original = '/'.join([entity, project, best_run_id])
print(f'best run path: {run_path_original}')

dfiv_original_model = load_dfiv_model(run_path_original)

best run name: rural-energy-1405, best run id: m5fng05e
best run path: jasmineqy0/formal_3/m5fng05e


In [89]:
original_test_rho = test_loss_df['config'].apply(lambda x: x['data_configs']['rho'])
# original_test_sigma = test_loss_df['config'].apply(lambda x: x['data_configs']['sigma'])
original_test_sigma = len(original_test_rho) * [0]

original_sigma_seq = np.unique(np.sort(original_test_sigma)).tolist()

df_original = pd.DataFrame({'sigma': original_test_sigma, 'rho': original_test_rho, 'test loss': original_test_loss})
df_original = df_original.groupby(['sigma', 'rho']).mean().reset_index()

# Sigma 0.5

In [90]:
group = 'dfiv_low_dim_noise_price_sigma_1'
runs_df = load_dfiv_runs(entity, project, filters={'group' : group})
runs_df = runs_df[runs_df['state'].apply(lambda x: x == 'finished')]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['data_size'] == 10000)]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['parcs_config'] == 'demand_noise_price_sigma_0.5')]
print('Number of runs: ', len(runs_df))

Number of runs:  40


In [91]:
test_loss_idx = runs_df['summary'].apply(lambda x: True if 'test loss' in x else False)
test_loss_df = runs_df[test_loss_idx]

sigma_half_test_loss = test_loss_df['summary'].apply(lambda x: x['min_test_loss'])

min_idx, min_test_loss = np.argmin(sigma_half_test_loss), np.min(sigma_half_test_loss)
best_run = runs_df.iloc[min_idx]
print(f"min test loss: {best_run['summary']['min_test_loss']}\n")

sigma_half_mean_test_loss = np.mean(sigma_half_test_loss)
print('mean test loss: ', sigma_half_mean_test_loss)

sigma_half_std_test_loss = np.std(sigma_half_test_loss)
print('std test loss: ', sigma_half_std_test_loss)

min test loss: 282.2323303222656

mean test loss:  42525.09740600586
std test loss:  73965.91159502012


In [92]:
assert best_run['summary']['min_test_loss'] == min_test_loss, 'min test loss not equal to min test loss in test loss df'
best_run_name, best_run_id = best_run['name'], best_run['id']
print(f'best run name: {best_run_name}, best run id: {best_run_id}')

run_path_sigma_half = '/'.join([entity, project, best_run_id])
print(f'best run path: {run_path_sigma_half}')

dfiv_hetero_sigma_half_model = load_dfiv_model(run_path_sigma_half)

best run name: lilac-morning-1048, best run id: 47de1jro
best run path: jasmineqy0/formal_3/47de1jro


# Sigma 2

In [93]:
group = 'dfiv_low_dim_noise_price_sigma_1'
runs_df = load_dfiv_runs(entity, project, filters={'group' : group})
runs_df = runs_df[runs_df['state'].apply(lambda x: x == 'finished')]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['data_size'] == 10000)]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['parcs_config'] == 'demand_noise_price_sigma_2')]
print('Number of runs: ', len(runs_df))

Number of runs:  40


In [94]:
test_loss_idx = runs_df['summary'].apply(lambda x: True if 'min_test_loss' in x else False)
test_loss_df = runs_df[test_loss_idx]

sigma_2_test_loss = test_loss_df['summary'].apply(lambda x: x['min_test_loss'])

min_idx, min_test_loss = np.argmin(sigma_2_test_loss), np.min(sigma_2_test_loss)
print(f"min test loss: {best_run['summary']['min_test_loss']}\n")

sigma_2_mean_test_loss = np.mean(sigma_2_test_loss)
print('mean test loss: ', sigma_2_mean_test_loss)

sigma_2_std_test_loss = np.std(sigma_2_test_loss)
print('std test loss: ', sigma_2_std_test_loss)

min test loss: 282.2323303222656

mean test loss:  20794.825450134278
std test loss:  43628.72628495271


In [95]:
best_run = runs_df.iloc[min_idx]
assert best_run['summary']['min_test_loss'] == min_test_loss, 'min test loss not equal to min test loss in test loss df'
best_run_name, best_run_id = best_run['name'], best_run['id']
print(f'best run name: {best_run_name}, best run id: {best_run_id}')

run_path_sigma_2 = '/'.join([entity, project, best_run_id])
print(f'best run path: {run_path_sigma_2}')

dfiv_hetero_sigma_2_model = load_dfiv_model(run_path_sigma_2)

best run name: celestial-cosmos-1104, best run id: mn0g210k
best run path: jasmineqy0/formal_3/mn0g210k


# Sigma 4

In [96]:
group = 'dfiv_low_dim_noise_price_sigma_1'
runs_df = load_dfiv_runs(entity, project, filters={'group' : group})
runs_df = runs_df[runs_df['state'].apply(lambda x: x == 'finished')]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['data_size'] == 10000)]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['parcs_config'] == 'demand_noise_price_sigma_4')]
print('Number of runs: ', len(runs_df))

Number of runs:  40


In [97]:
test_loss_idx = runs_df['summary'].apply(lambda x: True if 'test loss' in x else False)
test_loss_df = runs_df[test_loss_idx]

sigma_4_test_loss = test_loss_df['summary'].apply(lambda x: x['min_test_loss'])

min_idx, min_test_loss = np.argmin(sigma_4_test_loss), np.min(sigma_4_test_loss)
print(f"min test loss: {best_run['summary']['min_test_loss']}\n")

sigma_4_mean_test_loss = np.mean(sigma_4_test_loss)
print('mean test loss: ', sigma_4_mean_test_loss)

sigma_4_std_test_loss = np.std(sigma_4_test_loss)
print('std test loss: ', sigma_4_std_test_loss)

min test loss: 867.0722045898438

mean test loss:  19411.78975830078
std test loss:  42774.05515712865


In [98]:
best_run = runs_df.iloc[min_idx]
assert best_run['summary']['min_test_loss'] == min_test_loss, 'min test loss not equal to min test loss in test loss df'
best_run_name, best_run_id = best_run['name'], best_run['id']
print(f'best run name: {best_run_name}, best run id: {best_run_id}')

run_path_sigma_4 = '/'.join([entity, project, best_run_id])
print(f'best run path: {run_path_sigma_4}')

dfiv_hetero_sigma_4_model = load_dfiv_model(run_path_sigma_4)

best run name: generous-music-1257, best run id: m3xb61gu
best run path: jasmineqy0/formal_3/m3xb61gu


# Sigma 8

In [99]:
group = 'dfiv_low_dim_noise_price_sigma_1'
runs_df = load_dfiv_runs(entity, project, filters={'group' : group})
runs_df = runs_df[runs_df['state'].apply(lambda x: x == 'finished')]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['data_size'] == 10000)]
runs_df = runs_df[runs_df['config'].apply(lambda x: x['data_configs']['parcs_config'] == 'demand_noise_price_sigma_8')]
print('Number of runs: ', len(runs_df))

Number of runs:  40


In [100]:
test_loss_idx = runs_df['summary'].apply(lambda x: True if 'test loss' in x else False)
test_loss_df = runs_df[test_loss_idx]

sigma_8_test_loss = test_loss_df['summary'].apply(lambda x: x['min_test_loss'])

min_idx, min_test_loss = np.argmin(sigma_8_test_loss), np.min(sigma_8_test_loss)
print(f"min test loss: {best_run['summary']['min_test_loss']}\n")

sigma_8_mean_test_loss = np.mean(sigma_8_test_loss)
print('mean test loss: ', sigma_8_mean_test_loss)

sigma_8_std_test_loss = np.std(sigma_8_test_loss)
print('std test loss: ', sigma_8_std_test_loss)

min test loss: 1749.1513671875

mean test loss:  14172.035009765625
std test loss:  9891.87679881794


In [101]:
best_run = runs_df.iloc[min_idx]
assert best_run['summary']['min_test_loss'] == min_test_loss, 'min test loss not equal to min test loss in test loss df'
best_run_name, best_run_id = best_run['name'], best_run['id']
print(f'best run name: {best_run_name}, best run id: {best_run_id}')

run_path_sigma_8 = '/'.join([entity, project, best_run_id])
print(f'best run path: {run_path_sigma_8}')

dfiv_hetero_sigma_8_model = load_dfiv_model(run_path_sigma_8)

best run name: sweet-flower-1360, best run id: de9gofga
best run path: jasmineqy0/formal_3/de9gofga


# CATE

In [102]:
num_point = 50

In [103]:
# vary t: fix p = 25, S = E[S]
emotion_range = np.array([1, 2, 3, 4, 5, 6, 7])
price_val = 25
price = np.ones(num_point) * price_val
time = np.linspace(0.0, 10, num_point)
emotion = (np.ones(num_point) * np.mean(emotion_range))
covariate = np.c_[time, emotion]

In [104]:
true_demand = f(price, time, emotion)

price = price[:, np.newaxis]
original_demand = predict_dfiv_model(dfiv_original_model, price, covariate)
mu_4_pred_demand = predict_dfiv_model(dfiv_hetero_mu_4_model, price, covariate)
mu_8_pred_demand = predict_dfiv_model(dfiv_hetero_mu_8_model, price, covariate)
mu_16_pred_demand = predict_dfiv_model(dfiv_hetero_mu_16_model, price, covariate)

NameError: name 'dfiv_hetero_mu_4_model' is not defined

In [None]:
fig, ax = plt.subplots()
time = time.squeeze()
l1 = ax.plot(time, true_demand, color='red', label='Truth')
l2 = ax.plot(time, original_demand, color='blue', label='DFIV: original')
l3 = ax.plot(time, mu_4_pred_demand, color='pink', label='DFIV: $\mu(V)=4$ ')
l4 = ax.plot(time, mu_8_pred_demand, color='orange', label='DFIV: $\mu(V)=8$ ')
l5 = ax.plot(time, mu_16_pred_demand, color='green', label='DFIV: $\mu(V)=16$ ')
ax.legend()

ax.set_xlabel('Time of the year t')
ax.set_ylabel('$\mathbb{E}[Y\mid do(P=25), T=t]$')
plt.savefig(f'images/CATE_mean_mu.pdf')
plt.show()

# ATE

In [None]:
num_point = 50

In [None]:
# vary p: fix t = E[t] = 5, S = E[S] = 4
emotion_range = np.array([1, 2, 3, 4, 5, 6, 7])
price = np.ones(num_point) * np.linspace(10, 25, num_point)
time = np.linspace(0.0, 10, num_point)
emotion = (np.ones(num_point) * np.mean(emotion_range))
covariate = np.c_[time, emotion]

In [None]:
G = np.mean(psi(time)) * np.mean(emotion_range)
coef = G - 2
offset = 100 + 10 * G

true_demand = coef * price + offset

In [None]:
# # vary p: fix t = E[t] = 5, S = E[S] = 4
# emotion_range = np.array([1, 2, 3, 4, 5, 6, 7])
# price_val = np.linspace(10, 25, num_point)
# price = np.ones(num_point) * price_val
# time = np.array([5] * num_point)
# emotion = (np.ones(num_point) * np.mean(emotion_range))
# covariate = np.c_[time, emotion]

In [None]:
original_demand = []
for p in price:
    p = (np.ones(num_point) * p)[:, np.newaxis]
    pred = np.mean((predict_dfiv_model(dfiv_original_model, p, covariate)))
    original_demand.append(pred)

mu_4_pred_demand = []
for p in price:
    p = (np.ones(num_point) * p)[:, np.newaxis]
    pred = np.mean((predict_dfiv_model(dfiv_hetero_mu_4_model, p, covariate)))
    mu_4_pred_demand.append(pred)
    
mu_8_pred_demand = []
for p in price:
    p = (np.ones(num_point) * p)[:, np.newaxis]
    pred = np.mean((predict_dfiv_model(dfiv_hetero_mu_8_model, p, covariate)))
    mu_8_pred_demand.append(pred)
    
mu_16_pred_demand = []
for p in price:
    p = (np.ones(num_point) * p)[:, np.newaxis]
    pred = np.mean((predict_dfiv_model(dfiv_hetero_mu_16_model, p, covariate)))
    mu_16_pred_demand.append(pred)

In [None]:
fig, ax = plt.subplots()
price = price.squeeze()
l1 = ax.plot(time, true_demand, color='red', label='Truth')
l2 = ax.plot(time, original_demand, color='blue', label='DFIV: original')
l3 = ax.plot(time, mu_4_pred_demand, color='pink', label='DFIV: $\mu(V)=4$ ')
l4 = ax.plot(time, mu_8_pred_demand, color='orange', label='DFIV: $\mu(V)=8$ ')
l5 = ax.plot(time, mu_16_pred_demand, color='green', label='DFIV: $\mu(V)=16$ ')

ax.legend()

ax.set_xlabel('Time t')
ax.set_ylabel('$\mathbb{E}[Y\mid do(P=p)]$')
plt.savefig(f'images/ATE_mean_mu.pdf')
plt.show()

# Average DFIV

In [None]:
mu = [0, 4, 8, 16]
test_mean_loss = [original_mean_test_loss, mu_4_mean_test_loss, mu_8_mean_test_loss, mu_16_mean_test_loss]
test_loss = [original_test_loss, mu_4_test_loss, mu_8_test_loss, mu_16_test_loss]

In [None]:
df_mean_loss = pd.DataFrame({'mu': mu, 'test_loss': test_mean_loss})
fig = px.bar(df_mean_loss, x='mu', y='test_loss',title='Test loss for different mean V',
             category_orders={'mu': mu})
fig.update_xaxes(type='category')
fig.show()

In [None]:
from itertools import chain

mu = [[i] * dup for i in mu]
mu = list(chain(*mu))
test_loss = np.hstack(test_loss)

df_loss = pd.DataFrame({'mu': mu, 'test_loss': test_loss})

In [None]:
fig = px.box(df_loss, x='mu', y='test_loss',title='Test loss for different V', log_y=True)
fig.update_xaxes(type='category')
fig.show()