In [1]:
import torch
torch.set_default_tensor_type(torch.FloatTensor) 
import copy

from rct_data_generator import *
from outcome_models import *
from plotting_functions import *
from mcmc_bayes_update import *
from eig_comp_utils import *
from research_exp_utils import *
from rct_data_generator import generate_host_and_mirror


  _C._set_default_tensor_type(t)
  from .autonotebook import tqdm as notebook_tqdm


## Set-up

In [2]:
n_host_sample = 100 
sigma_error = 1 
d = 10 
A = torch.randn((d,d))
A = 1/(torch.det(A)) * A

T_allocation_host = torch.randn(d)
T_allocation_host = 100/torch.norm(T_allocation_host)*T_allocation_host

mu_nc = torch.randn(d)
mu_nc = 1/torch.norm(mu_nc)*mu_nc

mu_c = torch.randn(d)
mu_c = 1/torch.norm(mu_c)*mu_c

mu = torch.concat([mu_nc,mu_c])

In [3]:

X_host_no_T = (torch.randn((n_host_sample,d)) @ A ) 
T_host = torch.bernoulli(torch.sigmoid(X_host_no_T@ T_allocation_host))
X_host_times_T = (T_host.unsqueeze(dim=0).T * X_host_no_T)
X_host = torch.concat([X_host_no_T,X_host_times_T],dim=1)

Y_host = X_host @ mu
Y_host = (1/Y_host.norm()) * Y_host + sigma_error * torch.randn_like(Y_host)


In [4]:
prior_mean = torch.zeros(2 * d)
beta_0, sigma_0_sq,inv_cov_0 = prior_mean, sigma_error,torch.eye(2*d)
prior_hyperparameters = {'beta_0': beta_0, 'sigma_0_sq': sigma_0_sq,"inv_cov_0":inv_cov_0}
bayesian_regression = BayesianLinearRegression(prior_hyperparameters)
bayesian_regression.set_causal_index(10)

In [5]:
bayesian_regression.fit(X_host,Y_host)

{'posterior_mean': tensor([-0.0435,  0.3487,  0.2949,  0.1082, -0.2580, -0.0619, -0.5854,  0.5546,
          0.1524,  1.0160, -0.1366, -0.0477, -0.1461, -0.0547, -0.0369, -0.0287,
         -0.3866,  0.3770,  0.0457,  0.4634]),
 'posterior_cov_matrix': tensor([[ 8.5015e-01,  6.7748e-03, -1.3977e-02, -6.3023e-02, -4.4866e-02,
           9.6525e-02, -3.1807e-02,  8.7241e-02, -1.1799e-02,  1.5242e-02,
          -9.0625e-02,  4.2582e-03, -1.1958e-02, -3.5408e-02, -2.3658e-02,
           6.7249e-02, -1.9518e-02,  4.2878e-02,  5.9992e-03,  1.5459e-02],
         [ 6.7748e-03,  7.9822e-01, -5.5964e-02, -6.0579e-02,  6.6949e-02,
          -7.4243e-02,  2.7848e-02, -7.3350e-02,  4.6776e-02,  6.4279e-03,
           1.6229e-03, -8.5571e-02, -3.1540e-02, -3.3691e-02,  2.6711e-02,
          -2.6120e-02,  2.1856e-02, -3.4932e-02,  2.1680e-02,  4.4602e-03],
         [-1.3977e-02, -5.5964e-02,  8.1749e-01,  3.7492e-02,  4.8204e-02,
           9.1372e-03, -3.8328e-02,  1.8379e-02,  3.8212e-02, -2.4670e-0

In [6]:
bayesian_regression.closed_form_obs_EIG(X_host),bayesian_regression.closed_form_causal_EIG(X_host)

(1.398887140503594, 0.26550232519802486)

In [7]:
bayesian_regression.samples_obs_EIG(X_host,100,10),bayesian_regression.samples_causal_EIG(X_host,100,10)

  out = random_state.multivariate_normal(mean, cov, size)


(1.9411918090116842, -0.8106184504626412)

In [8]:
T_zero = torch.zeros_like(T_host)
T_one = 1 - T_zero

X_host_times_T_zero = (T_zero.unsqueeze(dim=0).T * X_host_no_T)
X_host_times_T_one = (T_one.unsqueeze(dim=0).T * X_host_no_T)

X_host_T_zero = torch.concat([X_host_no_T,X_host_times_T_zero],dim=1)
X_host_T_one = torch.concat([X_host_no_T,X_host_times_T_one],dim=1)

Y_host = X_host @ mu
Y_host = (1/Y_host.norm()) * Y_host + sigma_error * torch.randn_like(Y_host)

In [9]:
bayesian_regression.closed_form_obs_EIG(X_host_T_zero),bayesian_regression.closed_form_causal_EIG(X_host_T_zero)

(1.004540117003199, -0.12884469830237033)

In [10]:
bayesian_regression.samples_obs_EIG(X_host_T_zero,100,500),bayesian_regression.samples_causal_EIG(X_host_T_zero,100,500)

(-0.04841631341710695, 0.06427715460080208)

In [11]:
bayesian_regression.closed_form_obs_EIG(X_host_T_one),bayesian_regression.closed_form_causal_EIG(X_host_T_one)

(1.593697584366769, 0.46031276906119967)

In [12]:
bayesian_regression.samples_obs_EIG(X_host_T_one,200,750),bayesian_regression.samples_causal_EIG(X_host_T_one,200,750)

(2.6058444168770336, 0.19467560287881724)

## Experiment

In [13]:
n_host_sample = 200 
sigma_error = 1 
d = 10 
n_repeats = 40

results_dict = {"E"}

## BART

In [14]:
import pymc as pm
import pymc_bart as pmb

In [15]:
X, y = np.array(X_host), np.array(Y_host)
with pm.Model() as model:
    bart = pmb.BART('sample', X=X, Y=y)
    idata = pm.sample()

DisconnectedInputError:  
Backtrace when that variable is created:

  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/v1/x4lr48v53397r9xsq4q70mvw0000gn/T/ipykernel_14989/2101801662.py", line 3, in <module>
    bart = pmb.BART('sample', X=X, Y=y)
  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/pymc_bart/bart.py", line 173, in __new__
    return super().__new__(cls, name, *params, **kwargs)
  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/pymc/distributions/distribution.py", line 413, in __new__
    rv_out = model.register_rv(
  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/pymc/model/core.py", line 1249, in register_rv
    self.create_value_var(rv_var, transform)
  File "/Users/jake/Python/causal_prospective_merge/.conda/lib/python3.11/site-packages/pymc/model/core.py", line 1401, in create_value_var
    value_var = rv_var.type()


In [17]:
coal = np.loadtxt(pm.get_data("coal.csv"))

In [18]:
# discretize data
years = int(coal.max() - coal.min())
bins = years // 4
hist, x_edges = np.histogram(coal, bins=bins)
# compute the location of the centers of the discretized data
x_centers = x_edges[:-1] + (x_edges[1] - x_edges[0]) / 2
# xdata needs to be 2D for BART
x_data = x_centers[:, None]
# express data as the rate number of disaster per year
y_data = hist

In [20]:
with pm.Model() as model_coal:
    μ_ = pmb.BART("μ_", X=x_data, Y=np.log(y_data), m=20)
    μ = pm.Deterministic("μ", pm.math.exp(μ_))
    y_pred = pm.Poisson("y_pred", mu=μ, observed=y_data)
    idata_coal = pm.sample(random_seed=10)

Multiprocess sampling (4 chains in 4 jobs)
PGBART: [μ_]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 6 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details


In [22]:
idata_coal.sample_stats

In [None]:
az.plot_trace(idata, figsize=(10, 7));

In [None]:
from pymc import HalfCauchy, Model, Normal, sample


In [None]:
RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
size = 200
true_intercept = 1
true_slope = 2

x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add noise
y = true_regression_line + rng.normal(scale=0.5, size=size)

data = pd.DataFrame(dict(x=x, y=y))

In [None]:
with Model() as model:  # model specifications in PyMC are wrapped in a with-statement
    # Define priors
    sigma = HalfCauchy("sigma", beta=10)
    intercept = Normal("Intercept", 0, sigma=20)
    slope = Normal("slope", 0, sigma=20)

    # Define likelihood
    likelihood = Normal("y", mu=intercept + slope * x, sigma=sigma, observed=y)

    # Inference!
    # draw 3000 posterior samples using NUTS sampling
    idata = sample(3000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, Intercept, slope]


Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 1 seconds.


In [None]:
idata

In [None]:
from pathlib import Path

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pymc_bart as pmb

from sklearn.model_selection import train_test_split

%config InlineBackend.figure_format = "retina"
RANDOM_SEED = 5781
np.random.seed(RANDOM_SEED)
az.style.use("arviz-darkgrid")

In [None]:
# discretize data
years = int(coal.max() - coal.min())
bins = years // 4
hist, x_edges = np.histogram(coal, bins=bins)
# compute the location of the centers of the discretized data
x_centers = x_edges[:-1] + (x_edges[1] - x_edges[0]) / 2
# xdata needs to be 2D for BART
x_data = x_centers[:, None]
# express data as the rate number of disaster per year
y_data = hist

In [None]:
pmb.BART.register

<bound method ABCMeta.register of <class 'pymc_bart.bart.BART'>>

In [None]:
with pm.Model() as model_coal:
    μ_ = pmb.BART("μ_", X=x_data, Y=np.log(y_data), m=20)
    μ = pm.Deterministic("μ", pm.math.exp(μ_))
    y_pred = pm.Poisson("y_pred", mu=μ, observed=y_data)
    idata_coal = pm.sample(random_seed=RANDOM_SEED)

AttributeError: 'function' object has no attribute 'register'