In [1]:
from mliv.dgps import get_data, get_tau_fn, fn_dict


In [1]:
### module imports
%load_ext autoreload
%autoreload 2
import warnings
warnings.simplefilter('ignore')
import itertools
import numpy as np
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import itertools
import os

### import from our files
from mliv.dgps import get_data, get_tau_fn, fn_dict
from mliv.neuralnet.utilities import log_metrics, plot_results, hyperparam_grid,\
                                     hyperparam_mult_grid, eval_performance
from mliv.neuralnet.mnist_dgps import AbstractMNISTxz
from mliv.neuralnet import AGMM,KernelLayerMMDGMM
from mliv.neuralnet.rbflayer import gaussian, inverse_multiquadric

# MNIST DGPs

## Convolutional Neural Networks

In [2]:
class CNN_Z_agmm(nn.Module):
    """CNN that maps a batch of single-channel images to one scalar per image.

    Used as the AGMM adversary when the instrument Z is an image.  The
    hard-coded ``9216 = 64 * 12 * 12`` input size of ``fc1`` corresponds to
    28x28 inputs (two un-padded 3x3 convolutions followed by 2x2 max-pooling).
    """

    def __init__(self):
        super(CNN_Z_agmm, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout on the 4-D feature map.
        self.dropout1 = nn.Dropout2d(0.25)
        # The activation is already flattened to (batch, 128) when this is
        # applied, so plain element-wise dropout is the right module here;
        # passing a 2-D tensor to Dropout2d is deprecated in recent PyTorch.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return a tensor of shape ``(batch,)`` for input ``(batch, 1, 28, 28)``."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.dropout1(F.max_pool2d(x, 2))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(self.dropout2(x))
        # Only drop the trailing singleton feature axis; a bare squeeze()
        # would also collapse the batch axis for a single-sample batch.
        return x.squeeze(-1)


class CNN_Z_kernel(nn.Module):
    """CNN that maps a batch of single-channel images to ``g_features`` features.

    Used as the feature map g(Z) of the kernel-layer estimator when the
    instrument Z is an image.  The hard-coded ``9216 = 64 * 12 * 12`` input
    size of ``fc1`` corresponds to 28x28 inputs (two un-padded 3x3
    convolutions followed by 2x2 max-pooling).

    Parameters
    ----------
    g_features : int
        Width of the output feature vector.
    """

    def __init__(self, g_features=100):
        super(CNN_Z_kernel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout on the 4-D feature map.
        self.dropout1 = nn.Dropout2d(0.25)
        # Applied after flattening to (batch, 128): use element-wise dropout,
        # since feeding a 2-D tensor to Dropout2d is deprecated in recent PyTorch.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, g_features)

    def forward(self, x):
        """Return a tensor of shape ``(batch, g_features)``."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.dropout1(F.max_pool2d(x, 2))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(self.dropout2(x))


class CNN_X(nn.Module):
    """CNN that maps a batch of single-channel images to one scalar per image.

    Used as the learner when the treatment is an image.  The hard-coded
    ``9216 = 64 * 12 * 12`` input size of ``fc1`` corresponds to 28x28 inputs
    (two un-padded 3x3 convolutions followed by 2x2 max-pooling).
    """

    def __init__(self):
        super(CNN_X, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout on the 4-D feature map.
        self.dropout1 = nn.Dropout2d(0.25)
        # Applied after flattening to (batch, 128): use element-wise dropout,
        # since feeding a 2-D tensor to Dropout2d is deprecated in recent PyTorch.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return a tensor of shape ``(batch,)`` for input ``(batch, 1, 28, 28)``."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.dropout1(F.max_pool2d(x, 2))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(self.dropout2(x))
        # Only drop the trailing singleton feature axis; a bare squeeze()
        # would also collapse the batch axis for a single-sample batch.
        return x.squeeze(-1)

## Fully Connected Networks

In [3]:
def fc_z_kernel(n_z, n_hidden, g_features, dropout_p):
    """Build the fully connected feature map g(Z) for the kernel-layer estimator.

    Layout: dropout -> linear(n_z, n_hidden) -> LeakyReLU -> dropout ->
    linear(n_hidden, g_features) -> ReLU.
    """
    layers = [
        nn.Dropout(p=dropout_p),
        nn.Linear(n_z, n_hidden),
        nn.LeakyReLU(),
        nn.Dropout(p=dropout_p),
        nn.Linear(n_hidden, g_features),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)


def fc_z_agmm(n_z, n_hidden, dropout_p):
    """Build the fully connected AGMM adversary over a vector-valued instrument.

    Layout: dropout -> linear(n_z, n_hidden) -> LeakyReLU -> dropout ->
    linear(n_hidden, 1).
    """
    layers = [
        nn.Dropout(p=dropout_p),
        nn.Linear(n_z, n_hidden),
        nn.LeakyReLU(),
        nn.Dropout(p=dropout_p),
        nn.Linear(n_hidden, 1),
    ]
    return nn.Sequential(*layers)


def fc_x(n_t, n_hidden, dropout_p):
    """Build the fully connected learner over a vector-valued treatment.

    Layout: dropout -> linear(n_t, n_hidden) -> LeakyReLU -> dropout ->
    linear(n_hidden, 1).
    """
    layers = [
        nn.Dropout(p=dropout_p),
        nn.Linear(n_t, n_hidden),
        nn.LeakyReLU(),
        nn.Dropout(p=dropout_p),
        nn.Linear(n_hidden, 1),
    ]
    return nn.Sequential(*layers)

## Data Generation Function

In [4]:
def generate_data(
    X_IMAGE=False,
    Z_IMAGE=False,
    tau_fn="abs",
    n_samples=10000,
    n_instruments=2,
    iv_strength=0.5,
    device=None,
):
    """Draw train/validation/test splits from the MNIST data-generating process.

    ``n_samples`` draws are split 90/10 into train/validation with a shuffled
    ``train_test_split``; a separate draw of ``n_samples // 10`` points forms
    the test split.

    Returns
    -------
    list
        ``[train, val, test]`` where each entry is a ``(Z, T, Y, G)`` tuple of
        ``torch.Tensor``.  The validation and test tensors are moved to
        ``device``; the training tensors are left where ``torch.Tensor``
        creates them.
    """
    mnist_dgp = AbstractMNISTxz(X_IMAGE, Z_IMAGE, tau_fn)
    dgp_kwargs = dict(
        tau_fn=tau_fn, n_instruments=n_instruments, iv_strength=iv_strength
    )

    # The fifth return value of the DGP is not used here.
    T, Z, Y, G, _ = mnist_dgp.generate_data(n_samples, **dgp_kwargs)
    T_test, Z_test, Y_test, G_test, _ = mnist_dgp.generate_data(
        n_samples // 10, **dgp_kwargs
    )

    Z_train, Z_val, T_train, T_val, Y_train, Y_val, G_train, G_val = train_test_split(
        Z, T, Y, G, test_size=0.1, shuffle=True
    )

    train_split = tuple(
        torch.Tensor(arr) for arr in (Z_train, T_train, Y_train, G_train)
    )
    val_split = tuple(
        torch.Tensor(arr).to(device) for arr in (Z_val, T_val, Y_val, G_val)
    )
    test_split = tuple(
        torch.Tensor(arr).to(device) for arr in (Z_test, T_test, Y_test, G_test)
    )
    return [train_split, val_split, test_split]

## Functions to Train AGMM and KernelLayerGMM estimators

In [17]:
### train AGMM
def train_agmm(
    Z_train,
    T_train,
    Y_train,
    G_train,
    Z_val,
    T_val,
    Y_val,
    G_val,
    T_test,
    G_test,
    X_IMAGE=False,
    Z_IMAGE=False,
    n_t=1,
    n_instruments=2,
    n_hidden=200,
    dropout_p=0.1,
    learner_lr=1e-4,
    adversary_lr=1e-4,
    learner_l2=1e-4,
    adversary_l2=1e-4,
    adversary_norm_reg=1e-4,
    n_epochs=100,
    batch_size=100,
    train_learner_every=1,
    train_adversary_every=1,
    device=None,
):
    """Build a learner/adversary pair and fit an AGMM estimator.

    The learner is ``CNN_X`` when ``X_IMAGE`` is true, otherwise ``fc_x``; the
    adversary is ``CNN_Z_agmm`` when ``Z_IMAGE`` is true, otherwise
    ``fc_z_agmm``.

    Parameters
    ----------
    Z_train, T_train, Y_train
        Training instruments, treatments and outcomes passed to ``AGMM.fit``.
    Z_val, T_val, Y_val, G_val, T_test
        Only referenced by the (currently disabled) logging callback.
    G_train, G_test
        Accepted for signature symmetry with the callers; not used here.
    adversary_norm_reg
        Accepted but not forwarded to ``AGMM.fit``.
        NOTE(review): the kernel-layer trainer forwards it - confirm whether
        it should be forwarded here too.
    device
        Device handed to ``AGMM.fit``.  ``None`` (default) selects ``"cuda"``
        when CUDA is available and ``"cpu"`` otherwise, so the function no
        longer crashes on CPU-only machines.

    Returns
    -------
    The fitted ``AGMM`` object returned by ``fit``.
    """
    learner = CNN_X() if X_IMAGE else fc_x(n_t, n_hidden, dropout_p)
    adversary = (
        CNN_Z_agmm() if Z_IMAGE else fc_z_agmm(n_instruments, n_hidden, dropout_p)
    )

    def logger(learner, adversary, epoch, writer):
        # Weight histograms are only logged for the nn.Sequential networks,
        # whose last layer can be indexed with [-1].
        if not X_IMAGE:
            writer.add_histogram("learner", learner[-1].weight, epoch)
        if not Z_IMAGE:
            writer.add_histogram("adversary", adversary[-1].weight, epoch)
        log_metrics(
            Z_val,
            T_val,
            Y_val,
            Z_val,
            T_val,
            Y_val,
            T_test,
            learner,
            adversary,
            epoch,
            writer,
            true_of_T=G_val,
        )

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    np.random.seed(12356)
    print("---Hyperparameters---")
    print("Learner Learning Rate:", learner_lr)
    print("Adversary learning rate:", adversary_lr)
    print("Learner_l2:", learner_l2)
    print("Adversary_l2:", adversary_l2)
    print("Number of epochs:", n_epochs)
    print("Batch Size:", batch_size)
    agmm = AGMM(learner, adversary).fit(
        Z_train,
        T_train,
        Y_train,
        learner_lr=learner_lr,
        adversary_lr=adversary_lr,
        learner_l2=learner_l2,
        adversary_l2=adversary_l2,
        n_epochs=n_epochs,
        bs=batch_size,
        # NOTE(review): logging is switched off here even though `logger` is
        # defined above; pass logger=logger to enable it.
        logger=None,
        model_dir="agmm_model",
        device=device,
        train_learner_every=train_learner_every,
        train_adversary_every=train_adversary_every,
    )

    return agmm


#### Train KernelLayerGMM
def train_kernellayergmm(
    Z_train,
    T_train,
    Y_train,
    G_train,
    Z_val,
    T_val,
    Y_val,
    G_val,
    T_test,
    G_test,
    g_features=100,
    kernel_fn=gaussian,
    centers=None,
    sigmas=None,
    X_IMAGE=False,
    Z_IMAGE=False,
    n_t=1,
    n_instruments=2,
    n_hidden=200,
    dropout_p=0.1,
    learner_lr=1e-4,
    adversary_lr=1e-4,
    learner_l2=1e-4,
    adversary_l2=1e-4,
    adversary_norm_reg=1e-4,
    n_epochs=100,
    batch_size=100,
    train_learner_every=1,
    train_adversary_every=1,
    n_centers=None,
    device=None,
):
    """Build a learner / feature-map pair and fit a KernelLayerMMDGMM estimator.

    The learner is ``CNN_X`` when ``X_IMAGE`` is true, otherwise ``fc_x``; the
    adversary feature map is ``CNN_Z_kernel`` when ``Z_IMAGE`` is true,
    otherwise ``fc_z_kernel``.

    Parameters
    ----------
    Z_train, T_train, Y_train
        Training instruments, treatments and outcomes passed to ``fit``.
    Z_val, T_val, Y_val, G_val, T_test
        Used by the logging callback only.
    G_train, G_test
        Accepted for signature symmetry with the callers; not used here.
    batch_size
        Mini-batch size forwarded to ``fit`` as ``bs``.
    n_centers
        Number of kernel centers.  ``None`` (default) takes ``len(centers)``.
    device
        Device handed to ``fit``.  ``None`` (default) selects the current CUDA
        device when CUDA is available and ``None`` otherwise.

    Raises
    ------
    ValueError
        If neither ``n_centers`` nor ``centers`` is given.

    Returns
    -------
    The ``KernelLayerMMDGMM`` estimator after ``fit`` has been called.
    """
    # These three values used to be read from notebook globals (`n_centers`,
    # `bs`, `device`), which silently ignored `batch_size` and broke the
    # function on a fresh kernel.  Resolve them from the arguments instead.
    if n_centers is None:
        if centers is None:
            raise ValueError(
                "train_kernellayergmm needs either `centers` or `n_centers`"
            )
        n_centers = len(centers)
    if device is None:
        device = torch.cuda.current_device() if torch.cuda.is_available() else None

    learner = CNN_X() if X_IMAGE else fc_x(n_t, n_hidden, dropout_p)
    if Z_IMAGE:
        adversary = CNN_Z_kernel(g_features)
    else:
        adversary = fc_z_kernel(n_instruments, n_hidden, g_features, dropout_p)

    def logger(learner, adversary, epoch, writer):
        # The learner histogram is only logged for the nn.Sequential learner,
        # whose last layer can be indexed with [-1].
        if not X_IMAGE:
            writer.add_histogram("learner", learner[-1].weight, epoch)
        writer.add_histogram("adversary", adversary.beta.weight, epoch)
        log_metrics(
            Z_val,
            T_val,
            Y_val,
            Z_val,
            T_val,
            Y_val,
            T_test,
            learner,
            adversary,
            epoch,
            writer,
            true_of_T=G_val,
        )

    np.random.seed(12356)
    print("---Hyperparameters---")
    print("Learner Learning Rate:", learner_lr)
    print("Adversary learning rate:", adversary_lr)
    print("Learner_l2:", learner_l2)
    print("Adversary_l2:", adversary_l2)
    print("Number of epochs:", n_epochs)
    print("Batch Size:", batch_size)
    print("G features", g_features)
    print("Number of centers", n_centers)
    print("Kernel function", kernel_fn.__name__)
    klayermmdgmm = KernelLayerMMDGMM(
        learner,
        adversary,
        g_features,
        n_centers,
        kernel_fn,
        centers=centers,
        sigmas=sigmas,
    )
    klayermmdgmm.fit(
        Z_train,
        T_train,
        Y_train,
        learner_l2=learner_l2,
        adversary_l2=adversary_l2,
        adversary_norm_reg=adversary_norm_reg,
        learner_lr=learner_lr,
        adversary_lr=adversary_lr,
        n_epochs=n_epochs,
        bs=batch_size,
        logger=logger,
        model_dir="klayer_model",
        device=device,
        train_learner_every=train_learner_every,
        train_adversary_every=train_adversary_every,
    )

    return klayermmdgmm

## Global hyper-parameters

In [11]:
# Index of the current CUDA device when a GPU is usable, otherwise None.
if torch.cuda.is_available():
    device = torch.cuda.current_device()
else:
    device = None

## Z - Image

In [12]:
### Generate data
# NOTE(review): this section is titled "Z - Image" but the treatment is also
# generated as an image (X_IMAGE = True) - confirm which setting is intended.
X_IMAGE, Z_IMAGE = True, True
tau_fn = "abs"
n_samples, n_instruments, iv_strength = 1000, 2, 0.5

data = generate_data(
    X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE, tau_fn=tau_fn, n_samples=n_samples,
    n_instruments=n_instruments, iv_strength=iv_strength, device=device,
)

# data is [train, val, test]; every split is a (Z, T, Y, G) tuple.
(
    (Z_train, T_train, Y_train, G_train),
    (Z_val, T_val, Y_val, G_val),
    (Z_test, T_test, Y_test, G_test),
) = data

### Train AGMM - Z image

In [13]:
%%time
# parameters for networks
dropout_p = 0.1
n_t = 1
n_hidden = 200

# local hyperparam
learner_lr = 1e-4
adversary_lr = 1e-4
learner_l2 = 1e-4
adversary_l2 = 1e-4
adversary_norm_reg = 1e-4
n_epochs = 1
bs = 100
agmm = train_agmm(Z_train, T_train, Y_train, G_train, Z_val, T_val, Y_val, G_val, T_test, G_test,
                  X_IMAGE=True, Z_IMAGE=True, n_t=n_t, n_instruments=n_instruments,
                  n_hidden=n_hidden, dropout_p=dropout_p, learner_lr=learner_lr, adversary_lr=adversary_lr,
                  learner_l2=learner_l2, adversary_l2=adversary_l2, adversary_norm_reg=adversary_norm_reg,
                  n_epochs=n_epochs, batch_size=bs)


#plot_results(agmm, T_test, true_of_T_test=G_test)
eval_performance(agmm,T_test, true_of_T_test=G_test)

NameError: name 'train_agmm' is not defined

### Train Kernel GMM - Z image

In [14]:
dropout_p = 0.1  # for dropout
n_t = 1  # number of treatments
n_hidden = 200

# For any method that use a projection of z into features g(z)
g_features = 100
n_centers = 100
sigma = 2.0 / g_features
# The kernel function
kernel_fn = gaussian
# One center per row, living in the g(z) feature space (hence g_features columns).
centers = np.random.uniform(-4, 4, size=(n_centers, g_features))
sigmas = np.ones((n_centers,)) * sigma

# local hyperparam
learner_lr = 1e-4
adversary_lr = 1e-4
learner_l2 = 1e-4
adversary_l2 = 1e-4
adversary_norm_reg = 1e-4
n_epochs = 1
bs = 100

# The image flags must match the ones used to generate the data above;
# hard-coding X_IMAGE=False here built a fully connected learner for image
# treatments.
klayermmdgmm = train_kernellayergmm(
    Z_train,
    T_train,
    Y_train,
    G_train,
    Z_val,
    T_val,
    Y_val,
    G_val,
    T_test,
    G_test,
    g_features=g_features,
    kernel_fn=kernel_fn,
    centers=centers,
    sigmas=sigmas,
    X_IMAGE=X_IMAGE,
    Z_IMAGE=Z_IMAGE,
    n_t=n_t,
    n_instruments=n_instruments,
    n_hidden=n_hidden,
    dropout_p=dropout_p,
    learner_lr=learner_lr,
    adversary_lr=adversary_lr,
    learner_l2=learner_l2,
    adversary_l2=adversary_l2,
    adversary_norm_reg=adversary_norm_reg,
    n_epochs=n_epochs,
    batch_size=bs,
)

plot_results(klayermmdgmm, T_test, true_of_T_test=G_test)

NameError: name 'train_kernellayergmm' is not defined

## X - Image

In [15]:
### Generate Data
if torch.cuda.is_available():
    torch.cuda.empty_cache()

X_IMAGE, Z_IMAGE = True, False
tau_fn = "abs"
n_samples = 20000
# need to keep this to 1 for now. some bug in the way Bennett et al generate their data.
n_instruments = 1
iv_strength = 0.5

data = generate_data(
    X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE, tau_fn=tau_fn, n_samples=n_samples,
    n_instruments=n_instruments, iv_strength=iv_strength, device=device,
)

# data is [train, val, test]; every split is a (Z, T, Y, G) tuple.
(
    (Z_train, T_train, Y_train, G_train),
    (Z_val, T_val, Y_val, G_val),
    (Z_test, T_test, Y_test, G_test),
) = data

### Train AGMM - X image

In [16]:
# Train AGMM
#%%time

# Network parameters.  The dropout rate must be bound to `dropout_p`, the name
# passed to train_agmm below; it used to be assigned to `p`, so the call only
# worked with a `dropout_p` left over from an earlier cell.
dropout_p = 0.1  # for dropout
n_t = 1  # number of treatments
n_hidden = 200

# local hyperparam
learner_lr = 1e-5
adversary_lr = 1e-4
learner_l2 = 1e-4
adversary_l2 = 1e-4
adversary_norm_reg = 1e-4
n_epochs = 1
bs = 100
train_learner_every = 1
train_adversary_every = 1

agmm = train_agmm(
    Z_train,
    T_train,
    Y_train,
    G_train,
    Z_val,
    T_val,
    Y_val,
    G_val,
    T_test,
    G_test,
    X_IMAGE=X_IMAGE,
    Z_IMAGE=Z_IMAGE,
    n_t=n_t,
    n_instruments=n_instruments,
    n_hidden=n_hidden,
    dropout_p=dropout_p,
    learner_lr=learner_lr,
    adversary_lr=adversary_lr,
    learner_l2=learner_l2,
    adversary_l2=adversary_l2,
    adversary_norm_reg=adversary_norm_reg,
    n_epochs=n_epochs,
    batch_size=bs,
    train_learner_every=train_learner_every,
    train_adversary_every=train_adversary_every,
)

eval_performance(agmm, T_test, true_of_T_test=G_test)

NameError: name 'train_agmm' is not defined

### Train Kernel GMM - X image

In [None]:
# use KernelLayerGMM for DGP 2: only X image

# Kernel layer: g(z) feature width, number of centers and kernel bandwidth.
g_features, n_centers = 100, 100
sigma = 2.0 / g_features
kernel_fn = gaussian
# One center per row, living in the g(z) feature space.
centers = np.random.uniform(-4, 4, size=(n_centers, g_features))
sigmas = sigma * np.ones((n_centers,))

# Network architecture.
dropout_p, n_t, n_hidden = 0.1, 1, 200

# Optimisation hyper-parameters local to this run.
learner_lr, adversary_lr = 1e-5, 1e-4
learner_l2 = adversary_l2 = adversary_norm_reg = 1e-4
n_epochs, bs = 1, 100
train_learner_every = train_adversary_every = 1

klayermmdgmm = train_kernellayergmm(
    Z_train, T_train, Y_train, G_train,
    Z_val, T_val, Y_val, G_val,
    T_test, G_test,
    g_features=g_features, kernel_fn=kernel_fn,
    centers=centers, sigmas=sigmas,
    X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE,
    n_t=n_t, n_instruments=n_instruments,
    n_hidden=n_hidden, dropout_p=dropout_p,
    learner_lr=learner_lr, adversary_lr=adversary_lr,
    learner_l2=learner_l2, adversary_l2=adversary_l2,
    adversary_norm_reg=adversary_norm_reg,
    n_epochs=n_epochs, batch_size=bs,
    train_learner_every=train_learner_every,
    train_adversary_every=train_adversary_every,
)

eval_performance(klayermmdgmm, T_test, true_of_T_test=G_test)

## X and Z - Images

In [18]:
### Generate Data
if torch.cuda.is_available():
    torch.cuda.empty_cache()

X_IMAGE, Z_IMAGE = True, True
tau_fn = "abs"
n_samples, n_instruments, iv_strength = 20000, 2, 0.5

data = generate_data(
    X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE, tau_fn=tau_fn, n_samples=n_samples,
    n_instruments=n_instruments, iv_strength=iv_strength, device=device,
)

# data is [train, val, test]; every split is a (Z, T, Y, G) tuple.
(
    (Z_train, T_train, Y_train, G_train),
    (Z_val, T_val, Y_val, G_val),
    (Z_test, T_test, Y_test, G_test),
) = data

### Train AGMM - X and Z images

In [19]:
torch.cuda.empty_cache()
# Training targets with singleton axes removed; the bare name displays them.
label = torch.squeeze(G_train)
label

tensor([-0.1527, -0.7361, -0.1527,  ..., -0.1527, -0.1527,  0.4308])

In [20]:
#%%time
# Network architecture.
dropout_p, n_t, n_hidden = 0.1, 1, 200

# Optimisation hyper-parameters local to this run.
learner_lr = adversary_lr = 1e-4
learner_l2 = adversary_l2 = adversary_norm_reg = 1e-4
n_epochs, bs = 100, 5000

agmm = train_agmm(
    Z_train, T_train, Y_train, G_train,
    Z_val, T_val, Y_val, G_val,
    T_test, G_test,
    X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE,
    n_t=n_t, n_instruments=n_instruments,
    n_hidden=n_hidden, dropout_p=dropout_p,
    learner_lr=learner_lr, adversary_lr=adversary_lr,
    learner_l2=learner_l2, adversary_l2=adversary_l2,
    adversary_norm_reg=adversary_norm_reg,
    n_epochs=n_epochs, batch_size=bs,
    train_learner_every=1, train_adversary_every=1,
)

eval_performance(agmm, T_test, true_of_T_test=G_test)

---Hyperparameters---
Learner Learning Rate: 0.0001
Adversary learning rate: 0.0001
Learner_l2: 0.0001
Adversary_l2: 0.0001
Number of epochs: 100
Batch Size: 5000
R2avg -0.017229437828063965
R2fin -0.023673295974731445
MSEavg 0.18663625
MSEfin 0.18781854


(-0.017229437828063965, -0.023673295974731445, 0.18663625, 0.18781854)

In [21]:
# Smallest true structural-function value on the test split.
G_test.min()

tensor(-0.7599, device='cuda:0')

In [None]:
# MSE-loss training: fit a DeepSet regressor directly on (T_train, G_train)

In [14]:
import torch.nn as nn
class DeepSet(nn.Module):
    """Encode each element, pool over axis 1, then decode to one output.

    Parameters
    ----------
    x_d : int
        Size of the last input axis (input width of the encoder).
    pool : {"sum", "max", "mean"}
        Reduction applied over axis 1 of the encoded tensor.  The argument
        used to be stored but ignored - the forward pass always summed - so
        the default is now ``"sum"`` to keep default-constructed models
        behaving exactly as before, while an explicit choice is honoured.

    Raises
    ------
    ValueError
        If ``pool`` is not one of the supported reductions.
    """

    _POOLS = ("sum", "max", "mean")

    def __init__(self, x_d, pool="sum"):
        super().__init__()
        if pool not in self._POOLS:
            raise ValueError(
                "pool must be one of {}, got {!r}".format(self._POOLS, pool)
            )
        self.enc = nn.Sequential(
            nn.Linear(in_features=x_d, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=64),
        )
        self.dec = nn.Sequential(
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=1),
        )
        self.pool = pool

    def forward(self, x):
        """Apply encoder, pool over axis 1 according to ``self.pool``, decode."""
        x = self.enc(x)
        if self.pool == "max":
            x = x.max(dim=1)[0]  # max(dim=...) returns (values, indices)
        elif self.pool == "mean":
            x = x.mean(dim=1)
        else:
            x = x.sum(dim=1)
        return self.dec(x)

def train(model, x, label, n_steps=10000, lr=1e-4):
    """Fit ``model`` to ``label`` with full-batch Adam on an MSE loss.

    The model and the labels are moved to the device that ``x`` already lives
    on, so the function works on CPU as well as on GPU (it used to call
    ``.cuda()`` unconditionally).

    Parameters
    ----------
    model : nn.Module
        Network to optimise; its parameters are updated in place.
    x, label : torch.Tensor
        Inputs and regression targets.  ``model(x).squeeze()`` is compared
        against ``label``.
    n_steps : int
        Number of full-batch optimisation steps.
    lr : float
        Adam learning rate.

    Returns
    -------
    list of float
        The loss value recorded at every step.
    """
    target_device = x.device
    model = model.to(target_device)
    label = label.to(target_device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    losses = []
    for _ in range(n_steps):
        loss = criterion(model(x).squeeze(), label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    return losses
        

In [15]:
# Dimensions of the training treatments.
T_train.size()

torch.Size([18000, 1, 28, 28])

In [16]:
# The encoder width is the size of T_train's fourth axis.
model = DeepSet(x_d=T_train.shape[3])
g_loss = train(model, x=T_train.cuda(), label=G_train.cuda())

In [17]:
# Loss recorded at the last optimisation step.
g_loss[-1]

0.8363448977470398

### Train Kernel GMM - X and Z images

In [None]:
#%%time
# Kernel layer: g(z) feature width, number of centers and kernel bandwidth.
g_features, n_centers = 100, 100
sigma = 2.0 / g_features
kernel_fn = gaussian
# One center per row, living in the g(z) feature space.
centers = np.random.uniform(-4, 4, size=(n_centers, g_features))
sigmas = sigma * np.ones((n_centers,))

# Network architecture.
dropout_p, n_t, n_hidden = 0.1, 1, 200

# training hyperparameters
learner_lr, adversary_lr = 1e-5, 1e-4
learner_l2 = adversary_l2 = adversary_norm_reg = 1e-4
n_epochs, bs = 1, 100
train_learner_every = train_adversary_every = 1

klayermmdgmm = train_kernellayergmm(
    Z_train, T_train, Y_train, G_train,
    Z_val, T_val, Y_val, G_val,
    T_test, G_test,
    g_features=g_features,
    kernel_fn=kernel_fn,
    centers=centers,
    sigmas=sigmas,
    X_IMAGE=X_IMAGE,
    Z_IMAGE=Z_IMAGE,
    n_t=n_t,
    n_instruments=n_instruments,
    n_hidden=n_hidden,
    dropout_p=dropout_p,
    learner_lr=learner_lr,
    adversary_lr=adversary_lr,
    learner_l2=learner_l2,
    adversary_l2=adversary_l2,
    adversary_norm_reg=adversary_norm_reg,
    n_epochs=n_epochs,
    batch_size=bs,
    train_learner_every=train_learner_every,
    train_adversary_every=train_adversary_every,
)

eval_performance(klayermmdgmm, T_test, true_of_T_test=G_test)

## Multiple Experiments at Once

In [None]:
def dgp_to_bools(dgp_str):
    """Translate a DGP name into the ``(X_IMAGE, Z_IMAGE)`` flag pair.

    Both naming schemes used in this notebook are accepted:

    * ``'mnist_x'``  / ``'x_image'``  -> ``(True, False)``
    * ``'mnist_z'``  / ``'z_image'``  -> ``(False, True)``
    * ``'mnist_xz'`` / ``'xz_image'`` -> ``(True, True)``

    Any other name keeps the original fallback of ``(False, False)``.
    """
    flags_by_name = {
        'mnist_x': (True, False),
        'x_image': (True, False),
        'mnist_z': (False, True),
        'z_image': (False, True),
        'mnist_xz': (True, True),
        'xz_image': (True, True),
    }
    return flags_by_name.get(dgp_str, (False, False))

def experiment(dgp,iv_strength,tau_fn,num_data,est):
    """Run one experiment: generate data, train one estimator, evaluate it.

    Parameters
    ----------
    dgp : str
        DGP name understood by ``dgp_to_bools``.
    iv_strength, tau_fn, num_data
        Forwarded to ``generate_data`` (``num_data`` as ``n_samples``).
    est : {'AGMM', 'KernelLayerMMDGMM'}
        Which estimator to train.

    Returns
    -------
    Whatever ``eval_performance`` returns for the trained estimator on the
    test split.

    Raises
    ------
    ValueError
        If ``est`` is not a supported estimator name (previously this only
        surfaced after training time as an unbound ``estimator`` variable).
    """
    if est not in ('AGMM', 'KernelLayerMMDGMM'):
        raise ValueError("Unknown estimator: {!r}".format(est))

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("Experiment")
    print(dgp,est,iv_strength,tau_fn)

    # Fixed hyper-parameters - can vary these too
    # params for kernelgmm.  n_centers and sigma are defined here (with the
    # values every other cell of the notebook uses) instead of being read from
    # globals left behind by earlier cells.
    g_features = 100
    n_centers = 100
    sigma = 2.0 / g_features
    kernel_fn = gaussian
    centers = np.random.uniform(-4, 4, size=(n_centers, g_features))
    sigmas = np.ones((n_centers,)) * sigma

    # arch params
    dropout_p = 0.1 # for dropout
    n_t = 1 # number of treatments
    n_hidden = 200

    # training params
    learner_lr = 1e-5
    adversary_lr = 1e-4
    learner_l2 = 1e-4
    adversary_l2 = 1e-4
    adversary_norm_reg = 1e-4
    n_epochs = 1
    bs = 100
    train_learner_every = 1
    train_adversary_every = 1

    # generate data
    X_IMAGE,Z_IMAGE = dgp_to_bools(dgp)
    n_instruments = 1
    data = generate_data(X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE, tau_fn=tau_fn, n_samples=num_data,
                    n_instruments=n_instruments, iv_strength=iv_strength, device=device)
    (Z_train, T_train, Y_train, G_train) = data[0]
    (Z_val, T_val, Y_val, G_val) = data[1]
    (Z_test, T_test, Y_test, G_test) = data[2]

    if est=='AGMM':
        estimator = train_agmm(Z_train, T_train, Y_train, G_train,
                               Z_val, T_val, Y_val, G_val, T_test, G_test,
                               X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE, n_t=n_t,
                               n_instruments=n_instruments,
                               n_hidden=n_hidden, dropout_p=dropout_p,
                               learner_lr=learner_lr, adversary_lr=adversary_lr,
                               learner_l2=learner_l2, adversary_l2=adversary_l2,
                               adversary_norm_reg=adversary_norm_reg, n_epochs=n_epochs,
                               batch_size=bs,
                               train_learner_every=train_learner_every,
                               train_adversary_every=train_adversary_every)

    else:  # est == 'KernelLayerMMDGMM' (validated above)
        estimator = train_kernellayergmm(Z_train, T_train, Y_train, G_train,
                                         Z_val, T_val, Y_val, G_val, T_test, G_test,
                                         g_features=g_features, kernel_fn=kernel_fn,
                                         centers=centers, sigmas=sigmas,
                                         X_IMAGE=X_IMAGE, Z_IMAGE=Z_IMAGE, n_t=n_t,
                                         n_instruments=n_instruments,
                                         n_hidden=n_hidden, dropout_p=dropout_p,
                                         learner_lr=learner_lr, adversary_lr=adversary_lr,
                                         learner_l2=learner_l2, adversary_l2=adversary_l2,
                                         adversary_norm_reg=adversary_norm_reg,
                                         n_epochs=n_epochs, batch_size=bs,
                                         train_learner_every=train_learner_every,
                                         train_adversary_every=train_adversary_every)

    results = eval_performance(estimator, T_test, true_of_T_test=G_test)
    return results


In [None]:
# Monte-Carlo Loop
# itertools.product nests the loops in this order (outermost first):
# 1. structural functions
# 2. instrument strengths
# 3. dgps
# 4. sample sizes
# 5. estimators

tau_fns = ["abs", "sin", "2dpoly", "rand_pw", "3dpoly"]
iv_strengths = [0.2, 0.5, 0.8]
estimators = ["AGMM", "KernelLayerMMDGMM"]
# Names must be the ones dgp_to_bools recognises; the previous
# "z_image"/"x_image"/"xz_image" labels all fell through to (False, False).
dgps = ["mnist_z", "mnist_x", "mnist_xz"]
num_datas = [1000]


hyperparams = list(itertools.product(tau_fns, iv_strengths, dgps, num_datas, estimators))
result_dict = {}

# Loop variables carry a run_ prefix so they do not overwrite the notebook-level
# `tau_fn` / `iv_strength` settings used by the cells above.
for (run_tau_fn, run_iv_strength, run_dgp, run_num_data, run_est) in hyperparams:
    results = experiment(run_dgp, run_iv_strength, run_tau_fn, run_num_data, run_est)
    result_dict[(run_tau_fn, run_iv_strength, run_dgp, run_num_data, run_est)] = results