In [1]:
# necessary packages
import numpy as np
import torch
import time
import argparse
import pandas as pd

from generate_synthetic_data import generate_synthetic_data, generate_E_random
from metrics import PEHE
from models import ERM_Sblock, ERM_Tblock
from irm_block import envs_irm_S, envs_irm_T, IRM_Sblock, IRM_Tblock

In [2]:
parser = argparse.ArgumentParser()
parser.add_argument(
    '--nr',
    help='number of repetitions',
    default=10,
    type=int)
parser.add_argument(
    '-nd',
    '--item', action='store', nargs='*', dest='nd',
    help='list of dimension size', 
    default='5,10,20,35,50',
    type=str)
parser.add_argument(
    '--ne',
    help='number of environments',
    default=2,
    type=int)
parser.add_argument(
    '--ntr',
    help='number of training observations',
    default=200,
    type=int)
parser.add_argument(
    '--nte',
    help='number of testing observations',
    default=100,
    type=int)
parser.add_argument(
    '--mu',
    help='mu',
    default=0.1,
    type=float)
parser.add_argument(
    '--outcome_model',
    help='outcome_model',
    default="quadratic",
    type=str)
parser.add_argument(
    '--feature_model',
    help='feature_model',
    default="B",
    type=str)
parser.add_argument(
    '--sigma_outcome',
    help='sigma_outcome',
    default=1,
    type=int)
parser.add_argument(
    '--model_type',
    help='linear regression or ridge CV',
    default='RidgeCV',
    type=str)
parser.add_argument(
    '--number_IRM_iterations',
    help='number of IRM iterations',
    default=10000,
    type=int)
parser.add_argument(
    '--lr',
    help='IRM lr',
    default=1e-3,
    type=float)

args = parser.parse_args() 


In [3]:
number_dimension_list = [int(float(item)) for item in args.nd[0].split(',')]

In [4]:
irm_S_pehe = np.zeros((args.nr, len(number_dimension_list)))
irm_T_pehe = np.zeros((args.nr, len(number_dimension_list)))
erm_S_pehe = np.zeros((args.nr, len(number_dimension_list)))
erm_T_pehe = np.zeros((args.nr, len(number_dimension_list)))

In [5]:
X_train, T_train, y_train, X_test, T_test, test_potential_outcome = generate_synthetic_data()

In [11]:
X_train

array([[ 1.66770037,  0.26435887, -0.29209425, -1.29216819,  1.48984281],
       [ 0.43615835, -1.02891598, -0.6376782 , -0.01175637, -1.81997623],
       [ 0.12669729,  1.79769715,  0.72257435,  0.74122936, -1.55117469],
       [ 0.9643104 ,  0.40319075,  1.4866204 ,  0.41342035,  0.98211995],
       [-0.57168339, -0.82275006, -0.5501343 ,  0.88079981,  0.33696881],
       [-0.66198971, -0.20726169, -1.58945131,  0.33947604,  0.87399586],
       [-1.14786834,  0.04781062, -0.71348363,  0.73652566, -0.03203797],
       [-0.42558067, -0.4698951 , -0.96318167, -1.34142536,  0.56308151],
       [ 0.42958014,  1.44414606,  0.68030386, -1.36126216, -0.24257178],
       [ 0.0877633 , -0.03349469,  0.22660444,  0.20396656, -0.53833209],
       [-0.30594226, -3.17351332, -0.53519244,  2.05107163, -0.45556094],
       [ 1.05568199,  0.1092244 ,  0.69269397,  0.08619513, -1.97651231],
       [ 0.68395839,  0.3894975 ,  1.13991258,  1.53952366, -0.44356844],
       [ 0.34173219, -0.68631296,  0.0

In [10]:
E = generate_E_random(args.ntr, args.ne)
environments, featuresTest_control, featuresTest_treatment = envs_irm_S(X_train, X_test, T_train, y_train, E, args.ne)

In [26]:
len(featuresTest_control)

100

In [27]:
irm_coeff, irm_potential_outcome_S, irm_ate_S = IRM_Sblock(environments, featuresTest_control, featuresTest_treatment, args)

In [29]:
len(irm_coeff)

12

In [35]:
from irm_block import InvariantRiskMinimization

In [36]:
irm = InvariantRiskMinimization(environments, args)

In [37]:
phi = irm.get_phi()

In [40]:
w = irm.get_w()

In [42]:
w.shape

torch.Size([12, 1])

In [47]:
environments[0][0].shape

torch.Size([99, 12])

In [7]:
# IRM T
T0_index = np.where(T_train == 0)[0]
T1_index = np.where(T_train == 1)[0]

In [8]:
T0_index

array([  0,   1,   2,   3,   9,  10,  11,  16,  19,  20,  23,  25,  26,
        29,  35,  36,  40,  41,  45,  48,  49,  51,  52,  54,  55,  56,
        57,  62,  63,  66,  71,  73,  74,  75,  76,  80,  83,  86,  87,
        88,  89,  91,  93,  96,  97,  98, 101, 104, 105, 106, 109, 110,
       114, 115, 116, 118, 119, 121, 123, 126, 127, 131, 135, 140, 141,
       143, 144, 146, 147, 149, 150, 155, 156, 157, 158, 159, 160, 161,
       165, 166, 167, 169, 173, 174, 177, 178, 179, 181, 183, 187, 189,
       190, 192, 194, 195, 196, 197, 199])

In [11]:
environments_control = envs_irm_T(X_train[T0_index,:], y_train[T0_index,:], E[T0_index,:], args.ne)
environments_treatment = envs_irm_T(X_train[T1_index,:], y_train[T1_index,:], E[T1_index,:], args.ne)

42
56
56
46


In [15]:
environments_control[0][0].shape

torch.Size([42, 6])

In [16]:
features_IRM_T = torch.cat((torch.ones(X_test.shape[0], 1), torch.from_numpy(X_test.astype(np.float32))), 1)

In [19]:
features_IRM_T.shape

torch.Size([100, 6])

In [None]:
irm_coeff_c, irm_coeff_t, irm_potential_outcome_T, irm_ate_T = IRM_Tblock(environments_control, environments_treatment, features_IRM_T, args)

In [21]:
from irm_block import InvariantRiskMinimization
irm0 = InvariantRiskMinimization(environments_control, args)

In [22]:
phi = irm0.get_phi()
w = irm0.get_w()

In [23]:
phi

tensor([[ 0.7766,  0.3440,  0.3753,  0.3021, -0.3712,  0.3558],
        [ 0.2380,  1.0850,  0.1368,  0.1478,  0.2814,  0.0376],
        [ 0.1544,  0.1883,  1.2229,  0.2157,  0.2282,  0.1711],
        [ 0.0977,  0.2064,  0.2415,  1.2208,  0.1791,  0.1956],
        [-0.2434,  0.4058,  0.1486,  0.0837,  0.6862,  0.5773],
        [ 0.2180,  0.0156,  0.0687,  0.0766,  0.2242,  0.9638]],
       grad_fn=<CloneBackward>)