In [1]:
import sys
from torch.autograd import grad
import os
import argparse
import pandas as pd
import numpy as np
import torch
sys.path.append(r"../")
sys.path.append(r"../../")
sys.path.append('/home/wyliu/code/CB-IV')
from utils import log, CausalDataset
from module.SynCBIV import run as run_SynCBIV

os.environ["CUDA_VISIBLE_DEVICES"] = '1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

def get_args():
    argparser = argparse.ArgumentParser(description=__doc__)
    # About run setting !!!!
    argparser.add_argument('--seed',default=2021,type=int,help='The random seed')
    argparser.add_argument('--mode',default='vx',type=str,help='The choice of v/x/vx/xx')
    argparser.add_argument('--ood',default=-3.0,type=float,help='The train dataset of OOD')
    argparser.add_argument('--ood_test',default=3.0,type=float,help='The train dataset of OOD')
    argparser.add_argument('--rewrite_log',default=False,type=bool,help='Whether rewrite log file')
    argparser.add_argument('--use_gpu',default=1,type=int,help='The use of GPU')
    argparser.add_argument('--des_str',default='/_/',type=str,help='The description of this running')
    argparser.add_argument('--oodtestall',default=0,type=int,help='The random seed')
    argparser.add_argument('--iter',default=3000,type=int,help='The num of iterations')
    # About data setting ~~~~
    argparser.add_argument('--num',default=10000,type=int,help='The num of train\val\test dataset')
    argparser.add_argument('--num_reps',default=100,type=int,help='The num of train\val\test dataset')
    argparser.add_argument('--ate',default=0,type=float,help='The ate of constant')
    argparser.add_argument('--sc',default=1,type=float,help='The sc')
    argparser.add_argument('--sh',default=0,type=float,help='The sh')
    argparser.add_argument('--one',default=1,type=int,help='The dim of Instrumental variables V')
    argparser.add_argument('--depX',default=0.05,type=float,help='Whether generates harder datasets')
    argparser.add_argument('--depU',default=0.05,type=float,help='Whether generates harder datasets')
    argparser.add_argument('--VX',default=1,type=int,help='The dim of Instrumental variables V')
    argparser.add_argument('--mV',default=2,type=int,help='The dim of Instrumental variables V')
    argparser.add_argument('--mX',default=10,type=int,help='The dim of Confounding variables X')
    argparser.add_argument('--mU',default=4,type=int,help='The dim of Unobserved confounding variables U')
    argparser.add_argument('--mXs',default=2,type=int,help='The dim of Noise variables X')
    argparser.add_argument('--storage_path',default='../../Data/',type=str,help='The dir of data storage')
    # Syn
    argparser.add_argument('--syn_alpha',default=0.01,type=float,help='')
    argparser.add_argument('--syn_lambda',default=0.001,type=float,help='')
    argparser.add_argument('--syn_twoStage',default=True,type=bool,help='')
    argparser.add_argument('--lrate',default=0.001,type=float,help='learning rate')
    # About Debug or Show
    argparser.add_argument('--verbose',default=1,type=int,help='The level of verbose')
    argparser.add_argument('--epoch_show',default=5,type=int,help='The epochs of show time')
    # About Regression_t
    argparser.add_argument('--regt_batch_size',default=500,type=int,help='The size of one batch')
    argparser.add_argument('--regt_lr',default=0.1,type=float,help='The learning rate')
    argparser.add_argument('--regt_num_epoch',default=5,type=int,help='The num of total epoch')
    # About IRM  
    argparser.add_argument('--env_list',default=[-3.0, -1.5, 1.5],type=list,help='The environment list')
    argparser.add_argument('--data_dict',default={},type=dict,help='The data dict')
    # args = argparser.parse_args()
    args = argparser.parse_args(args=[])
    return args

args = get_args()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
import random
import numpy as np
from sklearn.utils import shuffle
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from utils import set_seed, log

def get_gain(activation):
    if activation.__class__.__name__ == "LeakyReLU":
        gain = nn.init.calculate_gain("leaky_relu",
                                            activation.negative_slope)
    else:
        activation_name = activation.__class__.__name__.lower()
        try:
            gain = nn.init.calculate_gain(activation_name)
        except ValueError:
            gain = 1.0
    return gain

# input_dim：输入数据的维度。
# layer_widths：一个整数列表，表示隐藏层的宽度。
# activation：激活函数（默认为 None）。
# last_layer：可选的最后一层，可以是任何 nn.Module 的子类（默认为 None）。
# num_out：输出的维度（默认为 1）。
class MLPModel(nn.Module):
    def __init__(self, input_dim, layer_widths, activation=None,last_layer=None, num_out=1):
        nn.Module.__init__(self)
        self.gain=get_gain(activation)
        # 根据隐藏层的宽度列表 layer_widths，
        # 创建一系列的线性层（nn.Linear），
        # 并可选择地在每个线性层之后添加给定的激活函数 activation。
        # 最后，根据输出维度 num_out 添加最后一层线性层。
        if len(layer_widths) == 0:
            layers = [nn.Linear(input_dim, num_out)]
        else:
            num_layers = len(layer_widths)
            if activation is None:
                layers = [nn.Linear(input_dim, layer_widths[0])]
            else:
                layers = [nn.Linear(input_dim, layer_widths[0]), activation]
            for i in range(1, num_layers):
                w_in = layer_widths[i-1]
                w_out = layer_widths[i]
                if activation is None:
                    layers.extend([nn.Linear(w_in, w_out)])
                else:
                    layers.extend([nn.Linear(w_in, w_out), activation])
            layers.append(nn.Linear(layer_widths[-1], num_out))
        if last_layer:
            layers.append(last_layer)
        self.model = nn.Sequential(*layers)

    def initialize(self, gain=1.0):
        # initialize 方法用于初始化模型的参数。
        for layer in self.model[:-1]:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight.data, gain=self.gain)
                nn.init.zeros_(layer.bias.data)
        final_layer = self.model[-1]
        nn.init.xavier_normal_(final_layer.weight.data, gain=gain)
        nn.init.zeros_(final_layer.bias.data)

    def forward(self, data):
        # print("forward", data.shape)
        num_data = data.shape[0]
        data = data.view(num_data, -1)
        return self.model(data)

class MultipleMLPModel(nn.Module):
    def __init__(self, input_dim, layer_widths, num_models=1, activation=None,last_layer=None, num_out=1):
        nn.Module.__init__(self)
        self.models = nn.ModuleList([MLPModel(
            input_dim, layer_widths, activation=activation,
            last_layer=last_layer, num_out=num_out) for _ in range(num_models)])
        self.num_models = num_models

    def forward(self, data):
        num_data = data.shape[0]
        data = data.view(num_data, -1)
        outputs = [self.models[i](data) for i in range(self.num_models)]
        return torch.cat(outputs, dim=1)

def run(exp, args, dataDir, resultDir, train, val, test, device, r):
    batch_size = args.regt_batch_size
    print('args.regt_lr ',args.regt_lr)
    lr = args.regt_lr
    num_epoch = args.regt_num_epoch
    logfile = f'{resultDir}/log.txt'
    _logfile = f'{resultDir}/Regression.txt'
    set_seed(args.seed)

    try:
        train.to_tensor()
        val.to_tensor()
        test.to_tensor()
    except:
        pass

    train_loader = DataLoader(train, batch_size=batch_size)
    if args.mode == 'v':
        input_dim = args.mV
        train_input = train.v
        val_input = val.v
        test_input = test.v
    elif args.mode == 'x':
        input_dim = args.mX + args.mXs
        train_input = torch.cat((train.x, train.xs),1)
        val_input = torch.cat((val.x, val.xs),1)
        test_input = torch.cat((test.x, test.xs),1)
    else:
        input_dim = args.mV + args.mX + args.mXs
        # print("input dim:", input_dim)
        train_input = torch.cat((train.v, train.x, train.xs),1)
        val_input = torch.cat((val.v, val.x, val.xs),1)
        test_input = torch.cat((test.v, test.x, test.xs),1)

    
    mlp = MLPModel(input_dim, layer_widths=[128, 64], activation=nn.ReLU(),last_layer=nn.BatchNorm1d(2), num_out=2)
    net = nn.Sequential(mlp)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_func = torch.nn.CrossEntropyLoss()

    for epoch in range(num_epoch):
        log(logfile, f"Exp {exp} :this is the {epoch}/{num_epoch} epochs.")
        log(_logfile, f"Exp {exp} :this is the {epoch}/{num_epoch} epochs.", False)
        for idx, inputs in enumerate(train_loader):
            u = inputs['u']
            v = inputs['v']
            x = torch.cat((inputs['x'], inputs['xs']), 1)
            t = inputs['t'].reshape(-1).type(torch.LongTensor)
            # print("x:", x.shape)
            # print("args.mode:",args.mode)
            if args.mode == 'v':
                input_batch = v
            elif args.mode == 'x':
                input_batch = x
                # print("input_batch:", input_batch.shape)
            else:
                input_batch = torch.cat((v, x),1)
            
            prediction = net(input_batch) 
            loss = loss_func(prediction, t)

            optimizer.zero_grad()  
            loss.backward()        
            optimizer.step()    

        log(logfile, 'The train accuracy: {:.2f} %'.format((torch.true_divide(sum(train.t.reshape(-1) == torch.max(F.softmax(net(train_input) , dim=1), 1)[1]), len(train.t))).item() * 100))
        log(_logfile, 'The test  accuracy: {:.2f} %'.format((torch.true_divide(sum(test.t.reshape(-1) == torch.max(F.softmax(net(test_input) , dim=1), 1)[1]), len(test.t))).item() * 100))

    train.s = F.softmax(net(train_input) , dim=1)[:,1:2]
    val.s = F.softmax(net(val_input) , dim=1)[:,1:2]
    test.s = F.softmax(net(test_input) , dim=1)[:,1:2]
    ''' bias rate 1'''
    br = [-3.0, -2.5, -2.0, -1.5, -1.3, 1.3, 1.5, 2.0, 2.5, 3.0]
    brdc = {-3.0: 'n30', -2.5:'n25', -2.0:'n20', -1.5:'n15', -1.3:'n13', 1.3:'p13', 1.5:'p15', 2.0:'p20', 2.5:'p25', 3.0:'p30', 0.0:'0'}

    return train,val,test


def run_ood_IRM(exp, args, dataDir, resultDir, train, val, test, ood_test_dict=None):
    batch_size = args.regt_batch_size
    lr = args.regt_lr
    num_epoch = args.regt_num_epoch
    len_loader = 0
    logfile = f'{resultDir}/log.txt'
    _logfile = f'{resultDir}/Regression.txt'
    set_seed(args.seed)

    try:
        train.to_tensor()
        val.to_tensor()
        test.to_tensor()
    except:
        pass
    
    if args.mode == 'v':
        input_dim = args.mV
        train_input = train.v
        val_input = val.v
        test_input = test.v
    elif args.mode == 'x':
        input_dim = args.mX + args.mXs
        train_input = torch.cat((train.x, train.xs),1)
        val_input = torch.cat((val.x, val.xs),1)
        test_input = torch.cat((test.x, test.xs),1)
    else:
        input_dim = args.mV + args.mX + args.mXs
        # print("input dim:", input_dim)
        train_input = torch.cat((train.v, train.x, train.xs),1)
        val_input = torch.cat((val.v, val.x, val.xs),1)
        test_input = torch.cat((test.v, test.x, test.xs),1)

    for r in args.env_list:
        train_temp = args.data_dict[r]['train']
        val_temp = args.data_dict[r]['val']
        test_temp = args.data_dict[r]['test']

        try:
            train_temp.to_tensor()
            val_temp.to_tensor()
            test_temp.to_tensor()
        except:
            pass
        
        args.data_dict[r]['trainloader_reg'] = DataLoader(train_temp, batch_size=batch_size)
        len_loader= len(args.data_dict[r]['trainloader_reg'])

    mlp = MLPModel(input_dim, layer_widths=[128, 64], activation=nn.ReLU(),last_layer=nn.BatchNorm1d(2), num_out=2)
    net = nn.Sequential(mlp)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_func = torch.nn.CrossEntropyLoss()
    dummy_w = torch.nn.Parameter(torch.Tensor([1.0]))
    reg = 1e-1

    for epoch in range(num_epoch):
        log(logfile, f"Exp {exp} :this is the {epoch}/{num_epoch} epochs.")
        log(_logfile, f"Exp {exp} :this is the {epoch}/{num_epoch} epochs.", False)
        train_loaders = [iter(args.data_dict[r]['trainloader_reg']) for r in args.env_list]
        for _ in range(len_loader):
            print(_)
            error = 0
            penalty = 0
            for loader in train_loaders:
                inputs = next(loader, None)
                if inputs is None:
                    print("error!")
                u = inputs['u']
                v = inputs['v']
                x = torch.cat((inputs['x'], inputs['xs']), 1)
                t = inputs['t'].reshape(-1).type(torch.LongTensor)
                # print("x:", x.shape)
                # print("args.mode:",args.mode)
                if args.mode == 'v':
                    input_batch = v
                elif args.mode == 'x':
                    input_batch = x
                    # print("input_batch:", input_batch.shape)
                else:
                    input_batch = torch.cat((v, x),1)
                
                prediction = net(input_batch) 
                loss = loss_func(prediction * dummy_w, t)
                error += loss.mean()
                penalty += grad(loss.mean(), dummy_w,
                                create_graph=True)[0].pow(2).mean()
            optimizer.zero_grad()  
            (reg * error + (1 - reg) * penalty).backward()      
            optimizer.step()      

        log(logfile, 'The train accuracy: {:.2f} %'.format((torch.true_divide(sum(train.t.reshape(-1) == torch.max(F.softmax(net(train_input) , dim=1), 1)[1]), len(train.t))).item() * 100))
        log(_logfile, 'The test  accuracy: {:.2f} %'.format((torch.true_divide(sum(test.t.reshape(-1) == torch.max(F.softmax(net(test_input) , dim=1), 1)[1]), len(test.t))).item() * 100))

    return

In [3]:
''' bias rate '''
br = [-3.0, -2.5, -2.0, -1.5, -1.3, 1.3, 1.5, 2.0, 2.5, 3.0, 0.0]
brdc = {-3.0: 'n30', -2.5:'n25', -2.0:'n20', -1.5:'n15', -1.3:'n13', 1.3:'p13', 1.5:'p15', 2.0:'p20', 2.5:'p25', 3.0:'p30', 0.0:'0'}
which_benchmark = 'SynOOD2_'+'_'.join(str(item) for item in [args.sc, args.sh, args.one, args.depX, args.depU,args.VX])
which_dataset = '_'.join(str(item) for item in [args.mV, args.mX, args.mU, args.mXs])
resultDir = args.storage_path + f'/results/{which_benchmark}_{which_dataset}_{args.mode}/ood{brdc[args.ood]}/'
dataDir = f'{args.storage_path}/data/{which_benchmark}/{which_dataset}/'
os.makedirs(os.path.dirname(resultDir), exist_ok=True)
logfile = f'{resultDir}/log.txt'

In [14]:
exp = 0
if args.use_gpu:
    device = torch.device('cuda' if torch.cuda.is_available() and args.use_gpu else "cpu")
else:
    device = torch.device('cpu')
train_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[3.0]}/{args.mode}/train.csv')
train_df2 = pd.read_csv(dataDir + f'{exp}/ood_{brdc[-1.5]}/{args.mode}/train.csv')
val_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[-3.0]}/{args.mode}/val.csv')
test_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[-3.0]}/{args.mode}/test.csv')
# 合并 train 和 val 数据集
combined_df = pd.concat([train_df, train_df2], ignore_index=True)
# 打乱顺序
combined_df = shuffle(combined_df)
print(dataDir + f'{exp}/ood_{brdc[3.0]}/{args.mode}/train.csv')
print(dataDir + f'{exp}/ood_{brdc[-3.0]}/{args.mode}/test.csv')
# 创建新的数据集
combined_dataset = CausalDataset(combined_df, variables=['v', 'u', 'x', 'xs', 'z', 'p', 's', 'm', 't', 'g', 'y', 'f', 'c'])
train = CausalDataset(train_df, variables = ['v','u','x','xs','z','p','s','m','t','g','y','f','c'])
val = CausalDataset(val_df, variables = ['v','u','x','xs','z','p','s','m','t','g','y','f','c'])
test = CausalDataset(test_df, variables = ['v','u','x','xs','z','p','s','m','t','g','y','f','c'])
train,val,test = run(exp, args, dataDir, resultDir, train, val, test, device, r=0)

../../Data//data/SynOOD2_1_0_1_0.05_0.05_1/2_10_4_2/0/ood_p30/vx/train.csv
../../Data//data/SynOOD2_1_0_1_0.05_0.05_1/2_10_4_2/0/ood_n30/vx/test.csv
args.regt_lr  0.1
Exp 0 :this is the 0/5 epochs.
The train accuracy: 84.07 %
The test  accuracy: 74.77 %
Exp 0 :this is the 1/5 epochs.
The train accuracy: 85.76 %
The test  accuracy: 75.00 %
Exp 0 :this is the 2/5 epochs.
The train accuracy: 86.21 %
The test  accuracy: 74.84 %
Exp 0 :this is the 3/5 epochs.
The train accuracy: 86.45 %
The test  accuracy: 74.51 %
Exp 0 :this is the 4/5 epochs.
The train accuracy: 86.61 %
The test  accuracy: 74.29 %


In [10]:
exp = 1
train_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[3.0]}/train.csv')
# train = CausalDataset(train_df, variables = ['u','x','v','xs','z','p','s','m','t','g','y','f','c'], observe_vars=['v','x','xs'])
train_df.columns


Index(['v1', 'v2', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10',
       'u1', 'u2', 'u3', 'u4', 'xs1', 'xs2', 'z', 'pi', 't', 'mu0', 'mu1', 'y',
       'f'],
      dtype='object')

In [14]:
''' OOD test'''
br = [-3.0, -2.5, -2.0, -1.5, -1.3, 0.0, 1.3, 1.5, 2.0, 2.5, 3.0]
brdc = {-3.0: 'n30', -2.5:'n25', -2.0:'n20', -1.5:'n15', -1.3:'n13', 1.3:'p13', 1.5:'p15', 2.0:'p20', 2.5:'p25', 3.0:'p30', 0.0:'0'}
exp = 0
for r in br:
    args.data_dict[r] = {
        'train': None,
        'val': None,
        'test': None,
        'trainloader_reg': None,
        'env': 0,
    }
    if r in args.env_list:
        args.data_dict[r]['env'] = 1
        train_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[r]}/{args.mode}/train.csv')
        val_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[r]}/{args.mode}/val.csv')
        test_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[r]}/{args.mode}/test.csv')

        args.data_dict[r]['train'] = CausalDataset(train_df, variables = ['u','x','v','xs','z','p','s','m','t','g','y','f','c'], observe_vars=['v','x','xs'])
        args.data_dict[r]['val'] = CausalDataset(val_df, variables = ['u','x','v','xs','z','p','s','m','t','g','y','f','c'], observe_vars=['v','x','xs'])
        args.data_dict[r]['test'] = CausalDataset(test_df, variables = ['u','x','v','xs','z','p','s','m','t','g','y','f','c'], observe_vars=['v','x','xs'])


exp = 0
if args.use_gpu:
    device = torch.device('cuda' if torch.cuda.is_available() and args.use_gpu else "cpu")
else:
    device = torch.device('cpu')
train_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[-1.3]}/{args.mode}/train.csv')
train_df2 = pd.read_csv(dataDir + f'{exp}/ood_{brdc[2.5]}/{args.mode}/train.csv')
val_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[-3.0]}/{args.mode}/val.csv')
test_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[3.0]}/{args.mode}/val.csv')
# 合并 train 和 val 数据集
combined_df = pd.concat([train_df, train_df2], ignore_index=True)
# 打乱顺序
combined_df = shuffle(combined_df)

# 创建新的数据集
combined_dataset = CausalDataset(combined_df, variables=['v', 'u', 'x', 'xs', 'z', 'p', 's', 'm', 't', 'g', 'y', 'f', 'c'])
train = CausalDataset(train_df, variables = ['v','u','x','xs','z','p','s','m','t','g','y','f','c'])
val = CausalDataset(val_df, variables = ['v','u','x','xs','z','p','s','m','t','g','y','f','c'])
test = CausalDataset(test_df, variables = ['v','u','x','xs','z','p','s','m','t','g','y','f','c'])
print(args.env_list)
run_ood_IRM(exp, args, dataDir, resultDir, train, val, test, ood_test_dict=[]) 


[-3.0, -1.5, 1.5]
Exp 0 :this is the 0/5 epochs.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
The train accuracy: 74.91 %
The test  accuracy: 72.24 %
Exp 0 :this is the 1/5 epochs.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
The train accuracy: 77.31 %
The test  accuracy: 75.24 %
Exp 0 :this is the 2/5 epochs.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
The train accuracy: 78.96 %
The test  accuracy: 77.32 %
Exp 0 :this is the 3/5 epochs.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
The train accuracy: 79.84 %
The test  accuracy: 78.16 %
Exp 0 :this is the 4/5 epochs.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
The train accuracy: 80.56 %
The test  accuracy: 79.11 %


In [7]:
env = args.data_dict[-3.0]['env']

In [9]:
env = 10

In [12]:
args.data_dict[-3.0]['env']

10

In [5]:
exp = 0
train_df = pd.read_csv(dataDir + f'{exp}/ood_{brdc[3.0]}/{args.mode}/train.csv')

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split

# 读取数据集
data = train_df

# 划分训练集和剩余数据
train_data, remaining_data = train_test_split(data, test_size=0.3, shuffle=True, random_state=42)

# 划分测试集和验证集
test_data, val_data = train_test_split(remaining_data, test_size=(1/3), shuffle=True, random_state=42)

# 打印划分后的数据集大小
print("训练集大小:", len(train_data))
print("测试集大小:", len(test_data))
print("验证集大小:", len(val_data))

# 进行后续操作，使用划分后的数据集

训练集大小: 7000
测试集大小: 2000
验证集大小: 1000


In [10]:
train_data

Unnamed: 0,v0,v1,u0,u1,u2,u3,x0,x1,x2,x3,...,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13
9069,0.524600,-0.525552,0.481662,0.401454,1.898650,1.968174,0.978375,-0.341613,0.629651,0.082054,...,0.629651,0.082054,-0.924961,-0.518290,1.281546,0.598323,0.613425,0.756848,0.467906,0.222363
2603,1.214455,-0.751910,-0.811350,1.006010,-0.449178,0.515667,-0.471500,-0.193685,0.511216,0.235622,...,0.511216,0.235622,-0.512427,0.555847,-0.094491,-0.013939,1.032238,0.105693,0.325317,0.432817
7738,-0.671099,0.036636,0.227971,-0.396112,2.877801,0.635873,-0.349373,0.692276,-0.002813,0.229380,...,-0.002813,0.229380,-0.101457,-1.053224,1.369451,0.272344,0.730052,-0.148678,0.866132,1.052830
1579,-1.863404,-0.307318,1.752581,0.183689,-0.062972,-1.531521,-0.795271,-1.164185,1.105108,-0.432358,...,1.105108,-0.432358,-0.586890,-0.518608,1.229404,0.649537,-0.490955,0.066665,1.139130,0.955422
5058,-0.705511,1.106909,-0.992365,-0.317162,1.583554,0.884582,-0.386704,0.997796,0.503503,-0.681674,...,0.503503,-0.681674,0.281742,0.384268,-0.628569,-0.117194,-0.575836,0.612674,0.679975,0.800622
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5734,-0.428441,-1.260926,-1.175668,0.752964,-1.181665,-1.542833,-0.270879,0.104049,0.112153,-0.230140,...,0.112153,-0.230140,-1.179994,-0.036267,0.631529,0.602401,1.270601,-0.883364,0.836381,0.779818
5191,-0.221534,-0.403937,-0.472669,1.350004,-0.472227,0.822671,-1.263000,0.322743,-0.788642,-1.244502,...,-0.788642,-1.244502,-1.398260,-0.039018,-1.279876,0.152198,-0.146648,-1.779244,1.645399,1.345456
5390,0.902519,0.426075,0.611076,1.684176,1.037696,-0.433580,-0.121180,0.517547,0.117253,-0.191259,...,0.117253,-0.191259,0.664572,-0.807385,0.818987,0.415126,0.225441,-0.618124,0.206968,0.544344
860,1.659318,0.078774,0.930513,-0.303925,0.326301,0.635371,-0.043024,-1.330140,0.505785,-0.455293,...,0.505785,-0.455293,0.229716,0.383941,-0.910247,0.768532,0.665644,1.180827,0.082144,0.333329


In [11]:
dataDir='../Data/Causal/'
ihdp_train_path = dataDir + 'ihdp_npci_1-1000.test.npz'
ihdp_train = np.load(ihdp_train_path)

In [12]:
ihdp_train['x'].shape

(75, 25, 1000)

In [11]:
str(10)

'10'

In [15]:
['x' + str(i+1) for i in range(10)] + ['xs' + str(i+1) for i in range(2)]

['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'xs1', 'xs2']