In [1]:
from causallib.contrib.hemm import HEMM

In [2]:
from causallib.contrib.hemm.gen_ihdp_data import generateData

In [3]:
from causallib.contrib.hemm.hemm_utilities import genSplits, returnIndices

In [4]:
from causallib.contrib.hemm.hemm_utilities import getMeanandStd

In [5]:
from causallib.contrib.hemm.hemm_outcome_models import BalancedNet, genLinearModule, genMLPModule

In [6]:
# Load the IHDP data dictionary; later cells read its 'TRAIN' and 'TEST' entries.
data = generateData()

IHDP Data exists


In [7]:
#data

In [8]:
import numpy as np
import torch
# Seed NumPy's global RNG for this notebook process.
# NOTE(review): worker processes started by joblib further down may not inherit
# this seed -- confirm whether the parallel run needs its own seeding.
np.random.seed(0)
# Commented-out values below look like scratch settings for stepping through
# the body of experiment() by hand -- TODO confirm and remove if obsolete.
# ###
# i = 0 
# outcomeModel='CF'
# comp=2
# lr=1e-3
# batch_size=5
# response='cont'
# ###


def experiment(i, comp=2, response='bin', outcomeModel='linear', lr=1e-3, batch_size=100):
    """Fit a HEMM model on one realization of ``data`` and return effect estimates.

    Reads the notebook-level ``data`` dictionary: column ``i`` of the
    ``'x'``, ``'t'`` and ``'yf'`` arrays under ``data['TRAIN']`` and
    ``data['TEST']``. The training rows are divided into a fitting part and a
    validation part using ``genSplits`` / ``returnIndices``; the model is fit
    on the former and validated on the latter.

    Parameters
    ----------
    i : int
        Index along the last axis of the arrays in ``data``.
    comp : int
        Forwarded to ``HEMM`` as its second positional argument
        (presumably the number of mixture components -- TODO confirm).
    response : str
        Forwarded to ``HEMM`` as ``response``.
    outcomeModel : str or object
        ``'MLP'``, ``'linear'`` or ``'CF'`` select an outcome module built
        here; any other value is forwarded to ``HEMM`` unchanged.
    lr : float
        Forwarded to ``HEMM`` as ``learning_rate``.
    batch_size : int
        Forwarded to ``HEMM`` as ``batch_size``.

    Returns
    -------
    tuple
        ``(in_sample, out_of_sample)``: for the full training rows and for the
        test rows respectively, element ``[1]`` minus element ``[0]`` of
        ``model.estimate_individual_outcome(...)``, converted with
        ``.to_numpy()``.
    """

    def to_tensor(array):
        # Every array handed to the model is converted to a float64 tensor.
        return torch.from_numpy(array.astype('float64'))

    Xall = data['TRAIN']['x'][:, :, i]
    Tall = data['TRAIN']['t'][:, i]
    Yall = data['TRAIN']['yf'][:, i]

    # genSplits receives an all-ones array in place of the outcomes.
    splits = genSplits(Tall, np.ones_like(Yall))
    train, dev = returnIndices(splits)

    # Statistics are taken over all training rows (before the train/dev split).
    mu, std = getMeanandStd(Xall)

    Xdev, Ydev, Tdev = to_tensor(Xall[dev]), to_tensor(Yall[dev]), to_tensor(Tall[dev])
    Xtr, Ytr, Ttr = to_tensor(Xall[train]), to_tensor(Yall[train]), to_tensor(Tall[train])

    Xte = to_tensor(data['TEST']['x'][:, :, i])
    Tte = to_tensor(data['TEST']['t'][:, i])

    # Seed torch before any module is constructed so initialisation is repeatable.
    torch.manual_seed(0)

    n_features = Xte.shape[1]

    if outcomeModel == 'MLP':
        # Floor division: under Python 3 `/` produces a float (e.g. 5.0), which
        # is not a valid size argument (presumably the hidden width).
        outcomeModel = genMLPModule(n_features, n_features // 5, 2)
    elif outcomeModel == 'linear':
        outcomeModel = genLinearModule(n_features, 2)
    elif outcomeModel == 'CF':
        outcomeModel = BalancedNet(n_features, n_features, 1)

    model = HEMM(n_features, comp, homo=True, mu=mu, std=std, bc=6, lamb=0.0000,
                 spread=.00, outcome_model=outcomeModel, sep_heads=True, epochs=500,
                 learning_rate=lr, weight_decay=0.0001, metric='LL', use_p_correction=False,
                 response=response, imb_fun=None, batch_size=batch_size)

    model.fit(Xtr, Ttr, Ytr, validation_data=(Xdev, Tdev, Ydev))

    # In-sample estimates use every training row, not only the fitting part.
    inSampleITE = model.estimate_individual_outcome(to_tensor(Xall), to_tensor(Tall))
    outSampleITE = model.estimate_individual_outcome(Xte, Tte)

    return ((inSampleITE[1] - inSampleITE[0]).to_numpy(),
            (outSampleITE[1] - outSampleITE[0]).to_numpy())

In [9]:
# Single sequential run on realization 0.
results = [
    experiment(
        i=0,
        comp=3,
        response='cont',
        outcomeModel='CF',
        lr=1e-3,
        batch_size=50,
    )
]

In [10]:
from joblib import Parallel, delayed


In [None]:
# Run realizations 0..9 through experiment() using ten joblib workers.
results = Parallel(n_jobs=10)(
    delayed(experiment)(
        i=i,
        comp=3,
        response='cont',
        outcomeModel='CF',
        lr=1e-3,
        batch_size=10,
    )
    for i in range(10)
)


In [26]:
#experiment(i=0, comp=3, response='cont', outcomeModel='CF', lr=1e-3, batch_size=10) 

In [27]:
# For each realization, compare the reference effect (mu1 - mu0) with the
# estimate returned by experiment() and record the root-mean-square gap for
# the training rows and the test rows.
rootpehe = []
for i, estimate in enumerate(results):
    # Realizations that produced no result are left out.
    if estimate is None:
        continue

    difftr = ((data['TRAIN']['mu1'][:, i] - data['TRAIN']['mu0'][:, i]) - estimate[0]) ** 2
    diffte = ((data['TEST']['mu1'][:, i] - data['TEST']['mu0'][:, i]) - estimate[1]) ** 2

    rootpehe.append((np.sqrt(difftr.mean()), np.sqrt(diffte.mean())))
    

In [28]:
# Stack the (train, test) pairs into an array so each column can be sliced.
rootpehe = np.array(rootpehe)

In [29]:
# Column 0 holds the training-set value for each realization.
rootpehe[:,0]

array([1.12635803, 0.84228906, 0.82053145, 1.26380897, 1.06760059,
       0.85831832, 0.84142457, 0.84366406, 5.63072984, 1.83672189])

In [30]:
# Mean over realizations: training column first, then test column.
for column in (0, 1):
    print(rootpehe[:, column].mean())

1.5131446772446633
1.3892723853069118
