### Csuite data simulation testing template

In [8]:
# User
user = "nk1922"

# Imports
import torch
from torch import nn
from torch.distributions import Normal,Laplace,Uniform
import matplotlib.pyplot as plt
import os
import time
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

os.chdir('C:/Users/{0}/OneDrive/Documents/Cocycles project/Cocycle_code'.format(user))
from Cocycle_CDAGM import *
from Cocycle_model import *
from Cocycle_optimise import *
from Cocycle_loss_functions import *
from Conditioners import *
from Transformers import *
from KDE_estimation import *
from Kernels import *
from Helper_functions import *
os.chdir('C:/Users/{0}/OneDrive/Documents/Cocycles project/Experiments_code'.format(user))
from Csuite import Nonlin_Gauss_chain as DGP
from Csuite import NonlinGausschain_conditioner as Conditioner
#from Csuite import Fork_Nonlin as DGP
#from Csuite import Fork_conditioner as Conditioner
#from Csuite import Simpson_Nonlin as DGP
#from Csuite import Simpson_conditioner as Conditioner
os.chdir('C:/Users/{0}/OneDrive/Documents/Cocycles project/Experimental_results'.format(user))

In [9]:
# Experimental set up
parents = [[],[0],[1]]
#parents = [[],[],[0,1],[2]]
#parents = [[],[0],[0,1],[2]]
trials = 1
N,Nint = 5000,10**7
int_levels = [-2,-1,0,1,2]
intervention = lambda a,x : a+x*0
quantiles = [0.1,0.5,0.9]
adversarial_distribution = False
store_quantiles = False
gamma_param = 1

# Cocycle Estimation set up
cocycle_estimators = ["CLS_M", 
                      "CMMD_M",
                      "JMMD_M",
                      "HSIC"]
RFF_features = [False,False,False,False]
n_RFF = [100,100,100,100]
median_heuristic = [False,True,True,True]

# RQS set up
RQS_bins = [2,4,8]

# MLE set up
Gaussian_SCM = True

# NN set up
train_val_split = 1
ntrain = int(train_val_split*N)
width = 128
layers = 2
conditioner_learn_rate = 1e-3
transformer_learn_rate = 1e-3
scheduler = True
val_tol = 1e-3
val_loss = False
batch_size = 64
maxiter = 5000
miniter = 5000

# KDE set up
kde_learn_rate = 0.1
kde_miniter = 200
kde_maxiter = 1000
kde_tol = 1e-4
kde_nfold = 5
kde_reg = 1e-10

# Samples to draw 
mc_samples = 10**5

In [10]:
# Names and dimensions
names = (["Gaussian SCM"]+
         ["RQS SCM {0}".format(i) for i in RQS_bins]+
         ["Cocycles {0}".format(i) for i in cocycle_estimators]+
         ["Cocycles {0} KDE".format(i) for i in cocycle_estimators]+
         ["True"])
n_model = len(names)
n_int = len(int_levels)
n_quantile = len(quantiles)

# Storage objects
ATE = torch.zeros((n_model,n_int,trials))
QTE = torch.zeros((n_model,n_int,trials,n_quantile))
E_DO = torch.zeros((n_model,n_int,trials))
Q_DO = torch.zeros((n_model,n_int,trials,n_quantile))
Training_time = torch.zeros((n_model,trials))

# True model construction
true_models = []
for i in range(len(parents)):
    true_models.append(cocycle_model([Conditioner(i)],Shift_Transformer()))

In [14]:
# Initiating trials
for t in range(trials):
    
    torch.manual_seed(t)
    
    # Draw data
    Xobs,Xint = DGP(N, Nint, True, intervention, int_levels, adversarial_distribution, alpha = gamma_param)
    Xobs,Xobstest = Xobs[:N],Xobs[N:]
    
    # Estimate models
    ls_models = []
    cocycle_models = [[] for i in range(len(cocycle_estimators))]
    RQS_models = [[]for i in range(len(RQS_bins))]
    
    for i in range(len(parents)):
        
        # Getting relevant variables from graph
        index_x,index_y = parents[i],[i]
        X,Y = Xobs[:,index_x].view(N,len(index_x)),Xobs[:,index_y].view(N,len(index_y))
          
        # Data Preprocessing
        inputs_train,outputs_train, inputs_val,outputs_val = X[:ntrain],Y[:ntrain],X[ntrain:],Y[ntrain:]

        # Defining and training Gaussian SCM
        if Gaussian_SCM:
            start_time = time.time()
            loss_fn = Loss(loss_fn = "MLE")
            if len(parents[i])>0:
                conditioner_shift = NN_Conditioner(width = width, layers = layers, input_dims =  len(index_x), output_dims = len(index_y) ,bias = True)
                conditioner_scale = Constant_Conditioner(init = torch.log(outputs_train.var()**0.5/2))
            else:
                conditioner_shift = Constant_Conditioner(init = outputs_train.mean())
                conditioner_scale = Constant_Conditioner(init = torch.log(outputs_train.var()**0.5))
            transformer = Affine_Transformer(log_det = True)
            ls_model = cocycle_model([conditioner_shift,conditioner_scale],transformer)
            ls_model = Train(ls_model).optimise(loss_fn,inputs_train,outputs_train,inputs_val,outputs_val, batch_size = batch_size,conditioner_learn_rate = conditioner_learn_rate,
                                                transformer_learn_rate = transformer_learn_rate,print_ = True,plot = False, miniter = miniter,maxiter = maxiter, val_tol = val_tol,val_loss = val_loss,
                                               scheduler = scheduler)
            ls_model.transformer.ld = False # set log determinant to false after
            ls_models.append(ls_model)

            # Storing training time
            Training_time[0,t] +=  time.time() - start_time

        # Defining and training cocycle models
        if len(parents[i])>0:
            for m in range(len(cocycle_estimators)):
                start_time = time.time()
                loss_fn = Loss(loss_fn = cocycle_estimators[m],kernel = [gaussian_kernel(torch.ones(1),1),gaussian_kernel(torch.ones(1),1)])
                if RFF_features[m]:
                    loss_fn.get_RFF_features(n_RFF[m])
                if median_heuristic[m]:
                    if cocycle_estimators[m] == "HSIC":
                        loss_fn.median_heuristic(inputs_train,outputs_train/2, subsamples = 10**4)
                    else:
                        loss_fn.median_heuristic(inputs_train,outputs_train, subsamples = 10**4)                        
                conditioner = NN_Conditioner(width = width, layers = layers, input_dims =  len(index_x), output_dims = len(index_y) ,bias = True)
                transformer = Shift_Transformer()
                model = cocycle_model([conditioner],transformer)
                model = Train(model).optimise(loss_fn,inputs_train,outputs_train,inputs_val,outputs_val, batch_size = batch_size,conditioner_learn_rate = conditioner_learn_rate,
                                              transformer_learn_rate = transformer_learn_rate,print_ = True,plot = False, miniter = miniter,maxiter = maxiter, val_tol = val_tol,val_loss = val_loss,
                                                                     scheduler = scheduler)
                cocycle_models[m].append(model)
                
                end_time = time.time()

                Training_time[1+len(RQS_bins)+m,t] += end_time - start_time
                Training_time[1+len(RQS_bins)+len(cocycle_estimators)+m,t] += end_time - start_time
        else:
            for m in range(len(cocycle_estimators)):
                cocycle_models[m].append([])

        # Defining and training RQS flow models
        start_time = time.time()
        loss_fn = Loss(loss_fn = "MLE")
        for m in range(len(RQS_bins)):
            if len(parents[i])>0:
                conditioner = NN_Conditioner(width = width, layers = layers, input_dims =  len(index_x), output_dims = len(index_y) ,bias = True)
            else:
                conditioner = Empty_Conditioner()
            transformer =  RQS_Shift_Transformer(widths = torch.zeros((1,RQS_bins[m]),requires_grad = True),
                                   heights = torch.zeros((1,RQS_bins[m]), requires_grad = True),
                                   derivatives = torch.zeros((1,RQS_bins[m]+1), requires_grad = True)
                                   )
            RQS_model = cocycle_model([conditioner],transformer)
            RQS_model = Train(RQS_model).optimise(loss_fn,inputs_train,outputs_train,inputs_val,outputs_val, batch_size = batch_size,conditioner_learn_rate = conditioner_learn_rate,
                                                  transformer_learn_rate = transformer_learn_rate,print_ = True,plot = False, miniter = miniter,maxiter = maxiter, val_tol = val_tol,val_loss = val_loss,
                                                 scheduler = scheduler)
            RQS_model.transformer.ld = False # set log determinant to false after
            RQS_models[m].append(RQS_model)

            Training_time[1+m,t] += time.time() - start_time

    # KDE estimation for cocycles
    start_time = time.time()
    kernel = inverse_gaussian_kernel(torch.ones(len(Xobs.T),requires_grad = True),1)
    KDE_model = KDE(kernel)
    losses = KDE_model.optimise(Xobs,kde_learn_rate,kde_miniter,kde_maxiter,kde_tol,kde_nfold,kde_reg)
    Training_time[1+len(cocycle_estimators)+len(RQS_bins):-1,t] += time.time() - start_time
    
    # Defining cocycle CDAGMs
    ccdagm = []
    for m in range(len(cocycle_estimators)):
        ccdagm.append(CCDAGM(cocycle_models[m],parents))

    # Interventional prediction
    print(t)
    for i in range(len(int_levels)):
        
        # Setting intervention level
        a = int_levels[i]
        
        # Getting interventional_samples
        Xpred = [],[]
        for m in range(n_model):
            if m == 0 and Gaussian_SCM:
                Xpred,Xintpred = SCM_intervention_sample(parents,ls_models,[Normal(0,1)]*len(parents),intervention,[["id",a,"id","id"]],mc_samples)
                Xintpred = Xintpred[0]
            elif m > 0 and m <= len(RQS_bins):
                Xpred,Xintpred = SCM_intervention_sample(parents,RQS_models[m-1],[Normal(0,1)]*len(parents),intervention,[["id",a,"id","id"]],mc_samples)
                Xintpred = Xintpred[0]
            elif m > len(RQS_bins) and m <= len(RQS_bins)+len(cocycle_estimators):
                Xpred,Xintpred = ccdagm[m-len(RQS_bins)-1].interventional_dist_sample(Xobs,intervention,["id",a,"id","id"],len(Xobs),uniform_subsample = False)
            elif m > len(RQS_bins) + len(cocycle_estimators) and m < n_model -1:
                Xpred,Xintpred = ccdagm[m-len(RQS_bins)-len(cocycle_estimators)-1].interventional_dist_sample(Xobs,intervention,["id",a,"id","id"],mc_samples,density = KDE_model,uniform_subsample = True)
            else:
                Xpred,Xintpred = Xobstest,Xint[i]
                                  
            E_DO[m,i,t] = Xintpred[:,-1].mean()
            ATE[m,i,t] = E_DO[m,i,t] - Xpred[:,-1].mean()
        
            if store_quantiles:
                k = 0
                for q in quantiles:
                    Q_DO[m,i,t,k] = Xintpred[:,-1].quantile(q)
                    QTE[m,i,t,k] = Q_DO[m,i,t,k] - Xintpred[:,-1].quantile(q)
                    k += 1

Training loss last 10 avg is : tensor(-1.3597)
Completion % : 99.82
iter 0 , loss =  tensor(21932.8242)
iter 10 , loss =  tensor(17238.1094)
iter 20 , loss =  tensor(15938.0635)
iter 30 , loss =  tensor(15938.8281)
iter 40 , loss =  tensor(15870.0859)
iter 50 , loss =  tensor(15838.0439)
iter 60 , loss =  tensor(15831.7393)
iter 70 , loss =  tensor(15825.6113)
iter 80 , loss =  tensor(15826.5859)
iter 90 , loss =  tensor(15825.5918)
iter 100 , loss =  tensor(15825.4844)
iter 110 , loss =  tensor(15825.4453)
iter 120 , loss =  tensor(15825.4316)
iter 130 , loss =  tensor(15825.4092)
iter 140 , loss =  tensor(15825.4141)
iter 150 , loss =  tensor(15825.4141)
iter 160 , loss =  tensor(15825.4131)
iter 170 , loss =  tensor(15825.4131)
iter 180 , loss =  tensor(15825.4092)
iter 190 , loss =  tensor(15825.4102)
0


In [5]:
# Saving output
os.chdir('C:/Users/{0}/OneDrive/Documents/Cocycles project'.format(user))
torch.save({ "names": names, 
            "ATE (models x int_levels x trials)": ATE,
            "QTE (models x int_levels x trials x quantiles)": QTE,
            "EY|DO(X) (models x int_levels x trials)": E_DO,
            "QY|DO(X) (models x int_levels x trials x quantiles)": Q_DO,
            "Training time ( models x trials)": Training_time
           },
           f = f'Experimental_results/'+'Test_adversarial={0}_trials={1}_gamma_param={2}_batchsize={3}.pt'.format(adversarial_distribution,trials, gamma_param,batch_size))

RuntimeError: File Experimental_results/Test_adversarial=True_trials=1_gamma_param=1.pt cannot be opened.

In [12]:
# Current
ints = [0,1,2,3,4]
for i in range(n_model):
    print ((torch.abs(ATE[i,ints,:t+1]-ATE[-1,ints,:t+1])**2).mean(1)**0.5, names[i])

tensor([0.0366, 0.0463, 0.0317, 0.0034, 0.0034]) Gaussian SCM
tensor([0.0096, 0.0953, 0.0495, 0.0050, 0.0739]) RQS SCM 2
tensor([0.0269, 0.0277, 0.0161, 0.0547, 0.0199]) RQS SCM 4
tensor([0.0347, 0.0078, 0.0122, 0.0294, 0.0582]) RQS SCM 8
tensor([0.0089, 0.0119, 0.0096, 0.0150, 0.0073]) Cocycles CLS_M
tensor([0.0434, 0.0393, 0.0170, 0.0827, 0.0495]) Cocycles CMMD_M
tensor([0.0049, 0.0211, 0.0151, 0.0430, 0.0385]) Cocycles JMMD_M
tensor([0.0014, 0.0380, 0.0528, 0.0997, 0.0850]) Cocycles HSIC
tensor([0.0138, 0.0143, 0.0103, 0.0163, 0.0089]) Cocycles CLS_M KDE
tensor([0.0453, 0.0384, 0.0217, 0.0840, 0.0540]) Cocycles CMMD_M KDE
tensor([0.0037, 0.0174, 0.0110, 0.0455, 0.0424]) Cocycles JMMD_M KDE
tensor([0.0049, 0.0393, 0.0523, 0.1024, 0.0922]) Cocycles HSIC KDE
tensor([0., 0., 0., 0., 0.]) True


In [31]:
# NonlinGauss Gauss
ints = [0,1,2,3,4]
for i in range(n_model):
    print ((torch.abs(ATE[i,ints,:t+1]-ATE[-1,ints,:t+1])**2).mean(1)**0.5, names[i])

tensor([0.0165, 0.0427, 0.0197, 0.0297, 0.0818]) Gaussian SCM
tensor([0.0596, 0.0625, 0.0208, 0.0455, 0.1094]) RQS SCM 2
tensor([0.0052, 0.0531, 0.0200, 0.0372, 0.0107]) RQS SCM 4
tensor([0.0142, 0.0149, 0.0351, 0.0058, 0.0230]) RQS SCM 8
tensor([0.0210, 0.0338, 0.0419, 0.0574, 0.0411]) Cocycles CLS_M
tensor([0.0182, 0.0134, 0.0132, 0.1109, 0.0947]) Cocycles CMMD_M_RFF
tensor([0.0223, 0.0012, 0.0526, 0.0574, 0.1265]) Cocycles JMMD_M_RFF
tensor([0.0227, 0.0362, 0.0438, 0.0603, 0.0411]) Cocycles CLS_M KDE
tensor([0.0197, 0.0141, 0.0157, 0.1124, 0.0963]) Cocycles CMMD_M_RFF KDE
tensor([0.0237, 0.0065, 0.0503, 0.0560, 0.1282]) Cocycles JMMD_M_RFF KDE
tensor([0., 0., 0., 0., 0.]) True


In [17]:
# Fork adverse 1
ints = [0,1,2,3,4]
for i in range(n_model):
    print ((torch.abs(ATE[i,ints,:t+1]-ATE[-1,ints,:t+1])**2).mean(1)**0.5, names[i])

tensor([1.2679, 0.5764, 1.0512, 0.8755, 4.2517]) Gaussian SCM
tensor([0.4845, 0.2934, 0.4476, 0.3895, 4.1084]) RQS SCM 2
tensor([0.5963, 0.2579, 0.4390, 0.3138, 4.5994]) RQS SCM 4
tensor([0.2796, 0.3244, 0.2105, 0.4409, 4.5236]) RQS SCM 8
tensor([0.3786, 0.0422, 0.0498, 0.1297, 3.9917]) Cocycles CLS_M
tensor([0.1821, 0.0924, 0.1078, 0.2357, 4.7863]) Cocycles CMMD_M_RFF
tensor([0.2309, 0.0479, 0.0422, 0.3763, 4.4364]) Cocycles JMMD_M_RFF
tensor([0.3225, 0.0517, 0.1307, 0.2402, 3.9522]) Cocycles CLS_M KDE
tensor([0.1354, 0.0807, 0.0176, 0.3041, 4.6749]) Cocycles CMMD_M_RFF KDE
tensor([0.1768, 0.1176, 0.0385, 0.4557, 4.3503]) Cocycles JMMD_M_RFF KDE
tensor([0., 0., 0., 0., 0.]) True


In [12]:
# NonlinG adverse 1
ints = [0,1,2,3,4]
for i in range(n_model):
    print ((torch.abs(ATE[i,ints,:t+1]-ATE[-1,ints,:t+1])**2).mean(1)**0.5, names[i])

tensor([0.1955, 0.1644, 0.2406, 0.1722, 0.2436]) Gaussian SCM
tensor([0.0591, 0.0347, 0.0025, 0.0220, 0.0417]) RQS SCM 2
tensor([0.0002, 0.0729, 0.1041, 0.0864, 0.1201]) RQS SCM 4
tensor([0.0834, 0.0011, 0.0267, 0.0383, 0.0899]) RQS SCM 8
tensor([0.0135, 0.0074, 0.0161, 0.0627, 0.0023]) Cocycles CLS_M
tensor([0.0017, 0.0219, 0.0094, 0.0228, 0.0984]) Cocycles CMMD_M_RFF
tensor([0.0128, 0.0260, 0.0336, 0.0334, 0.0701]) Cocycles JMMD_M_RFF
tensor([0.0123, 0.0094, 0.0141, 0.0594, 0.0004]) Cocycles CLS_M KDE
tensor([0.0029, 0.0183, 0.0056, 0.0231, 0.0967]) Cocycles CMMD_M_RFF KDE
tensor([0.0143, 0.0247, 0.0342, 0.0350, 0.0709]) Cocycles JMMD_M_RFF KDE
tensor([0., 0., 0., 0., 0.]) True
