In [1]:
# configure train/conf.py before running this notebook
# path2code = path to the Copula-GP main dir
# path2data = path to the data dir
# path2outputs = path to the output dir, where results and models will be stored

In [1]:
import pickle as pkl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

import sys
home = '/home/nina/CopulaGP/'
sys.path.insert(0, home)

import torch
import bvcopula
import select_copula
import utils
import train
from vine import CVine
from train import conf

In [8]:
# Number of evenly spaced conditioning points on [0, 1].
NSamp = 10000

# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA instead of failing on the first `.to(device)`.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# `x` stays a NumPy array (used for colour maps and pickled data);
# `test_x` is the same grid as a float32 tensor on `device`.
x = torch.linspace(0., 1., NSamp).numpy()
test_x = torch.tensor(x).float().to(device=device)

RuntimeError: CUDA error: device-side assert triggered

In [9]:
# Pair-copula models that make up the synthetic vine. Every model is built by
# `utils.create_model` with `x.shape[0]` as its third argument and placed on
# `device`. The numeric suffix of each name presumably identifies the pair of
# variables the model couples (naming convention inferred from the identifiers).
copula_model01 = utils.create_model(
    'mixes',
    [
        bvcopula.ClaytonCopula_Likelihood(rotation='90°'),
        bvcopula.ClaytonCopula_Likelihood(rotation='0°'),
    ],
    x.shape[0],
    device=device,
)
copula_model02 = utils.create_model(
    'thetas',
    [bvcopula.FrankCopula_Likelihood()],
    x.shape[0],
    device=device,
)
# Alternatives that are currently switched off:
# copula_model03 = utils.create_model('thetas',[bvcopula.GumbelCopula_Likelihood(rotation='90°')],x.shape[0],device=device)
# copula_model12 = utils.create_model('thetas',[bvcopula.GaussianCopula_Likelihood()],x.shape[0],device=device)
# copula_model13 = utils.create_model('thetas',[bvcopula.GumbelCopula_Likelihood(rotation='180°')],x.shape[0],device=device)
copula_model23 = utils.create_model(
    'thetas',
    [bvcopula.GumbelCopula_Likelihood(rotation='270°')],
    x.shape[0],
    device=device,
)

# One inner list per vine layer.
layers = [
    [copula_model01, copula_model02],
    [copula_model23],
]

RuntimeError: CUDA error: device-side assert triggered

In [4]:
# Build the C-vine from the model layers with `test_x` as its inputs, then draw
# from it. Later cells index the result as `V[0]` and `V[1]`.
vine = CVine(layers, test_x, device=device)
V = vine.sample()

In [5]:
# Average log-probability that the vine assigns to its own draw `V[0]`.
torch.mean(vine.log_prob(V[0]))

tensor(0.9273, device='cuda:0')

In [6]:
def stimMI(vine, alpha=0.05, sem_tol=1e-3, mc_size=20):
    """Monte-Carlo estimate of the mutual information between the vine's
    inputs ("s") and the variables it samples ("r").

    Two running means are accumulated: `Hrs`, the mean of log p(r|s), and
    `Hr`, the mean of log p(r), where p(r) is itself estimated by an inner
    Monte-Carlo loop. Both logs are divided by ln(2), i.e. the result is in
    bits if `vine.log_prob` returns natural logarithms (presumably - confirm).

    Args:
        vine: object exposing `inputs` (with `.numel()`), `sample(torch.Size)`
            and `log_prob(samples)`; a `CVine` in this notebook.
        alpha: level used to build the confidence factor `erfinv(1 - alpha)`.
        sem_tol: tolerance. The outer loop runs until both standard-error
            estimates drop below it; the inner loop runs until the relative
            error of p(r) drops below it.
        mc_size: size passed to `vine.sample` on every outer iteration.

    Returns:
        Tuple `(Hrs - Hr, sqrt(sem[0]**2 + sem[1]**2), Hr, sem[1])`.

    Notes:
        * `device` is not a parameter; it is read from the enclosing
          (notebook-global) scope.
        * The local name `conf` shadows the `conf` module imported from
          `train` for the duration of the call.
        * The function prints debugging information on every iteration.
    """
    # Gaussian confidence interval for sem_tol and level alpha
    conf = torch.erfinv(torch.tensor([1. - alpha])).to(device)
    # Start at +inf so the outer loop body runs at least once.
    sem = torch.ones(2).to(device)*float('inf')
    Hrs = torch.zeros(1).to(device) # sum of conditional entropies 
    Hr = torch.zeros(1).to(device) # entropy of p(r)
    var_sum = torch.zeros_like(sem)
    # Natural log of 2, used to rescale log-probabilities.
    log2 = torch.tensor([2.]).log().to(device)
    k = 0
    inputs = vine.inputs.numel()
    N = mc_size*inputs
    with torch.no_grad():
        while torch.any(sem >= sem_tol):
            # Generate samples from p(r|s)*p(s)
            samples = vine.sample(torch.Size([mc_size]))[0]
            # NOTE(review): `subset` holds 100 indices drawn from
            # range(numel(samples[...,0])), i.e. the product of ALL leading
            # dimensions, but it is used to index only the first dimension.
            # If `samples` has more than one leading dimension these indices
            # can be out of range for dim 0 - a plausible cause of the
            # "CUDA error: device-side assert triggered" recorded in this
            # notebook's output. Confirm the intended subsetting.
            subset = torch.randperm(torch.numel(samples[...,0]))[:100]
            samples = samples[subset]
            # Swap the first two axes.
            samples = torch.einsum("ij...->ji...",samples) # samples(1D MC) x inputs (1D) x variables (1D)

            # Debug output.
            print(samples.shape)
            logpRgS = vine.log_prob(samples) / log2
            # x == x is False only for NaN, so this rejects NaNs ...
            assert torch.all(logpRgS==logpRgS)
            # ... and this rejects +/-inf.
            assert torch.all(logpRgS.abs()!=float("inf"))

            # now find E[p(r|s)] under p(s) with MC
            rR = torch.ones(mc_size).to(device)*float('inf')
            pR = torch.zeros(mc_size).to(device)
            var_sumR = torch.zeros(mc_size).to(device)
            kR = 0
            # print(f"Start calculating p(r) {k}")
            while torch.any(rR >= sem_tol): #relative error of p(r) = absolute error of log p(r)
                new_order = torch.randperm(torch.numel(samples[...,0])) # permute samples & inputs
                new_subset = samples.reshape(-1,samples.shape[-1])[new_order]
                # Debug output.
                print(new_order.shape,new_subset.shape)
                # NOTE(review): `new_subset` is computed above but never used;
                # log_prob is evaluated on the un-permuted `samples`, so every
                # inner iteration sees the same data. Confirm whether
                # `new_subset` (suitably reshaped) was intended here.
                pRs = vine.log_prob(samples).exp()
                kR += 1
                # Monte-Carlo estimate of p(r)
                pR += (pRs.mean(dim=-1) - pR) / kR
                # Estimate standard error
                var_sumR += ((pRs - pR.unsqueeze(-1)) ** 2).sum(dim=-1)
                semR = conf * (var_sumR / (kR * inputs * (kR * inputs - 1))).pow(.5) 
                rR = semR/pR #relative error
                # Progress report every 20 inner iterations.
                if kR%20==0:
                    print(rR.max()/sem_tol)
            # print(f"Finished in {kR} steps")

            logpR = pR.log() / log2 #[N,f]
            k += 1
            # Hard cap on the number of outer iterations.
            if k>100:
                print('MC integral failed to converge')
                break
            # Monte-Carlo estimate of MI
            #MI += (log2p.mean(dim=0) - MI) / k # mean over sample dimensions -> [f]
            # Incremental running-mean updates.
            Hrs += (logpRgS.mean(dim=0) - Hrs) / k # negative sum H(r|s) * p(s)
            Hr += (logpR.mean(dim=0) - Hr) / k # negative entropy H(r)
            # Estimate standard error
            var_sum[0] += ((logpRgS - Hrs) ** 2).sum(dim=0)
            var_sum[1] += ((logpR - Hr) ** 2).sum(dim=0)
            sem = conf * (var_sum / (k * N * (k * N - 1))).pow(.5)
            # print(f"{Hrs.mean().item():.3},{Hr.mean().item():.3},{(Hrs.mean()-Hr.mean()).item():.3},\
            #     {sem[0].max().item()/sem_tol:.3},{sem[1].max().item()/sem_tol:.3}") #balance convergence rates
    return (Hrs-Hr), (sem[0]**2+sem[1]**2).pow(.5), Hr, sem[1] #2nd arg is an error of sum
    

In [7]:
# Run the mutual-information estimator on the synthetic vine, requesting
# 200 draws from `vine.sample` per outer iteration.
stimMI(vine=vine, mc_size=200)

torch.Size([200, 20000, 3])


RuntimeError: CUDA error: device-side assert triggered

In [13]:
# Take the data directory from train/conf.py (which this notebook asks the
# user to configure) instead of a hard-coded absolute path on one machine.
path = conf.path2data
exp_pref = 'SCVine'

# Observations for the first layer: the vine draw, moved to host memory.
y = V[0].cpu().numpy()
# Every value must lie strictly inside the unit interval (0, 1).
assert np.all((y > 0) & (y < 1)), "samples must lie strictly inside (0, 1)"

data = {'X': x, 'Y': y}

with open(f"{path}/{exp_pref}_layer0.pkl", 'wb') as f:
    pkl.dump(data, f)

In [None]:
# Process the vine layer by layer. For each layer index: call
# `train.train_next_layer`, pickle what it returns under `conf.path2outputs`,
# then call `train.transform2next_layer` and print its return value.
all_results = []
for layer in range(len(layers)):

    print(f'Starting {exp_pref} layer {layer}')

    results = train.train_next_layer(exp_pref, layer, device=device)
    all_results += [results]

    models_file = f"{conf.path2outputs}/{exp_pref}/models_layer{layer}.pkl"
    with open(models_file, "wb") as f:
        pkl.dump(results, f)

    NN = train.transform2next_layer(exp_pref, layer, device)
    print(f"NN = {NN}")

Starting SCVine layer 0
[Errno 17] File exists: '/home/nina/CopulaGP/synthetic_data/outputs/SCVine'
[Errno 17] File exists: '/home/nina/CopulaGP/synthetic_data/outputs/SCVine/layer0'
Selecting 0-1 on cpu


In [None]:
# Load the pickled layer-1 dataset from the data directory (presumably written
# by the training loop - confirm). Only unpickle files you produced yourself:
# `pickle.load` can execute arbitrary code.
layer1_file = f"{path}/{exp_pref}_layer1.pkl"
with open(layer1_file, 'rb') as f:
    layer1 = pkl.load(f)

In [None]:
# Scatter the first two columns of the layer-1 data, keeping every 10th row
# and colouring each point by its value of `x`.
colors = cm.rainbow(x)
thinned = layer1['Y'][::10]
plt.figure(figsize=(3, 3))
plt.scatter(thinned[:, 0], thinned[:, 1], color=colors[::10])

In [None]:
# Pair column 0 of `V[1]` with column 0 of the loaded layer-1 data and plot one
# against the other, coloured by `x`.
drawn_col0 = V[1][..., 0].cpu().numpy()
layer1_col0 = layer1['Y'][:, 0]
Y_new = np.array([drawn_col0, layer1_col0]).T

colors = cm.rainbow(x)
plt.figure(figsize=(3, 3))
plt.scatter(*Y_new.T, color=colors)

In [None]:
# Walk through the vine by hand: for every copula in every layer, add its
# log-probability to a running total and collect its `ccdf` output; the stacked
# `ccdf` outputs become the data for the next layer.
#
# NOTE(review): this rebinds `layers`, which earlier cells use for the list of
# copula models (CVine construction, training loop). Re-running those cells
# after this one would pick up tensors instead of models.
layers = [V[0]]
log_prob = 0
for layer, copulas in enumerate(vine.layers):
    next_layer = []
    for n, copula in enumerate(copulas):
        print(layer, layer + n + 1, copula.copulas)
        # Column n+1 first, then column 0.
        columns = [n + 1, 0]
        log_prob += copula.log_prob(layers[-1][:, columns])
        next_layer.append(copula.ccdf(layers[-1][:, columns]))
    layers.append(torch.stack(next_layer, dim=-1))
print(log_prob.mean())
#         samples = torch.tensor(Y[:,[n,0]]).float().squeeze().to(device) # order!

In [None]:
# Plot column 1 of the original draw against column 0 of the third entry of the
# hand-propagated `layers` list, coloured by `x`.
first_draw = V[0]
print(first_draw[:, 0].shape)

colors = cm.rainbow(x)
plt.figure(figsize=(3, 3))
plt.scatter(
    first_draw[:, 1].cpu().numpy(),
    layers[2][:, 0].cpu().numpy(),
    color=colors,
)

In [None]:
# Shape of the draw with the last axis fixed at index 0.
V[0][..., 0].shape