In [1]:
import stan
import torch
import gpytorch
import math
import asyncio
import nest_asyncio
nest_asyncio.apply()

%matplotlib inline
%load_ext autoreload
%autoreload 2


In [2]:
asyncio.run(asyncio.sleep(1))

In [3]:
# Training data is 100 points in [0,1] inclusive regularly spaced
train_x = torch.linspace(0, 1, 5)
# True function is sin(2*pi*x) with Gaussian noise
train_y = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * math.sqrt(0.04)

In [4]:

STAN_data = {'N': 100, 'D': 4, 'x': [-1.7062203884124756, -1.6717512607574463, -1.6372822523117065, -1.6028130054473877, -1.568343997001648, -1.5338748693466187, -1.499405860900879, -1.46493661403656, -1.4304676055908203, -1.395998477935791, -1.3615294694900513, -1.327060341835022, -1.2925913333892822, -1.258122205734253, -1.2236530780792236, -1.1891839504241943, -1.154714822769165, -1.1202456951141357, -1.085776686668396, -1.0513075590133667, -1.0168384313583374, -0.9823693633079529, -0.9479001760482788, -0.9134311676025391, -0.878961980342865, -0.8444929122924805, -0.8100237846374512, -0.7755547165870667, -0.7410856485366821, -0.7066165208816528, -0.6721473932266235, -0.637678325176239, -0.6032092571258545, -0.5687401294708252, -0.5342710614204407, -0.49980196356773376, -0.46533286571502686, -0.43086379766464233, -0.39639464020729065, -0.36192557215690613, -0.32745644450187683, -0.2929873466491699, -0.258518248796463, -0.2240491509437561, -0.1895800530910492, -0.15511097013950348, -0.12064185738563538, -0.08617276698350906, -0.051703665405511856, -0.017234569415450096, 0.017234528437256813, 0.05170363560318947, 0.08617272228002548, 0.1206418126821518, 0.1551109254360199, 0.189580038189888, 0.2240491807460785, 0.2585182785987854, 0.2929874062538147, 0.3274564743041992, 0.36192557215690613, 0.39639464020729065, 0.43086379766464233, 0.46533286571502686, 0.49980196356773376, 0.5342710614204407, 0.5687401294708252, 0.6032092571258545, 0.637678325176239, 0.6721474528312683, 0.7066166400909424, 0.7410856485366821, 0.7755547165870667, 0.8100237846374512, 0.8444929122924805, 0.878961980342865, 0.9134311079978943, 0.9479001760482788, 0.9823693633079529, 1.0168383121490479, 1.0513075590133667, 1.085776686668396, 1.1202456951141357, 1.154714822769165, 1.1891839504241943, 1.223652958869934, 1.2581220865249634, 1.2925913333892822, 1.327060341835022, 1.3615293502807617, 1.395998477935791, 1.4304676055908203, 1.46493661403656, 1.499405860900879, 1.5338748693466187, 1.568343997001648, 1.6028130054473877, 1.6372822523117065, 1.6717512607574463, 1.7062203884124756], 'y': [-1.947783630384947e-06, 0.8384983539581299, 1.350435495376587, 1.3364328145980835, 0.8019461631774902, -0.04487171769142151, -0.8742072582244873, -1.3630785942077637, -1.321084976196289, -0.764581024646759, 0.08969195932149887, 0.9090359807014465, 1.3743486404418945, 1.3044071197509766, 0.7264529466629028, -0.13442744314670563, -0.9429534673690796, -1.3842355012893677, -1.2864148616790771, -0.6875864267349243, 0.17902740836143494, 0.9759174585342407, 1.392728328704834, 1.2671271562576294, 0.6480297446250916, -0.22344724833965302, -1.007901668548584, -1.399818778038025, -1.2465636730194092, -0.6078219413757324, 0.26764193177223206, 1.038870096206665, 1.4054996967315674, 1.224745512008667, 0.56700199842453, -0.3115646541118622, -1.0687905550003052, -1.4097654819488525, -1.2016937732696533, -0.525610625743866, 0.3551761209964752, 1.0976362228393555, 1.412611722946167, 1.1774319410324097, 0.483690470457077, -0.39842915534973145, -1.1253769397735596, -1.4140355587005615, -1.1519849300384521, -0.4412827789783478, 0.4412810802459717, 1.1519839763641357, 1.4140355587005615, 1.1253776550292969, 0.39843034744262695, -0.48368895053863525, -1.1774319410324097, -1.412611722946167, -1.0976362228393555, -0.3551762104034424, 0.5256105065345764, 1.2016937732696533, 1.4097654819488525, 1.0687905550003052, 0.311564564704895, -0.5670020580291748, -1.224745512008667, -1.4054996967315674, -1.0388691425323486, -0.26764070987701416, 0.6078243255615234, 1.2465636730194092, 1.399818778038025, 1.007901668548584, 0.22344717383384705, -0.6480298638343811, -1.2671259641647339, -1.392728328704834, -0.9759174585342407, -0.1790301650762558, 0.6875863671302795, 1.2864148616790771, 1.3842355012893677, 0.9429534673690796, 0.13442736864089966, -0.7264506220817566, -1.3044061660766602, -1.3743486404418945, -0.9090380668640137, -0.08969474583864212, 0.764581024646759, 1.321084976196289, 1.3630785942077637, 0.8742072582244873, 0.04487164318561554, -0.8019461631774902, -1.3364328145980835, -1.350435495376587, -0.8384983539581299, 1.8736051288215094e-06], 
             't_mu': [-1.7920000553131104, 0.33799999952316284, 0.2840000092983246, -1.4630000591278076], 
             't_sigma': [[3.2660000324249268, 0.0, 0.0, 0.0], [0.0, 2.635999917984009, 0.0, 0.0], [0.0, 0.0, 0.9020000100135803, 0.0], [0.0, 0.0, 0.0, 1.6330000162124634]]}



    
STAN_code = """
    functions {
        array[] real softplus(array[] real v){
            array[num_elements(v)] real r;
            for (d in 1:num_elements(v)){
                r[d] = log(1.0 + exp(v[d]));
            }
            return r;
        }
        real softplus(real v){
            return log(1.0 + exp(v));
        }
    }
    
    data {
        int N;
        int D;
        array[N] real x;
        vector[N] y;
        vector[D] t_mu;
        matrix[D, D] t_sigma;
    }

     
    parameters {
        vector<lower=-3.0>[D] theta;
    }
    
    transformed parameters{
        cov_matrix[N] K;
        K = identity_matrix(dims(x)[1])*softplus(theta[1]) + gp_periodic_cov(x, 1.0, sqrt(softplus(theta[2])), softplus(theta[3]));
    }
    
    model {
        
        vector[N] mu;
        theta ~ multi_normal(t_mu, t_sigma);
        mu = zeros_vector(N);
        y ~ multi_normal(mu, K);
    }

    
    
    generated quantities {
        cov_matrix[N] PER = gp_periodic_cov(x, 1.0, sqrt(softplus(theta[2])), softplus(theta[3]));
        cov_matrix[N] LIN = softplus(theta[4]) * gp_dot_prod_cov(x, 0.0);
        cov_matrix[N] noise =  identity_matrix(dims(x)[1])*softplus(theta[1]);
        cov_matrix[N] TOT = identity_matrix(dims(x)[1])*softplus(theta[1]) + (gp_periodic_cov(x, 1.0, sqrt(softplus(theta[2])), softplus(theta[3])) .* softplus(theta[4]) * gp_dot_prod_cov(x, 0.0));
    }


"""

"""

        matrix[N, N] K;
        array[1] real left;
        array[1] real right;
        for (i in 1:N){
            for (j in 1:N){
            left[1] = x[j];
            right[1] = x[i];
                K[j, i] = softplus(theta[1]) + (softplus(theta[2]) * gp_periodic_cov(left, right, 1.0, sqrt(softplus(theta[3])), softplus(theta[4])))[1][1];
            }
        }
"""


#K = identity_matrix(dims(x)[1])*softplus(theta[1]) + ((softplus(theta[2]) * gp_periodic_cov(x, 1.0, sqrt(softplus(theta[3])), softplus(theta[4]))) * (softplus(theta[5]) * gp_exp_quad_cov(x, 1.0, softplus(theta[6]))));

'\n\n        matrix[N, N] K;\n        array[1] real left;\n        array[1] real right;\n        for (i in 1:N){\n            for (j in 1:N){\n            left[1] = x[j];\n            right[1] = x[i];\n                K[j, i] = softplus(theta[1]) + (softplus(theta[2]) * gp_periodic_cov(left, right, 1.0, sqrt(softplus(theta[3])), softplus(theta[4])))[1][1];\n            }\n        }\n'

In [5]:
post = stan.build(STAN_code, data=STAN_data, random_seed=1)


Building...



Building: found in cache, done.Messages from stanc:
    control flow statement inside function softplus depends on argument v. At
    '/tmp/httpstan_c8qssz0b/model_ej7zgg6c.stan', line 31, column 99 to
    column 107, the value of v depends on parameter(s): theta.
    control flow statement inside function softplus depends on argument v. At
    '/tmp/httpstan_c8qssz0b/model_ej7zgg6c.stan', line 31, column 49 to
    column 57, the value of v depends on parameter(s): theta.
    control flow statement inside function softplus depends on argument v. At
    '/tmp/httpstan_c8qssz0b/model_ej7zgg6c.stan', line 31, column 120 to
    column 128, the value of v depends on parameter(s): theta.
    provided, or the prior(s) depend on data variables. In the later case,
    this may be a false positive.


In [6]:
fit = post.sample(num_chains=1, num_samples=1)

Sampling:   0%
Sampling:   0% (1/1001)
Sampling:  10% (100/1001)
Sampling:  20% (200/1001)
Sampling:  30% (300/1001)
Sampling:  40% (400/1001)
Sampling:  50% (500/1001)
Sampling:  60% (600/1001)
Sampling:  70% (700/1001)
Sampling:  80% (800/1001)
Sampling:  90% (900/1001)
Sampling: 100% (1001/1001)
Sampling: 100% (1001/1001), done.
Messages received during sampling:
  Gradient evaluation took 0.001427 seconds
  1000 transitions using 10 leapfrog steps per transition would take 14.27 seconds.
  Adjust your expectations accordingly!
  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
  Exception: model_ej7zgg6c_namespace::log_prob: K is not symmetric. K[1,2] = -nan, but K[2,1] = -nan (in '/tmp/httpstan_u9io7rbn/model_ej7zgg6c.stan', line 30, column 8 to column 24)
  Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
  Exception: model_ej7zgg6c_namespace::log_prob: K is

In [7]:
fit.to_frame()

parameters,lp__,accept_stat__,stepsize__,treedepth__,n_leapfrog__,divergent__,energy__,theta.1,theta.2,theta.3,...,TOT.91.100,TOT.92.100,TOT.93.100,TOT.94.100,TOT.95.100,TOT.96.100,TOT.97.100,TOT.98.100,TOT.99.100,TOT.100.100
draws,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,41.683219,0.923963,0.24329,3.0,11.0,0.0,-38.4789,-2.93903,-2.99712,3.016777,...,,,,,,,,,,


In [9]:
frame = fit.to_frame()
#print(frame[list(fit.constrained_param_names)[1:5]])
#print(frame[list(fit.constrained_param_names)[101:105]])
for key in ["PER", "LIN", "noise", "TOT"]:
    for i in range(100):
        for j in range(100):
            if not frame[f"{key}.{i+1}.{j+1}"][0] == frame[f"{key}.{j+1}.{i+1}"][0]:
                import pdb
                pdb.set_trace()
                print(frame[f"{key}.{i+1}.{j+1}"][0])
                print(frame[f"{key}.{j+1}.{i+1}"][0])
    print(f"{key} successful!")



    #frame[list(fit.constrained_param_names)]

> [0;32m/tmp/ipykernel_945876/2300815894.py[0m(10)[0;36m<cell line: 4>[0;34m()[0m
[0;32m      8 [0;31m                [0;32mimport[0m [0mpdb[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      9 [0;31m                [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 10 [0;31m                [0mprint[0m[0;34m([0m[0mframe[0m[0;34m[[0m[0;34mf"{key}.{i+1}.{j+1}"[0m[0;34m][0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     11 [0;31m                [0mprint[0m[0;34m([0m[0mframe[0m[0;34m[[0m[0;34mf"{key}.{j+1}.{i+1}"[0m[0;34m][0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m    [0mprint[0m[0;34m([0m[0;34mf"{key} successful!"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> key
'LIN'
ipdb> q


BdbQuit: 

In [None]:
replacement_dictionary = {
    "c" : "softplus(theta[i])",
    "SE": "gp_exp_quad_cov(x, 1.0, softplus(theta[i]))",
    "PER": "gp_periodic_cov(x, 1.0, pow(softplus(theta[i]), 2), softplus(theta[i]))",
    "LIN": "softplus(theta[i]) * (x' * x)"
}
"""
Alternative for LIN:

softplus(theta[i]) * gp_dot_prod_cov(array[] real x, real sigma)
with sigma = 0
"""

In [None]:
# We will use the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ZeroMean()
        #self.covar_module = gpytorch.kernels.RBFKernel()  # Passed w. param = 0
        #self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) # Passed w. param=0, 1
        #self.covar_module = gpytorch.kernels.PeriodicKernel() # Passed, now that sqrt(param) is used. w params = 0, 1
        self.covar_module = gpytorch.kernels.LinearKernel() # Passed w. param = 0, 1
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.PeriodicKernel()) * gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) 

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x, train_y, likelihood)

In [None]:
list(model.named_parameters())
#list(model.parameters())[1].data = torch.tensor([[1.0]])
#list(model.parameters())[2].data = torch.tensor([[1.0]])

In [None]:
covariance = torch.eye(len(train_x)) * likelihood.noise + model.covar_module(train_x).evaluate()
covariance

In [None]:
torch.linalg.eig(covariance)

In [None]:
torch.nn.functional.softplus(torch.tensor(-3.0))**2