- [Linear Regression](#linear-regression)
- [Logistic Regression + MCMC + Moment Matching](#Logistic-Regression-+-MCMC-+-Moment-Matching)
- [Logistic Regression + MCMC](#Logistic-Regression-+-MCMC)
- [Logistic Regression + Search](#Logistic-Regression-+-Search)

In [2]:
import probpy as pp
import numpy as np
import numba
import random

In [10]:
def predict(w, x):
    """Evaluate the linear model ``x @ w[:-1] + w[-1]`` for every row of ``x``.

    Parameters
    ----------
    w : sequence of float, length ``n_features + 1``
        Feature weights followed by the bias term as the last element.
    x : array-like, shape ``(n_samples, n_features)``
        Design matrix, one observation per row.

    Returns
    -------
    numpy.ndarray, shape ``(n_samples,)``
        Model output for each row.

    Raises
    ------
    ValueError
        Raised by the matrix product when the number of columns in ``x`` does
        not equal ``len(w) - 1``.
    """
    weights = np.asarray(w)
    features = np.asarray(x)
    # Matrix product instead of three spelled-out terms, so the same helper
    # works for any number of features (the notebook uses three).
    return features @ weights[:-1] + weights[-1]
    
# Seed the global RNGs so that "Restart Kernel -> Run All" regenerates the same
# synthetic dataset and the same random windows in the later cells.
# NOTE(review): this assumes pp.normal.sample draws from NumPy's global RNG --
# confirm; if probpy keeps its own generator the noise will still vary per run.
np.random.seed(0)
random.seed(0)

w = [3, -2, 3, 5] # True underlying model: three feature weights, then the bias

# 100 observations of 3 features, uniform on [0, 10).
x = np.random.rand(100, 3) * 10
# Targets are the true model's output plus unit-variance Gaussian noise.
y = predict(w, x) + pp.normal.sample(mu=0, sigma=1, size=100).reshape(-1)

def evaluate(y, w_approx, logistic=False):
    """Print a parameter estimate and compare its error with the true model's.

    Reads the notebook-level design matrix ``x`` and true weights ``w``.

    Parameters
    ----------
    y : array-like
        Observed targets to score against.
    w_approx : array-like
        Estimated weights (feature weights followed by the bias).
    logistic : bool, optional
        When true, predictions are passed through ``sigmoid`` before scoring.

    Notes
    -----
    The value printed under the label "Prior MSE" is the mean squared error
    of ``w_approx``; "True MSE" is the same metric for the true weights ``w``.
    """
    def _mean_squared_error(weights):
        # Same scoring path for the estimate and for the ground truth.
        fitted = predict(weights, x)
        if logistic:
            fitted = sigmoid(fitted)
        return np.square(y - fitted).mean()

    print("Parameter Estimate", w_approx)
    print("Prior MSE", _mean_squared_error(w_approx),
          "True MSE", _mean_squared_error(w))
    print()
    

linear regression
---

In [7]:
%%time

# Zero-mean Gaussian prior over the four parameters (3 weights + bias).
prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 5)

# probpy ships an implementation for the linear case because it has a conjugate prior.
# Named `linear_likelihood` (not `likelihood`) so that re-running this cell does
# not overwrite the `likelihood` function used by the logistic-regression cells.
linear_likelihood = pp.unilinear.med(sigma=1)

# Feed the observations one at a time; each posterior becomes the next prior.
for i, data in enumerate(zip(y, x)):
    prior = pp.parameter_posterior(data, likelihood=linear_likelihood, prior=prior)

    # Report progress on every tenth observation.
    if i % 10 == 0:
        evaluate(y, pp.mode(prior))


Parameter Estimate [1.51137495 1.7541886  1.68767276 0.44634722]
Prior MSE 160.09610767274725 True MSE 1.0150821032682253

Parameter Estimate [ 3.18783209 -2.07891946  2.95295174  4.96388544]
Prior MSE 1.426133861517623 True MSE 1.0150821032682253

Parameter Estimate [ 2.98445459 -1.92944274  2.96760977  5.04758955]
Prior MSE 1.0349532626790257 True MSE 1.0150821032682253

Parameter Estimate [ 2.98457113 -2.00606468  2.91276408  5.70439822]
Prior MSE 0.9969301762070326 True MSE 1.0150821032682253

Parameter Estimate [ 2.97200017 -2.01138558  2.91654733  5.74757405]
Prior MSE 0.9934653239552353 True MSE 1.0150821032682253

Parameter Estimate [ 2.96792041 -1.98305111  2.93791647  5.49069112]
Prior MSE 0.9844346927976906 True MSE 1.0150821032682253

Parameter Estimate [ 2.97491081 -1.97157862  2.94495308  5.39196258]
Prior MSE 0.9910344828832521 True MSE 1.0150821032682253

Parameter Estimate [ 2.99384071 -1.98988805  2.95935343  5.2121785 ]
Prior MSE 0.981181110376964 True MSE 1.01508210

Logistic Regression helpers
---

In [8]:
@numba.jit(nopython=True, fastmath=True, forceobj=False)
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, compiled by numba in nopython mode."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# numba does not support jitting methods of classes, so the density function is
# bound to a module-level name first; `likelihood` below calls it via this alias.
fast_p = pp.normal.fast_p

def likelihood(y, x, w):
    """Score an observation under weights ``w`` for the logistic model.

    The residual between ``y`` and the sigmoid of the linear score is evaluated
    under a normal density with ``mu=0.0`` and ``sigma=0.2``.

    Parameters
    ----------
    y : observed target(s)
    x : feature values
    w : weights; ``w[:-1]`` multiply the features and ``w[-1]`` is the bias
    """
    # NOTE(review): np.sum without an axis reduces over every element, so this
    # presumably expects a single observation in `x` -- confirm how probpy
    # batches the data it passes in.
    linear_score = np.sum(x * w[:-1]) + w[-1]
    residual = y - sigmoid(linear_score)
    return fast_p(residual, mu=0.0, sigma=0.2)


Logistic Regression + MCMC + Moment Matching
---

In [11]:
%%time


# Squash the linear targets through the sigmoid, then add Gaussian noise (sigma=0.1).
squashed_targets = sigmoid(y)
observation_noise = pp.normal.sample(mu=0, sigma=0.1, size=100).reshape(-1)
logistic_y = squashed_targets + observation_noise

prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 5)

for i in range(6):
    # Random contiguous window of 20 observations; start index is 0..80 inclusive.
    j = random.randint(0, 80)
    window = slice(j, j + 20)
    data = (logistic_y[window], x[window])

    # MCMC posterior, requested as a multivariate normal via `match_moments_for`;
    # the result is fed back in as the prior for the next window.
    prior = pp.parameter_posterior(
        data,
        likelihood=likelihood,
        prior=prior,
        match_moments_for=pp.multivariate_normal,
        batch=5,
        samples=30000,
        mixing=20000,
        energy=0.1,
        mode="mcmc",
    )

    evaluate(logistic_y, pp.mode(prior), logistic=True)

Parameter Estimate [ 1.8556192   0.89805479  1.14121048 -0.73649219]
Prior MSE 0.1303049165516874 True MSE 0.10531965500342082

Parameter Estimate [ 2.85442252  0.83749109  1.3250087  -1.50933187]
Prior MSE 0.1301079246674576 True MSE 0.10531965500342082

Parameter Estimate [ 2.9605184   0.96696013  1.18212971 -1.61820646]
Prior MSE 0.13016806437826442 True MSE 0.10531965500342082

Parameter Estimate [ 3.75486136 -0.12518465  1.35139376 -1.16330702]
Prior MSE 0.11659812831543404 True MSE 0.10531965500342082

Parameter Estimate [ 3.57299666  0.08909493  1.36760802 -1.44033162]
Prior MSE 0.12231543556899974 True MSE 0.10531965500342082

Parameter Estimate [ 3.59308238  0.14841552  1.29437415 -1.57553887]
Prior MSE 0.12378700489557908 True MSE 0.10531965500342082

CPU times: user 15.7 s, sys: 19.3 ms, total: 15.7 s
Wall time: 15.7 s


Logistic Regression + MCMC
---

In [12]:
%%time

# Sigmoid-squashed targets with Gaussian observation noise (sigma=0.1).
logistic_y = sigmoid(y) + pp.normal.sample(mu=0, sigma=0.1, size=100).reshape(-1)
prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 5)

# Every pass uses the full dataset, so the tuple is built once outside the loop.
data = (logistic_y, x)

for i in range(5):
    # Each MCMC posterior becomes the prior for the next pass.
    prior = pp.parameter_posterior(data, likelihood=likelihood,
                                   prior=prior,
                                   batch=5,
                                   samples=5000,
                                   mixing=100,
                                   energy=0.3,
                                   mode="mcmc")

    # Modes are sorted so that the first is the largest; compute them once and
    # reuse the result instead of calling pp.mode a second time.
    modes = pp.mode(prior)
    evaluate(logistic_y, modes[0], logistic=True)
  

Parameter Estimate [0.75888751 1.01434056 0.53427073 0.36624556]
Prior MSE 0.1133902962864118 True MSE 0.10465551586116453

Parameter Estimate [ 1.23943062  3.55555258  1.31001354 -0.057735  ]
Prior MSE 0.11335409862076753 True MSE 0.10465551586116453

Parameter Estimate [2.36558342 1.23159724 2.42748257 0.44546576]
Prior MSE 0.11334183371872636 True MSE 0.10465551586116453

Parameter Estimate [1.70694765 0.68480743 2.18627895 1.17019537]
Prior MSE 0.11332344473622578 True MSE 0.10465551586116453

Parameter Estimate [2.17284036 0.315641   1.45677032 2.282372  ]
Prior MSE 0.11328108457987791 True MSE 0.10465551586116453

CPU times: user 25.6 s, sys: 136 ms, total: 25.7 s
Wall time: 25.7 s


Logistic Regression + Search
---


In [14]:
%%time
    
# Sigmoid-squashed targets with Gaussian observation noise (sigma=0.1).
logistic_y = sigmoid(y) + pp.normal.sample(mu=0, sigma=0.1, size=100).reshape(-1)

prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 10)

# Every pass uses the full dataset, so the tuple is built once outside the loop.
data = (logistic_y, x)

for i in range(5):
    # Each search-mode posterior becomes the prior for the next pass.
    prior = pp.parameter_posterior(data, likelihood=likelihood,
                                   prior=prior,
                                   batch=50,
                                   samples=500,
                                   energy=0.25,
                                   mode="search",
                                   volume=100)

    # Modes are sorted so that the first is the largest; compute them once and
    # reuse the result instead of calling pp.mode a second time.
    modes = pp.mode(prior)
    evaluate(logistic_y, modes[0], logistic=True)
  

Parameter Estimate [ 0.81665096 -0.44990258  1.04562951  4.09053447]
Prior MSE 0.1038146054100692 True MSE 0.10081423931262487

Parameter Estimate [ 0.83130727 -0.58442009  0.59877811  4.37373957]
Prior MSE 0.0998316213634307 True MSE 0.10081423931262487

Parameter Estimate [ 1.11137488 -0.7704312   0.56447397  4.19699473]
Prior MSE 0.09489611294118766 True MSE 0.10081423931262487

Parameter Estimate [ 1.44694725 -0.78036595  0.501888    4.12751663]
Prior MSE 0.09429105082554848 True MSE 0.10081423931262487

Parameter Estimate [ 1.55059332 -0.79113241  0.45028324  4.31139962]
Prior MSE 0.09423166234714586 True MSE 0.10081423931262487

CPU times: user 4.44 s, sys: 0 ns, total: 4.44 s
Wall time: 4.44 s
