- [Linear Regression](#linear-regression)
- [Logistic Regression + MCMC + Moment Matching](#Logistic-Regression-+-MCMC-+-Moment-Matching)
- [Logistic Regression + MCMC](#Logistic-Regression-+-MCMC)
- [Logistic Regression + Search](#Logistic-Regression-+-Search)
- [Online IRT MCMC](#Online-IRT-MCMC)
- [Online IRT Search](#Online-IRT-Search)

In [1]:
import probpy as pp
import numpy as np
import numba
import random

linear regression
---

In [2]:
%%time
def predict(w, x):
    """Evaluate the three-feature linear model for every row of ``x``.

    ``w[0]``, ``w[1]`` and ``w[2]`` multiply columns 0, 1 and 2 of ``x``;
    ``w[3]`` is added as the intercept.
    """
    first_term = x[:, 0] * w[0]
    second_term = x[:, 1] * w[1]
    third_term = x[:, 2] * w[2]
    return first_term + second_term + third_term + w[3]

# Ground-truth weights used to simulate the data: three slopes, then the intercept.
w = [5, -2, 1, 5]
x = np.random.rand(100, 3) * 10
noise = pp.normal.sample(mu=0, sigma=1, size=100).reshape(-1)
y = predict(w, x) + noise

# Zero-mean Gaussian prior over the four weights.
prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 5)
# A linear likelihood is provided by probpy because it has a conjugate prior.
likelihood = pp.unilinear.med(sigma=1)

# Feed the observations in one at a time; each posterior becomes the next prior.
for i in range(100):
    data = (y[i], x[i])
    prior = pp.parameter_posterior(data, likelihood=likelihood, priors=prior)

    # Only report every tenth update.
    if i % 10 != 0:
        continue

    w_approx = pp.mode(prior)
    print("Parameter Estimate", w_approx)

    estimate_mse = np.square(y - predict(w_approx, x)).mean()
    true_mse = np.square(y - predict(w, x)).mean()
    print("Prior MSE", estimate_mse, "True MSE", true_mse)
    print()

Parameter Estimate [0.82871848 1.36663447 1.05396753 0.18889215]
Prior MSE 277.25015207512416 True MSE 0.9309396499000755

Parameter Estimate [ 4.93475771 -1.91257485  1.14722506  4.65322367]
Prior MSE 1.182302842696888 True MSE 0.9309396499000755

Parameter Estimate [ 4.97811283 -1.92435945  1.05503181  4.65488931]
Prior MSE 0.9023775122893809 True MSE 0.9309396499000755

Parameter Estimate [ 4.98803271 -1.97502956  1.03973278  4.93142443]
Prior MSE 0.9003507517599928 True MSE 0.9309396499000755

Parameter Estimate [ 4.96881772 -1.9343865   1.06708071  4.60489222]
Prior MSE 0.8797889423173266 True MSE 0.9309396499000755

Parameter Estimate [ 4.95998228 -1.9557319   1.0557666   4.86612511]
Prior MSE 0.8848950007817868 True MSE 0.9309396499000755

Parameter Estimate [ 5.0144002  -1.93218228  1.07232771  4.32980917]
Prior MSE 0.8987865126926101 True MSE 0.9309396499000755

Parameter Estimate [ 5.01103915 -1.93415371  1.08589985  4.30729862]
Prior MSE 0.9067724473768244 True MSE 0.9309396

Logistic Regression + MCMC + Moment Matching
---

In [3]:
%%time
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def predict(w, x):
    """Return the linear response ``x[:, :3] . w[:3] + w[3]`` for each row of ``x``.

    The first three entries of ``w`` are the slopes for columns 0-2 of ``x``
    and the fourth entry is the intercept.
    """
    slope_part = x[:, 0] * w[0] + x[:, 1] * w[1] + x[:, 2] * w[2]
    intercept = w[3]
    return slope_part + intercept
    
# Ground-truth weights used to simulate the data: three slopes, then the intercept.
w = [-3, 3, 5, -3]

x = np.random.rand(100, 3)
# Gaussian noise is added to the linear response before it is squashed.
noise = pp.normal.sample(mu=0.0, sigma=1.0, size=100).reshape(-1)
y = sigmoid(predict(w, x) + noise)

# This model needs a custom likelihood because it has no conjugate prior.

def likelihood(y, x, w):
    """Score candidate weights by the normal density of their residuals.

    ``w`` is indexed as a 2-D array: every column but the last holds slopes
    and the last column is the intercept. The residual ``y - sigmoid(x . w)``
    is evaluated under a normal density with ``mu=0.0`` and ``sigma=1.0``.
    """
    slopes = w[:, :-1, None]
    intercepts = w[:, None, None, -1]
    fitted = sigmoid(x @ slopes + intercepts).squeeze(axis=2)
    return pp.normal.p(y - fitted, mu=0.0, sigma=1.0)
    
    

prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 5)

# Keyword arguments forwarded unchanged to every posterior update below.
posterior_options = dict(
    match_moments_for=pp.multivariate_normal,
    batch=30,
    samples=3000,
    mixing=200,
    energies=1.0,
    mode="mcmc",
)

for i in range(50):
    # Pick a random window of 20 consecutive observations.
    j = random.randint(0, 80)
    window = slice(j, j + 20)
    data = (y[window], x[window])

    prior = pp.parameter_posterior(data, likelihood=likelihood, priors=prior,
                                   **posterior_options)

    # Only report every tenth update.
    if i % 10 != 0:
        continue

    w_approx = pp.mode(prior)
    print("Parameter Estimate", w_approx)

    estimate_mse = np.square(y - sigmoid(predict(w_approx, x))).mean()
    true_mse = np.square(y - sigmoid(predict(w, x))).mean()
    print("Prior MSE", estimate_mse, "True MSE", true_mse)
    print()

Parameter Estimate [-0.56643691  0.01167147  0.50108083 -0.112122  ]
Prior MSE 0.08355865080037034 True MSE 0.024105154186198567

Parameter Estimate [-2.50840043  0.97065721  3.47186427 -1.38396819]
Prior MSE 0.02783725503291344 True MSE 0.024105154186198567

Parameter Estimate [-2.80727047  1.53716795  4.42385481 -2.06322427]
Prior MSE 0.024474449084781212 True MSE 0.024105154186198567

Parameter Estimate [-3.0899075   1.97110935  4.61049436 -2.20479381]
Prior MSE 0.024217021795250964 True MSE 0.024105154186198567

Parameter Estimate [-2.91774719  2.17032564  5.07494335 -2.54859062]
Prior MSE 0.023796332923517315 True MSE 0.024105154186198567

CPU times: user 15.9 s, sys: 23.3 s, total: 39.2 s
Wall time: 10.3 s


Logistic Regression + MCMC
---

In [4]:
%%time
def sigmoid(x):
    """Squash ``x`` into (0, 1) with the logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def predict(w, x):
    """Evaluate the three-feature linear model for every row of ``x``.

    ``w[0]``, ``w[1]`` and ``w[2]`` multiply columns 0, 1 and 2 of ``x``;
    ``w[3]`` is added as the intercept.
    """
    first_term = x[:, 0] * w[0]
    second_term = x[:, 1] * w[1]
    third_term = x[:, 2] * w[2]
    return first_term + second_term + third_term + w[3]
    
# Ground-truth weights used to simulate the data: three slopes, then the intercept.
w = [-3, 3, 5, -3]

x = np.random.rand(100, 3)
# Gaussian noise is added to the linear response before it is squashed.
noise = pp.normal.sample(mu=0.0, sigma=1.0, size=100).reshape(-1)
y = sigmoid(predict(w, x) + noise)

# This model needs a custom likelihood because it has no conjugate prior.

def likelihood(y, x, w):
    """Score candidate weights by the normal density of their residuals.

    ``w`` is indexed as a 2-D array: every column but the last holds slopes
    and the last column is the intercept. The residual ``y - sigmoid(x . w)``
    is evaluated under a normal density with ``mu=0.0`` and ``sigma=1.0``.
    """
    slopes = w[:, :-1, None]
    intercepts = w[:, None, None, -1]
    fitted = sigmoid(x @ slopes + intercepts).squeeze(axis=2)
    return pp.normal.p(y - fitted, mu=0.0, sigma=1.0)
    
    

prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 5)

# Keyword arguments forwarded unchanged to every posterior update below.
posterior_options = dict(
    batch=5,
    samples=1000,
    mixing=100,
    energies=1.0,
    mode="mcmc",
)

for i in range(50):
    # Pick a random window of 20 consecutive observations.
    j = random.randint(0, 80)
    window = slice(j, j + 20)
    data = (y[window], x[window])

    prior = pp.parameter_posterior(data, likelihood=likelihood, priors=prior,
                                   **posterior_options)

    # Only report every tenth update.
    if i % 10 != 0:
        continue

    # Per the original author's note, the modes come back sorted, largest first.
    modes = pp.mode(prior)
    print("Number of modes", len(modes))

    w_approx = modes[0]
    print("Parameter Estimate", w_approx)

    estimate_mse = np.square(y - sigmoid(predict(w_approx, x))).mean()
    true_mse = np.square(y - sigmoid(predict(w, x))).mean()
    print("Prior MSE", estimate_mse, "True MSE", true_mse)
    print()

Number of modes 1
Parameter Estimate [-1.19411958 -0.02208488  0.92481752 -0.54798574]
Prior MSE 0.07415779260670184 True MSE 0.02946900658880509

Number of modes 2
Parameter Estimate [-2.47421231  2.52737833  3.27763166 -1.68074876]
Prior MSE 0.03529069490967807 True MSE 0.02946900658880509

Number of modes 1
Parameter Estimate [-2.61975057  3.03076536  6.08372257 -4.36140545]
Prior MSE 0.03924042504755918 True MSE 0.02946900658880509

Number of modes 3
Parameter Estimate [-5.13335341  3.85757239  6.06846311 -3.15129313]
Prior MSE 0.03728190473799192 True MSE 0.02946900658880509

Number of modes 3
Parameter Estimate [-3.90166767  4.26777063  9.55988051 -6.25581856]
Prior MSE 0.046972469011489916 True MSE 0.02946900658880509

CPU times: user 11.2 s, sys: 1e+03 ms, total: 12.2 s
Wall time: 11.2 s


Logistic Regression + Search
---


In [6]:
%%time
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def predict(w, x):
    """Return the linear response ``x[:, :3] . w[:3] + w[3]`` for each row of ``x``.

    The first three entries of ``w`` are the slopes for columns 0-2 of ``x``
    and the fourth entry is the intercept.
    """
    slope_part = x[:, 0] * w[0] + x[:, 1] * w[1] + x[:, 2] * w[2]
    intercept = w[3]
    return slope_part + intercept
    
# Ground-truth weights used to simulate the data: three slopes, then the intercept.
w = [-3, 3, 5, -3]

x = np.random.rand(100, 3)
# Gaussian noise is added to the linear response before it is squashed.
noise = pp.normal.sample(mu=0.0, sigma=1.0, size=100).reshape(-1)
y = sigmoid(predict(w, x) + noise)

# This model needs a custom likelihood because it has no conjugate prior.
def likelihood(y, x, w):
    """Score candidate weights by the normal density of their residuals.

    ``w`` is indexed as a 2-D array: every column but the last holds slopes
    and the last column is the intercept. The residual ``y - sigmoid(x . w)``
    is evaluated under a normal density with ``mu=0.0`` and ``sigma=1.0``.
    """
    slopes = w[:, :-1, None]
    intercepts = w[:, None, None, -1]
    fitted = sigmoid(x @ slopes + intercepts).squeeze(axis=2)
    return pp.normal.p(y - fitted, mu=0.0, sigma=1.0)
    
    

prior = pp.multivariate_normal.med(mu=np.zeros(4), sigma=np.eye(4) * 10)

# Keyword arguments forwarded unchanged to every posterior update below.
posterior_options = dict(
    batch=50,
    samples=500,
    energies=0.25,
    mode="search",
    volume=100,
)

# Each pass reuses the full data set and feeds the posterior back in as the prior.
for i in range(5):
    data = (y, x)

    prior = pp.parameter_posterior(data, likelihood=likelihood, priors=prior,
                                   **posterior_options)

    # Per the original author's note, the modes come back sorted, largest first.
    modes = pp.mode(prior)

    print("Number of modes", len(modes))
    for mode in modes:
        print(mode)

    w_approx = modes[0]
    print("Parameter Estimate", w_approx)

    estimate_mse = np.square(y - sigmoid(predict(w_approx, x))).mean()
    true_mse = np.square(y - sigmoid(predict(w, x))).mean()
    print("Prior MSE", estimate_mse, "True MSE", true_mse)
    print()

Number of modes 3
[-2.37358098  2.45672423  3.10944516 -1.77229386]
[-0.66060629  1.86470548  2.99982691 -2.25591765]
[-1.64797576  2.80994706  3.05165764 -2.60653526]
Parameter Estimate [-2.37358098  2.45672423  3.10944516 -1.77229386]
Prior MSE 0.024396306396545043 True MSE 0.023778247413701364

Number of modes 2
[-1.06294304  1.95304921  2.89078294 -1.99914373]
[-0.98022663  2.40756965  2.8856674  -2.20346115]
Parameter Estimate [-1.06294304  1.95304921  2.89078294 -1.99914373]
Prior MSE 0.028783499163620784 True MSE 0.023778247413701364

Number of modes 2
[-1.36434593  2.05394559  3.2633851  -2.46433219]
[-0.88306586  2.24353647  2.94768213 -2.41801106]
Parameter Estimate [-1.36434593  2.05394559  3.2633851  -2.46433219]
Prior MSE 0.02618509691057883 True MSE 0.023778247413701364

Number of modes 2
[-1.33450489  2.19951978  3.27158992 -2.32161314]
[-1.50296009  2.05911929  3.04411491 -1.93785178]
Parameter Estimate [-1.33450489  2.19951978  3.27158992 -2.32161314]
Prior MSE 0.02458

Online IRT MCMC
---

In [9]:
%%time

def sigmoid(x):
    """Squash ``x`` into (0, 1) with the logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def logit(x):
    """Return the log-odds ``log(x / (1 - x))``, applied element-wise."""
    odds = x / (1 - x)
    return np.log(odds)

# The simulated student's skill, mapped from 0.7 through logit.
student_skill = logit(0.7)

# Item difficulties, likewise mapped through logit.
difficulty_levels = np.array([0.4, 0.6, 0.8, 0.7])
items = logit(difficulty_levels)

def likelihood(obs, item, skill):
    """Evaluate the response likelihood for each candidate in ``skill``.

    For every candidate the residual ``obs - sigmoid(candidate - item)`` is
    scored under a normal density with ``mu=0.0`` and ``sigma=0.6``; the
    per-candidate results are returned stacked in one array.
    """
    densities = [
        pp.normal.p(obs - sigmoid(candidate - item), mu=0.0, sigma=0.6)
        for candidate in skill
    ]
    return np.array(densities)

samples = 30

# Simulate `samples` graded responses: draw a random item each time and mark
# the answer correct with probability sigmoid(student_skill - item).
obs, its = [], []
for i in range(samples):
    item = items[np.random.randint(0, items.size)]
    # `np.float` was removed in NumPy 1.24; the builtin `float` selects the
    # same float64 dtype and keeps the outcome a NumPy scalar (0.0 or 1.0).
    outcome = (np.random.rand() < sigmoid(student_skill - item)).astype(float)

    obs.append(outcome)
    its.append(item)

prior_skill = pp.normal.med(mu=0.0, sigma=10)

# Fold the responses in one at a time; each posterior becomes the next prior.
for i in range(samples):
    prior_skill = pp.parameter_posterior((obs[i], its[i]), likelihood=likelihood, priors=prior_skill,
                                         mode="mcmc", match_moments_for=pp.normal,
                                         samples=20000, mixing=3000, batch=5)
    # Map the mode through sigmoid once and reuse it in the report below
    # instead of asking probpy for the mode a second time.
    modes = sigmoid(np.array(pp.mode(prior_skill)))

    print("observation", obs[i], "item", sigmoid(its[i]), "mode", modes)



observation 1.0 item 0.4 mode 0.6659695025664282
observation 1.0 item 0.8 mode 0.8020286414840695
observation 1.0 item 0.4 mode 0.8508628052739741
observation 1.0 item 0.4 mode 0.9129996094595377
observation 0.0 item 0.8 mode 0.8412823033867718
observation 0.0 item 0.6 mode 0.7354109825371952
observation 0.0 item 0.7 mode 0.6618174814469742
observation 0.0 item 0.8 mode 0.5871783987109122
observation 0.0 item 0.8 mode 0.5035837085155406
observation 0.0 item 0.6 mode 0.43600480550947657
observation 0.0 item 0.4 mode 0.3737703684795617
observation 1.0 item 0.4 mode 0.4503008206841502
observation 1.0 item 0.7 mode 0.5263954859032576
observation 0.0 item 0.7 mode 0.4853816985735765
observation 1.0 item 0.4 mode 0.5472069801893906
observation 1.0 item 0.8 mode 0.5895748042908497
observation 1.0 item 0.7 mode 0.6369822092748112
observation 1.0 item 0.7 mode 0.672791678985395
observation 1.0 item 0.6 mode 0.7008580224126745
observation 1.0 item 0.7 mode 0.7342480704115563
observation 0.0 item

Online IRT Search
---

In [11]:
%%time

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def logit(x):
    """Return the log-odds ``log(x / (1 - x))``, applied element-wise."""
    complement = 1 - x
    return np.log(x / complement)

# The simulated student's skill, mapped from 0.7 through logit.
student_skill = logit(0.7)

# Item difficulties, likewise mapped through logit.
difficulty_levels = np.array([0.4, 0.6, 0.8, 0.7])
items = logit(difficulty_levels)

def likelihood(obs, item, skill):
    """Evaluate the response likelihood for each candidate in ``skill``.

    For every candidate the residual ``obs - sigmoid(candidate - item)`` is
    scored under a normal density with ``mu=0.0`` and ``sigma=0.6``; the
    per-candidate results are returned stacked in one array.
    """
    densities = [
        pp.normal.p(obs - sigmoid(candidate - item), mu=0.0, sigma=0.6)
        for candidate in skill
    ]
    return np.array(densities)

samples = 30

# Simulate `samples` graded responses: draw a random item each time and mark
# the answer correct with probability sigmoid(student_skill - item).
obs, its = [], []
for i in range(samples):
    item = items[np.random.randint(0, items.size)]
    # `np.float` was removed in NumPy 1.24; the builtin `float` selects the
    # same float64 dtype and keeps the outcome a NumPy scalar (0.0 or 1.0).
    outcome = (np.random.rand() < sigmoid(student_skill - item)).astype(float)

    obs.append(outcome)
    its.append(item)

prior_skill = pp.normal.med(mu=0.0, sigma=10)

# Fold the responses in one at a time; each posterior becomes the next prior.
for i in range(samples):
    # NOTE(review): the other cells in this notebook pass `energies=`, while
    # this call passes `energy=` -- confirm which keyword probpy expects here.
    prior_skill = pp.parameter_posterior((obs[i], its[i]),
                                         likelihood=likelihood, priors=prior_skill,
                                         mode="search",
                                         samples=500, batch=5,
                                         volume=100, energy=0.1)
    # Map the modes through sigmoid once and reuse the result in the report
    # below instead of asking probpy for the modes a second time.
    modes = sigmoid(np.array(pp.mode(prior_skill)))

    print("observation", obs[i], "item", sigmoid(its[i]), "mode", modes[0])




observation 1.0 item 0.4 mode [0.66908634]
observation 1.0 item 0.4 mode [0.68402731]
observation 0.0 item 0.8 mode [0.67153789]
observation 1.0 item 0.6 mode [0.68855157]
observation 1.0 item 0.6 mode [0.70638245]
observation 1.0 item 0.4 mode [0.70899041]
observation 1.0 item 0.4 mode [0.71843775]
observation 0.0 item 0.6 mode [0.69275993]
observation 1.0 item 0.6 mode [0.70491049]
observation 0.0 item 0.7 mode [0.68388039]
observation 1.0 item 0.6 mode [0.69991047]
observation 1.0 item 0.6 mode [0.71534563]
observation 1.0 item 0.8 mode [0.73985352]
observation 0.0 item 0.6 mode [0.71149975]
observation 1.0 item 0.7 mode [0.73387809]
observation 0.0 item 0.7 mode [0.71202318]
observation 0.0 item 0.4 mode [0.68790718]
observation 0.0 item 0.8 mode [0.67527891]
observation 0.0 item 0.7 mode [0.65349838]
observation 1.0 item 0.6 mode [0.67431735]
observation 1.0 item 0.4 mode [0.68075407]
observation 0.0 item 0.7 mode [0.66447142]
observation 0.0 item 0.7 mode [0.64694849]
observation