# 1. Univariate Linear Regression

In [1]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.howell1(only_path = true)
m.data(data_path, sep=';') 
m.df = m.df[m.df.age > 18] # Subset data to adults
m.scale(["weight"]) # Normalize

# Define model ------------------------------------------------
@pymodel function model(weight, height)
    # Priors
    a = m.dist.normal(178, 20, name = 'a') 
    b = m.dist.log_normal(0, 1, name = 'b') 
    s = m.dist.uniform(0, 50, name = 's') 
    m.dist.normal(a + b * weight , s, obs = height) 
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1406.70it/s, 7 steps of size 7.35e-01. acc. prob=0.93]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a,154.64,0.26,154.27,155.14,0.01,0.01,421.95,407.54,
b,5.82,0.29,5.4,6.31,0.02,0.01,376.14,356.33,
s,5.15,0.2,4.81,5.47,0.01,0.01,446.91,333.97,


# 2. Multiple continuous Variables

In [2]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.howell1(only_path = true)
m.data(data_path, sep=';') 
m.df = m.df[m.df.age > 18] # Subset data to adults
m.scale(["weight", "age"]) # Normalize

# Define model ------------------------------------------------
@pymodel function model(height, weight, age)
    # Parameter prior distributions
    alpha = m.dist.normal(0, 0.5, name = "alpha")    
    beta1 = m.dist.normal(0, 0.5, name = "beta1")
    beta2 = m.dist.normal(0, 0.5, name = "beta2")
    sigma = m.dist.uniform(0, 50, name = "sigma")
    # Likelihood
    m.dist.normal(alpha + beta1 * weight + beta2 * age, sigma, obs = height)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1370.45it/s, 7 steps of size 5.54e-01. acc. prob=0.94]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha,5.18,0.49,4.32,5.93,0.02,0.02,394.7,277.21,
beta1,0.21,0.51,-0.57,0.99,0.02,0.03,577.0,264.99,
beta2,-0.05,0.51,-0.93,0.72,0.02,0.02,556.73,438.95,
sigma,49.98,0.02,49.96,50.0,0.0,0.0,437.37,227.58,


# 3. Interaction between continuous variables

In [1]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.tulips(only_path = true)
m.data(data_path, sep=';')
m.scale(["blooms", "water", "shade"]) # Normalize
# Define model ------------------------------------------------
@pymodel function model(blooms,shade, water)
    sigma = m.dist.exponential(1, name = "sigma", shape = (1,))
    bws = m.dist.normal(0, 0.25, name = "bws", shape = (1,))
    bs = m.dist.normal(0, 0.25, name = "bs", shape = (1,))
    bw = m.dist.normal(0, 0.25, name = "bw", shape = (1,))
    a = m.dist.normal(0.5, 0.25, name = "a", shape = (1,))
    mu = a + bw*water + bs*shade + bws*water*shade
    m.dist.normal(mu, sigma, obs=blooms)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1364.12it/s, 7 steps of size 5.97e-01. acc. prob=0.92]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],0.09,0.1,-0.08,0.23,0.0,0.01,433.59,227.87,
bs[0],-0.31,0.11,-0.47,-0.14,0.01,0.01,435.69,295.24,
bw[0],0.57,0.11,0.38,0.71,0.0,0.0,474.53,316.63,
bws[0],-0.32,0.11,-0.5,-0.14,0.0,0.01,527.43,387.68,
sigma[0],0.57,0.09,0.43,0.7,0.0,0.0,402.35,398.08,


# 4. Categorical variable

In [14]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.milk(only_path = true)
m.data(data_path, sep=';')
m.index("clade") # Convert clade names into index
m.scale(["kcal_per_g"]) # Scale

# Define model ------------------------------------------------
@pymodel function model(kcal_per_g, index_clade, mass)
    a = m.dist.normal(0, 0.5, shape=(4,), name = "a") # shape based on the number of clades
    b = m.dist.normal(0, 0.5, shape=(4,), name = "b")
    s = m.dist.exponential( 1, name = 's')    
    mu = a[index_clade]+b[index_clade]*mass
    m.dist.normal(mu, s, obs=kcal_per_g)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1198.33it/s, 15 steps of size 3.70e-01. acc. prob=0.92]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],-0.31,0.35,-0.85,0.25,0.02,0.01,474.93,424.32,
a[1],0.59,0.28,0.09,1.0,0.02,0.02,243.6,243.39,
a[2],0.32,0.37,-0.24,0.94,0.02,0.01,451.56,371.25,
a[3],-0.18,0.45,-0.95,0.54,0.02,0.02,373.27,299.02,
b[0],-0.0,0.01,-0.02,0.01,0.0,0.0,454.86,349.78,
b[1],-0.17,0.13,-0.35,0.05,0.01,0.01,268.98,199.26,
b[2],0.08,0.06,-0.03,0.17,0.0,0.0,378.01,418.7,
b[3],-0.26,0.25,-0.68,0.14,0.01,0.01,329.66,236.06,
s,0.79,0.13,0.6,0.95,0.01,0.01,316.99,297.75,


# 5. Binomial model

In [12]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.chimpanzees(only_path = true)
m.data(data_path, sep=';')

# Define model ------------------------------------------------
@pymodel function model(pulled_left)
    a = m.dist.normal( 0, 10, shape=(1,), name = "a")
    m.dist.binomial(total_count = 1, logits=a[0], obs=pulled_left)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32
jax.local_device_count 32
jax.local_device_count 32
jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1669.82it/s, 3 steps of size 1.10e+00. acc. prob=0.93]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],0.33,0.09,0.2,0.47,0.01,0.0,167.72,173.42,


# 6. Beta-Binomial model

In [16]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.ucbadmit(only_path = true)
m.data(data_path, sep=';')

m.df["gid"] = m.df["applicant.gender"].ne("male").astype("int")

# Define model ------------------------------------------------
@pymodel function model(gid, applications, admit)
    # Prior for overall concentration scaling (positive, via exponential)
    phi = m.dist.exponential(1, name="phi")
    
    # Priors for group-level intercepts (two groups, normal-distributed)
    alpha = m.dist.normal(0., 1.5, shape=(2,), name="alpha")
    
    # Shifted concentration scale (avoids too small values)
    theta = phi + 2
    
    # Group-specific mean success probability (mapped to [0,1] with sigmoid)
    pbar = m.link.inv_logit(alpha[gid])
    
    # Beta distribution parameter for "successes"
    concentration1 = pbar * theta
    
    # Beta distribution parameter for "failures"
    concentration0 = (1 - pbar) * theta
    
    # Likelihood: admissions modeled with Beta-Binomial
    m.dist.beta_binomial(
        total_count=applications,
        concentration1=concentration1,
        concentration0=concentration0,
        obs=admit
    )
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32
jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1148.51it/s, 7 steps of size 5.81e-01. acc. prob=0.92]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha[0],-0.46,0.42,-1.19,0.14,0.02,0.03,386.31,270.66,
alpha[1],-0.31,0.44,-1.02,0.34,0.02,0.02,456.8,360.97,
phi,0.94,0.76,0.0,1.99,0.04,0.03,232.37,170.82,


# 7. Poisson model

In [4]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.kline(only_path = true)
m.data(data_path, sep=';')
m.scale(["population"]) # Normalize
m.df["cid"] = m.df.contact.eq("high").astype("int")

# Define model ------------------------------------------------
@pymodel function model(cid, population, total_tools)
    a = m.dist.normal(3, 0.5, shape= (2,), name="a")
    b = m.dist.normal(0, 0.2, shape=(2,), name="b")
    l = jnp.exp(a[cid] + b[cid]*population)
    m.dist.poisson(l, obs=total_tools)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32
jax.local_device_count 32
jax.local_device_count 32
jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1524.03it/s, 7 steps of size 6.43e-01. acc. prob=0.88]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],3.22,0.09,3.06,3.34,0.0,0.0,420.46,351.5,
a[1],3.64,0.09,3.48,3.77,0.01,0.0,227.84,242.8,
b[0],0.35,0.05,0.27,0.43,0.0,0.0,429.98,230.63,
b[1],0.05,0.18,-0.25,0.34,0.01,0.01,365.3,221.96,


# 8. Gamma-Poisson model

In [1]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.sim_gamma_poisson(only_path = true)
m.data(data_path, sep=',')

# Define model ------------------------------------------------
@pymodel function model(log_days, monastery, y)
    a = m.dist.normal(0, 1, name = "a", shape=(1,))
    b = m.dist.normal(0, 1, name = "b", shape=(1,))
    phi = m.dist.exponential(1, name = "phi", shape=(1,))
    mu = jnp.exp(log_days + a + b * monastery)
    Lambda =  m.dist.gamma(rate = mu*phi, concentration = phi, name = "Lambda")
    m.dist.poisson(rate = Lambda, obs=y)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions


SYSTEM: caught exception of type :MethodError while trying to print a failed Task notice; giving up


jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:05<00:00, 191.15it/s, 31 steps of size 1.05e-01. acc. prob=0.91]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
Lambda[0],1.58,0.35,1.09,2.13,0.01,0.02,637.16,428.01,
Lambda[1],1.50,0.36,0.92,2.03,0.01,0.02,604.37,313.75,
Lambda[2],1.55,0.34,0.99,2.02,0.01,0.02,712.46,429.17,
Lambda[3],1.42,0.34,0.89,1.91,0.01,0.02,757.41,343.08,
Lambda[4],1.47,0.35,0.91,2.00,0.01,0.01,560.18,432.92,
...,...,...,...,...,...,...,...,...,...
Lambda[3398],3.56,0.78,2.32,4.74,0.03,0.04,731.58,294.77,
Lambda[3399],3.74,0.75,2.62,4.85,0.03,0.04,597.26,354.34,
a[0],-0.42,0.01,-0.45,-0.40,0.00,0.00,56.51,127.82,
b[0],-2.76,0.03,-2.81,-2.71,0.00,0.00,61.40,124.34,


# 9. Categorical model

In [None]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.sim_multinomial(only_path = true)
m.data(data_path, sep=',')

# Define model ------------------------------------------------
@pymodel function model(career, income)
    a = m.dist.normal(0, 1, shape=(2,), name = "a")
    b = m.dist.half_normal(0.5, shape=(1,), name = "b")
    
    # indexing works now because of the package update
    s_1 = a[0] + b * income[0]
    s_2 = a[1] + b * income[1]
    
    # ⚠️  Use jnp.array to create a Python object, so [0] indexing works
    s_3 = jnp.array([0.0]) 
    
    # Now s_3[0] is valid because it calls Python's __getitem__(0)
    p = jax.nn.softmax(jnp.stack([s_1[0], s_2[0], s_3[0]]))
    
    m.dist.categorical(probs=p, obs=career)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32
jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1277.17it/s, 7 steps of size 2.58e-01. acc. prob=0.85]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],-2.06,0.24,-2.41,-1.7,0.04,0.03,35.81,22.0,
a[1],-1.55,0.15,-1.8,-1.33,0.03,0.02,39.0,22.56,
b[0],0.05,0.04,0.0,0.1,0.01,0.01,30.02,39.34,


# 10. Multinomial model

In [3]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu")

# Import Data & Data Manipulation ------------------------------------------------
# Import
data_path = m.load.sim_multinomial(only_path = true)
m.data(data_path, sep=',')

# Define model ------------------------------------------------
@pymodel function model(income, career)
    # Parameter prior distributions
    alpha = m.dist.normal(0, 1, shape=(2,), name='a')
    beta = m.dist.half_normal(0.5, shape=(1,), name='b')
    s_1 = alpha[0] + beta * income[0]
    s_2 = alpha[1] + beta * income[1]
    # ⚠️  Use jnp.array to create a Python object, so [0] indexing works
    s_3 = jnp.array([0.0]) 
    p = jnp.exp(jnp.stack([s_1[0], s_2[0], s_3[0]]))
    # Likelihood
    m.dist.multinomial(probs = p[career], obs=career)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1300.88it/s, 7 steps of size 7.96e-01. acc. prob=0.90]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],0.0,0.97,-1.6,1.51,0.05,0.04,428.89,395.09,
a[1],82.06,1.02,80.4,83.58,0.05,0.05,472.75,340.24,
b[0],40.96,0.5,40.12,41.65,0.02,0.02,616.3,368.44,


# 11. Zero-Inflated Models

In [4]:
using BayesianInference

# Setup device------------------------------------------------
m = importBI(platform="cpu", rand_seed = false)

# Simulated data ------------------------------------------------
prob_drink = 0.2  
rate_work = 1     

# Sample one year of production
N = 365

# Note: Use lowercase 'true' for booleans in Julia
# 'drink' will be a Python/JAX object
drink = m.dist.binomial(1, prob_drink, shape=(N,), sample=true)

# Math works automatically because 'drink' is a Py object 
# and we taught Julia how to handle Py arithmetic in the previous step
y = (1 - drink) * m.dist.poisson(rate_work, shape=(N,), sample=true)

# Send data to BI class object ------------------------------------------------
m.data_on_model = pydict(Dict("y" =>y))


# Define model ------------------------------------------------
@pymodel function model(y)
    al = m.dist.normal(1, 0.5, name="al")
    ap = m.dist.normal(-1.5, 1, name="ap")
    p = m.link.inv_logit(ap)
    lambda_ = jnp.exp(al)
    m.dist.zero_inflated_poisson(p, lambda_, obs=y)
end

# Run mcmc ------------------------------------------------
m.fit(model)  # Optimize model parameters through MCMC sampling

# Summary ------------------------------------------------
m.summary() # Get posterior distributions

jax.local_device_count 32


sample: 100%|██████████| 1000/1000 [00:00<00:00, 1461.98it/s, 3 steps of size 6.76e-01. acc. prob=0.89]


Unnamed: 0,mean,sd,hdi_5.5%,hdi_94.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
al,0.15,0.09,0.03,0.3,0.01,0.0,185.0,264.67,
ap,-0.74,0.23,-1.08,-0.34,0.02,0.01,179.56,283.56,
