In [20]:
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal, multinomial
from scipy.special import softmax

np.random.seed(42)

# Settings
num_companies = 40
num_categories = 4  # categories: 0 (pivot), 1, 2, 3


# 1. Generate random effects for each company (for categories 1-3, pivot is category 0)
mean_effect = np.zeros(num_categories - 1)
# Define covariance matrix with correlations between categories
cov_effect = np.array([
    [1.0, 0.5, 0.3],
    [0.5, 1.0, 0.4],
    [0.3, 0.4, 1.0]
])

company_effects = multivariate_normal.rvs(mean=mean_effect, cov=cov_effect, size=num_companies)
# Add pivot logits = 0
company_effects = np.hstack([np.zeros((num_companies, 1)), company_effects])

# Store company random effects in a DataFrame
company_ids = [f'company_{i}' for i in range(num_companies)]
company_effects_df = pd.DataFrame(company_effects, columns=[f'logit_{i}' for i in range(num_categories)])
company_effects_df['company'] = company_ids

# 2. Generate observations per company
data = []
for idx, company in enumerate(company_ids):
    n_obs = np.random.randint(1, 11)  # Between 1 and 10 observations per company
    
  
    logits = company_effects[idx]
    probs = softmax(logits)
    for obs in range(n_obs):
          
        observed_type = np.random.choice(num_categories, p=probs)
        
        # Append observation
        data.append({
            'company': company,
            'type': f'type_{observed_type}'
        })

# Convert to DataFrame
simulated_df = pd.DataFrame(data)


print(simulated_df.head(10))


     company    type
0  company_0  type_3
1  company_1  type_0
2  company_1  type_3
3  company_1  type_0
4  company_1  type_3
5  company_1  type_2
6  company_1  type_0
7  company_1  type_0
8  company_1  type_2
9  company_2  type_1


Work in progress... still not doing what i think it should be doing. I want it to do what (1 | type | company) does in brms

In [21]:
import bambi as bmb

model = bmb.Model('type ~   (1| company)', data=simulated_df, family='categorical')
results = model.fit()


Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [Intercept, 1|company_sigma, 1|company_offset]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.


In [22]:
import arviz as az
az.summary(results ,var_names = ['~company'])



Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
Intercept[type_1],-0.048,0.278,-0.534,0.481,0.004,0.004,3897.0,3274.0,1.0
Intercept[type_2],-0.117,0.280,-0.616,0.442,0.004,0.004,4206.0,3040.0,1.0
Intercept[type_3],0.174,0.267,-0.317,0.693,0.004,0.004,4118.0,3103.0,1.0
1|company_sigma,0.980,0.193,0.625,1.358,0.005,0.003,1512.0,2317.0,1.0
"1|company[type_1, company_0]",-0.197,0.947,-1.967,1.569,0.013,0.022,5882.0,2738.0,1.0
...,...,...,...,...,...,...,...,...,...
"1|company[type_3, company_5]",1.317,0.687,0.020,2.593,0.011,0.010,4231.0,2985.0,1.0
"1|company[type_3, company_6]",1.191,0.855,-0.471,2.754,0.013,0.014,4259.0,3197.0,1.0
"1|company[type_3, company_7]",-0.191,0.780,-1.634,1.316,0.011,0.016,5436.0,2481.0,1.0
"1|company[type_3, company_8]",0.677,0.808,-0.868,2.155,0.012,0.014,4558.0,3009.0,1.0
