In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from BetabinGLM import BetaBinomial, BetaBinomialAlternative

# Load dataset

The dataset used here is the Star98 educational dataset from statsmodels. It contains 303 observations, 20 independent variables, and a two-column binomial response (the number of students above and below the national median on the math test).

More about this dataset: https://www.statsmodels.org/dev/datasets/generated/star98.html

In [2]:
# Fetch the Star98 dataset that ships with statsmodels.
data = sm.datasets.star98.load()

# Keep the design matrix as returned by statsmodels; convert only the
# response to a NumPy array.
exog, endog = data.exog, data.endog.to_numpy()

# BetabinGLM

In [3]:
# Build the BetaBinomial model from the response (endog) and the design matrix (exog).
# NOTE(review): no separate fit call appears in this notebook and the cells below read
# results straight off the instance, so fitting presumably happens in the constructor —
# confirm against BetabinGLM.
model = BetaBinomial(endog, exog)

  a = Z * (1/phi - 1)
  b = (1 - Z) * (1/phi - 1)


## To obtain the log-likelihood

In [12]:
# Bare last expression so the notebook displays the value; LL is read as an
# attribute, not called.
model.LL

-1482.6810610482644

## To obtain the optimized weights and phi

In [4]:
# Evaluates to a (W, phi) tuple: W displays as an array and phi as a single float.
model.W, model.phi

(array([-2.04116859e-02,  1.49628871e-02, -1.88986905e-02, -9.99623863e-03,
         1.46789023e-01,  2.63005456e-02,  2.86913143e-02, -5.72235063e-01,
        -4.87353441e-02, -4.43173149e-02, -1.91913101e-03, -2.33344560e-03,
        -5.93894370e-03, -2.53770855e-03, -3.57651519e-04,  1.94954284e-02,
         1.69230576e-02,  1.37846772e-03,  1.06064593e-04, -5.68470296e-04,
         6.84091335e-01]),
 0.021602411761346488)

## To obtain the predicted numbers of positive and negative observations

In [5]:
# `predict` is read as an attribute here (no call); it displays as a two-column array.
# NOTE(review): this renders every row; slicing (e.g. `model.predict[:5]`) would keep
# the output short.
model.predict

array([[4.66433169e+02, 3.40566831e+02],
       [1.40836592e+02, 4.31634083e+01],
       [2.61472786e+02, 3.09527214e+02],
       [3.92458519e+02, 1.80541481e+02],
       [1.70293869e+01, 4.79706131e+01],
       [1.55134664e+03, 6.95653359e+02],
       [4.34270823e+02, 9.29729177e+02],
       [5.81913311e+02, 3.30086689e+02],
       [2.65062855e+02, 2.59937145e+02],
       [5.24597331e+02, 5.42402669e+02],
       [6.98728466e+02, 2.31727153e+03],
       [2.00895641e+02, 3.41043593e+01],
       [2.62665374e+02, 2.93334626e+02],
       [2.96924247e+02, 3.91075753e+02],
       [1.69702573e+02, 8.22974271e+01],
       [6.85218838e+02, 2.39781162e+02],
       [1.96386696e+02, 1.80613304e+02],
       [2.89749132e+01, 4.00250868e+01],
       [5.86863993e+02, 5.05136007e+02],
       [6.50462535e+01, 4.99537465e+01],
       [4.83283502e+01, 9.06716498e+01],
       [2.23296434e+02, 2.25703566e+02],
       [1.47961201e+02, 1.61038799e+02],
       [3.96698993e+01, 7.63301007e+01],
       [1.810984

# An alternative 

## BetabinGLM

In [6]:
# Build the alternative class on the same endog/exog that were passed to BetaBinomial.
# NOTE(review): as with `model`, results are read off the instance without an explicit
# fit call — confirm in BetabinGLM that construction performs the fit.
model1 = BetaBinomialAlternative(endog, exog)

## To obtain the log-likelihood

In [7]:
# Bare last expression so the notebook displays the value; LL is read as an
# attribute, not called.
model1.LL

-1483.0254187895334

## To obtain the optimized weights and phi

In [8]:
# Evaluates to a (W, phi) tuple: W displays as an array and phi as a single float.
model1.W, model1.phi

(array([-2.03334594e-02,  1.48923040e-02, -1.92189204e-02, -9.93367624e-03,
         7.54158270e-02, -3.05107958e-02,  1.55974703e-02, -2.35840579e-01,
         2.14470511e-02, -9.12423018e-03, -1.99228542e-03, -2.28299664e-03,
        -1.03328035e-03, -1.38124410e-03,  5.83942137e-04,  3.25455463e-03,
         9.19087076e-03, -2.88564344e-04,  2.69841640e-05, -1.97880074e-04,
         1.35841491e-03]),
 0.021668888882735892)

## To obtain the predicted numbers of positive and negative observations

In [13]:
# `predict` is read as an attribute here (no call); it displays as a two-column array.
# NOTE(review): this renders every row; slicing (e.g. `model1.predict[:5]`) would keep
# the output short.
model1.predict

array([[4.67415716e+02, 3.39584284e+02],
       [1.41113209e+02, 4.28867910e+01],
       [2.61258131e+02, 3.09741869e+02],
       [3.92580159e+02, 1.80419841e+02],
       [1.71078619e+01, 4.78921381e+01],
       [1.55057586e+03, 6.96424137e+02],
       [4.34265881e+02, 9.29734119e+02],
       [5.79672190e+02, 3.32327810e+02],
       [2.64455578e+02, 2.60544422e+02],
       [5.20970504e+02, 5.46029496e+02],
       [7.14884194e+02, 2.30111581e+03],
       [2.00531204e+02, 3.44687958e+01],
       [2.61818393e+02, 2.94181607e+02],
       [2.95376355e+02, 3.92623645e+02],
       [1.70167064e+02, 8.18329361e+01],
       [6.84795039e+02, 2.40204961e+02],
       [1.95175510e+02, 1.81824490e+02],
       [2.92070558e+01, 3.97929442e+01],
       [5.86564593e+02, 5.05435407e+02],
       [6.51081698e+01, 4.98918302e+01],
       [4.81486022e+01, 9.08513978e+01],
       [2.22402216e+02, 2.26597784e+02],
       [1.47886358e+02, 1.61113642e+02],
       [3.97386449e+01, 7.62613551e+01],
       [1.838007