# Statsmodels
* https://www.kaggle.com/ojwatson/mixed-models

In [2]:
%matplotlib inline

import json

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tools.sm_exceptions import ConvergenceWarning

In [3]:
# Fit a linear mixed model on the `dietox` dataset from the R package geepack.
# Fixed effects: mean Weight as a linear function of Time; observations are grouped by Pig.
# re_formula="~Time" gives each pig a random intercept AND a random slope on Time,
# which is why the summary reports Group Var, Group x Time Cov and Time Var.
data = sm.datasets.get_rdataset('dietox', 'geepack').data
md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"], re_formula="~Time")
mdf = md.fit(method=["lbfgs"])
print(mdf.summary())

           Mixed Linear Model Regression Results
Model:             MixedLM  Dependent Variable:  Weight    
No. Observations:  861      Method:              REML      
No. Groups:        72       Scale:               6.0372    
Min. group size:   11       Log-Likelihood:      -2217.0475
Max. group size:   12       Converged:           Yes       
Mean group size:   12.0                                    
-----------------------------------------------------------
                 Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-----------------------------------------------------------
Intercept        15.739    0.550 28.603 0.000 14.660 16.817
Time              6.939    0.080 86.925 0.000  6.783  7.095
Group Var        19.503    1.561                           
Group x Time Cov  0.294    0.153                           
Time Var          0.416    0.033                           



In [4]:
# Root-mean-square error of the fitted mixed model on the training data.
# `mdf.fittedvalues` alone is the vector of fitted values, not an error metric,
# so the RMSE is computed from the residuals (observed response minus fitted value).
residuals = np.asarray(mdf.resid, dtype=float)
rmse = float(np.sqrt(np.mean(residuals ** 2)))
rmse

# Statsmodels on the FQI part

In [5]:
def load_tuples(path):
    """Read the JSON file at ``path`` and return the decoded Python object.

    The file is opened as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# Decoded training / test tuples; converted to DataFrames in the next cell.
train_dict = load_tuples('train_tuples.json')
test_dict = load_tuples('test_tuples.json')

In [6]:
# Build one DataFrame per split from the decoded JSON dictionaries.
train_df, test_df = (
    pd.DataFrame.from_dict(split) for split in (train_dict, test_dict)
)
# Display the column names available for modelling.
train_df.columns

Index(['a0', 'a1', 'r', 'ds', 's0', 's1', 's2', 's3', 's4', 's5', 's6', 's7',
       's8', 's9'],
      dtype='object')

In [8]:
# Response (r), fixed-effect design matrix (a0, a1, s0..s9) and grouping
# variable (ds) for the mixed model.
feature_cols = ['a0', 'a1'] + ['s%d' % i for i in range(10)]
groups = train_df['ds']
X = train_df[feature_cols]
y = train_df['r']

In [9]:
# Linear mixed model of r on the feature columns, grouped by `ds`.
# No exog_re is passed, and the fitted parameters show a single "Group Var" term.
model = sm.MixedLM(
    endog=y,
    exog=X,
    groups=groups,
)
result = model.fit()



In [20]:
# Select the same feature columns from the test split, in the same order as
# the training design matrix, and predict with the fitted model.
test_feature_cols = ['a0', 'a1', 's0', 's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9']
testX = test_df.loc[:, test_feature_cols]
predictions = result.predict(exog=testX)

In [24]:
# Sanity check: recompute the predictions by hand as the dot product of the
# fixed-effect coefficients with each feature row, y_hat = sum_j beta_j * x_j.
# (This model's exog has no intercept column, so there is no beta0 term.)
# NOTE(review): assumes result.predict uses only the fixed effects - confirm.
fe_coeffs = np.asarray(result.fe_params).reshape(1, -1)  # row vector; length follows the model, not a hard-coded 12
x_test = testX.to_numpy()
manual_preds = np.dot(fe_coeffs, x_test.T)  # shape (1, number of test rows)
manual_preds[0]

array([ 4.42198702,  9.03359092, 26.39228657, ..., 21.49984278,
        6.12003501, 15.71244469])

In [32]:
# Report every test row where the model's prediction and the hand-computed
# value disagree. Floats are compared with a numerical tolerance rather than
# by their string representation, which would flag harmless last-digit
# rounding differences.
model_preds = np.asarray(predictions, dtype=float)
sanity_preds = np.asarray(manual_preds[0], dtype=float)

# zip() would silently truncate on a length mismatch, hiding missing rows.
assert model_preds.shape == sanity_preds.shape, (
    "prediction count mismatch: %s vs %s" % (model_preds.shape, sanity_preds.shape)
)

differs = ~np.isclose(model_preds, sanity_preds)
for dp, mp in zip(model_preds[differs], sanity_preds[differs]):
    print("Pred: " + str(dp) + " Sanity: " + str(mp))

In [30]:
# All estimated parameters: the fixed-effect coefficients followed by the
# group variance term ("Group Var").
result.params

a0           5.817828
a1           1.775593
s0           1.321432
s1           3.118893
s2           5.576449
s3          -0.708489
s4          -1.752976
s5           1.096265
s6           1.588758
s7          -0.882082
s8          -7.679591
s9          -0.603794
Group Var    0.065379
dtype: float64

In [31]:
# Fixed-effect coefficients only: one per exog column, without the variance term.
result.fe_params

a0    5.817828
a1    1.775593
s0    1.321432
s1    3.118893
s2    5.576449
s3   -0.708489
s4   -1.752976
s5    1.096265
s6    1.588758
s7   -0.882082
s8   -7.679591
s9   -0.603794
dtype: float64

In [37]:
# Random-effect estimates as a dict keyed by group label (the values of `ds`).
result.random_effects

{0: Group Var    0.007656
 dtype: float64,
 1: Group Var    0.000603
 dtype: float64}

## Using BinomialBayesMixedGLM (TODO: the cells below still fit `sm.MixedLM`)

In [34]:
# Rebuild the response, design matrix and grouping variable from the training frame.
state_cols = ['s' + str(i) for i in range(10)]
action_cols = ['a0', 'a1']
y = train_df['r']
X = train_df[action_cols + state_cols]
groups = train_df['ds']

In [None]:
# Fit a linear mixed model of y on X, grouped by `groups`.
# NOTE(review): this is still sm.MixedLM, not BinomialBayesMixedGLM as the
# section heading suggests - confirm which model is intended here.
model = sm.MixedLM(
    endog=y,
    exog=X,
    groups=groups,
)
result = model.fit()

## From formula

In [57]:
# Formula-API fit. The same covariate list is used for the fixed effects and
# for the random-effects formula.
covariates = "a0 + a1 + s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9"
model = sm.MixedLM.from_formula(
    "r ~ " + covariates,
    train_df,
    groups=train_df['ds'],
    re_formula=covariates,
)
result = model.fit()



In [52]:
# Fixed-effect estimates from the formula-based fit; unlike the array-based
# fit above, the displayed result includes an Intercept term.
result.fe_params

Intercept    0.009911
a0           5.817100
a1           1.776143
s0           1.321474
s1           3.117673
s2           5.576962
s3          -0.709580
s4          -1.752500
s5           1.095225
s6           1.589119
s7          -0.882842
s8          -7.680264
s9          -0.603790
dtype: float64

In [53]:
# Random-effect estimates per group label: a "Group" (intercept) entry plus one
# entry for each covariate named in re_formula.
result.random_effects

{0: Group    0.007392
 a0      -0.008371
 a1       0.006599
 s0      -0.004872
 s1       0.004685
 s2      -0.002982
 s3       0.007208
 s4      -0.001895
 s5       0.003350
 s6      -0.005965
 s7       0.008608
 s8       0.001627
 s9       0.002616
 dtype: float64,
 1: Group   -0.007392
 a0       0.008371
 a1      -0.006599
 s0       0.004872
 s1      -0.004685
 s2       0.002982
 s3      -0.007208
 s4       0.001895
 s5      -0.003350
 s6       0.005965
 s7      -0.008608
 s8      -0.001627
 s9      -0.002616
 dtype: float64}