
## DESCRIPTION

This notebook fits a series of generalized linear mixed models

***
### SETUP

Load the setup script

In [3]:
from aqi.utils.common_setup import *

Load the functions to :

- Fit the models
- Save the models
- Load the models

In [4]:
from aqi.utils.functions import fit_models, save_fitted_models, load_fitted_models

Import the cleaned dataset

In [5]:
data_folder = os.path.join(os.getcwd(), '..', 'data')
data = pd.read_csv(os.path.join(data_folder, 'clean_data.csv'))

In [6]:
# Count the number of unique hive_id values
print(f"Number of unique hives: {data['hid'].nunique()}")
print(f"Number of unique operations: {data['operation_id'].nunique()}")

Number of unique hives: 103477
Number of unique operations: 17


In [5]:
len(data)

112626

In [5]:
data.columns

Index(['hid', 'creation_date', 'operation_id', 'death_date', 'season',
       'season_start_month', 'season_start_day', 'death_next_season',
       'hive_age_next_season', 'aqhi_average', 'prcp_average', 'wspd_average',
       'ndvi_average', 'tavg_average', 'o3_average', 'aqhi_skew', 'prcp_skew',
       'wspd_skew', 'ndvi_skew', 'tavg_skew', 'o3_skew', 'aqhi_max',
       'prcp_max', 'wspd_max', 'ndvi_max', 'tavg_max', 'o3_max', 'region',
       'wspd_average_og', 'tavg_average_og', 'prcp_average_og',
       'aqhi_average_og'],
      dtype='object')

### IF PackageNotInstalledError: The R package "lme4" is not installed. 


See README for instruction


```
from rpy2.robjects.packages import importr


utils = importr('utils')
utils.chooseCRANmirror(ind=12)
utils.install_packages('lme4')
```

### FIT GLMM

### Define model formulas

There are 33 models starting from model 0

For features, I use the log of precipitation --> prcp_average

The rest is original data --> anything og

See 02_CleanData.ipynb for details

In [36]:
model_formulas = [
    
    # Null model ------------------------------------------------

    # model0
    "death_next_season ~ 1 + (1|region)",

    # Models with aqhi average and region -----------------------

    # model1
    "death_next_season ~ aqhi_average_og + ndvi_average + (aqhi_average|region)",

    # model2
    "death_next_season ~ aqhi_average_og * ndvi_average + (aqhi_average_og|region)",

    # model3
    "death_next_season ~ aqhi_average_og + ndvi_average + tavg_average_og + prcp_average + wspd_average_og + (aqhi_average_og|region)",

    # model4
    "death_next_season ~ aqhi_average_og * wspd_average_og + ndvi_average + prcp_average + tavg_average_og + (aqhi_average_og|region)",

    # model5
    "death_next_season ~ aqhi_average_og * ndvi_average + wspd_average_og + (ndvi_average|region)",

    # model6
    "death_next_season ~ aqhi_average_og * wspd_average_og * tavg_average_og + (aqhi_average_og|region)",

    # model7
    "death_next_season ~ aqhi_average_og * wspd_average_og * ndvi_average + (ndvi_average|region)",

    # Models with aqhi average and operation id -----------------

    # model8
    "death_next_season ~ aqhi_average_og * ndvi_average + (1|operation_id)",

    # model9
    "death_next_season ~ aqhi_average_og * ndvi_average + (aqhi_average_og|operation_id)",

    # model10
    "death_next_season ~ aqhi_average_og + ndvi_average + wspd_average_og + (1|operation_id)",

    # model11
    "death_next_season ~ aqhi_average_og + ndvi_average + wspd_average_og + (aqhi_average_og|operation_id)",

    # model12
    "death_next_season ~ aqhi_average_og * wspd_average_og + ndvi_average + (aqhi_average_og|operation_id)",

    # model13
    "death_next_season ~ aqhi_average_og * ndvi_average + wspd_average_og + (1|operation_id)",

    # model14
    "death_next_season ~ aqhi_average_og * ndvi_average * wspd_average_og + (1|operation_id)",

    # model15
    "death_next_season ~ aqhi_average_og * ndvi_average * wspd_average_og + (aqhi_average_og|operation_id)",

    # model16
    "death_next_season ~ aqhi_average_og + ndvi_average + wspd_average_og + tavg_average_og + prcp_average + (aqhi_average_og|operation_id)",

    # model17
    "death_next_season ~ aqhi_average_og * ndvi_average + tavg_average_og * prcp_average + wspd_average_og + (aqhi_average_og|operation_id)",

    # Models with aqhi skew and region --------------------------
    
    # model18
    "death_next_season ~ aqhi_skew + ndvi_skew + (aqhi_skew|region)",

    # model19
    "death_next_season ~ aqhi_skew * ndvi_skew + (aqhi_skew|region)",

    # model20
    "death_next_season ~ aqhi_skew * ndvi_average + wspd_average_og + (aqhi_skew|region)",

    # model21
    "death_next_season ~ aqhi_skew * ndvi_average * wspd_average_og + (aqhi_skew|region)",

    # Models with aqhi max ---------------------------

    # model22
    "death_next_season ~ aqhi_max * ndvi_average + wspd_average_og + (ndvi_average|region)",

    # model23
    "death_next_season ~ aqhi_max * ndvi_average * wspd_average_og + (ndvi_average|region)",

    # model24
    "death_next_season ~ aqhi_max * ndvi_average + wspd_average_og + (aqhi_average_og|operation_id)",

    # model25
    "death_next_season ~ aqhi_max * ndvi_average * wspd_average_og + (aqhi_average_og|operation_id)",

    # Models with ndvi skew --------------------------

    # model26
    "death_next_season ~ aqhi_average_og * ndvi_skew + wspd_average_og + (aqhi_average_og|region)",

    # model27
    "death_next_season ~ aqhi_average_og * ndvi_skew * wspd_average_og + (aqhi_average_og|region)",

    # model28
    "death_next_season ~ aqhi_average_og * ndvi_skew * wspd_average_og + (aqhi_average_og|operation_id)",

    # Models with O3 and operation id ---------------------------

    # model29
    "death_next_season ~ ndvi_average + o3_average + (o3_average|operation_id)",

    # model30
    "death_next_season ~ ndvi_average * o3_average + (o3_average|operation_id)",

    # model31
    "death_next_season ~ ndvi_average * o3_average + wspd_average_og + (o3_average|operation_id)",

    # model32
    "death_next_season ~ ndvi_average * o3_average * wspd_average_og + (o3_average|operation_id)"

]

### Standardize all variables

In [33]:
from sklearn.preprocessing import StandardScaler

# Variables
variables_to_scale = [
    'aqhi_average_og', 'ndvi_average', 'wspd_average_og', 
    'tavg_average_og', 'prcp_average', 'o3_average',
    'aqhi_skew', 'aqhi_max', 'ndvi_skew'
]

# Initialize the scaler
scaler = StandardScaler()

# Overwrite original variables names with scaled variables
data[variables_to_scale] = scaler.fit_transform(data[variables_to_scale])

In [34]:
print(data[variables_to_scale].describe())

       aqhi_average_og  ndvi_average  wspd_average_og  tavg_average_og  \
count     1.126260e+05  1.126260e+05     1.126260e+05     1.126260e+05   
mean     -1.615071e-16 -1.736201e-16     6.056515e-17     3.633909e-17   
std       1.000004e+00  1.000004e+00     1.000004e+00     1.000004e+00   
min      -2.656698e+00 -8.433916e+00    -5.001226e+00    -5.900251e+00   
25%      -7.633126e-01 -4.662709e-01    -5.466750e-01    -7.052172e-01   
50%      -3.487361e-01 -1.513841e-01     1.065399e-02    -1.949752e-01   
75%       9.614525e-01  3.347263e-01     5.835905e-01     6.022519e-01   
max       2.967133e+00  1.472707e+01     8.448379e+00     3.778067e+00   

       prcp_average    o3_average     aqhi_skew      aqhi_max     ndvi_skew  
count  1.126260e+05  1.126260e+05  1.121070e+05  1.126260e+05  1.121570e+05  
mean   4.037676e-17  4.602951e-16 -5.678916e-17  1.049796e-16  3.745400e-16  
std    1.000004e+00  1.000004e+00  1.000004e+00  1.000004e+00  1.000004e+00  
min   -5.548384e+00 -

Inspect how many models

In [39]:
len(model_formulas)

33

In [40]:
data.columns

Index(['hid', 'creation_date', 'operation_id', 'death_date', 'season',
       'season_start_month', 'season_start_day', 'death_next_season',
       'hive_age_next_season', 'aqhi_average', 'prcp_average', 'wspd_average',
       'ndvi_average', 'tavg_average', 'o3_average', 'aqhi_skew', 'prcp_skew',
       'wspd_skew', 'ndvi_skew', 'tavg_skew', 'o3_skew', 'aqhi_max',
       'prcp_max', 'wspd_max', 'ndvi_max', 'tavg_max', 'o3_max', 'region',
       'wspd_average_og', 'tavg_average_og', 'prcp_average_og',
       'aqhi_average_og'],
      dtype='object')

### Fit the models and save (**run once**)

Fitting all models takes **approximately 1.2 hours.**

Run once and save in ```aqi/outputs``` folder so you can reimport in session.

File is called ```fitted_GLMMs.pkl```

In [41]:
fitted_models, model_summaries = fit_models(
    data=data,
    formulas=model_formulas
)

save_fitted_models(
    fitted_models=fitted_models,
    filename="fitted_GLMMs.pkl"
)

save_fitted_models(
    fitted_models=model_summaries,
    filename="summaries_GLMMs.pkl"
)

Fitting Models:   0%|          | 0/33 [00:00<?, ?it/s]

Fitting model0 with formula: death_next_season ~ 1 + (1|region)


Fitting Models:   3%|▎         | 1/33 [00:17<09:33, 17.93s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~1+(1|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -71157.617 	 AIC: 142319.235

Random effects:

               Name    Var    Std
region  (Intercept)  0.979  0.989

No random effect correlations specified

Fixed effects:

Fitting model1 with formula: death_next_season ~ aqhi_average_og + ndvi_average + (aqhi_average|region)


Fitting Models:   6%|▌         | 2/33 [01:12<20:26, 39.56s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og+ndvi_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69467.480 	 AIC: 138946.959

Random effects:

                Name    Var    Std
region   (Intercept)  5.705  2.388
region  aqhi_average  3.938  1.985

                IV1           IV2   Corr
region  (Intercept)  aqhi_average -0.844

Fixed effects:

Fitting model2 with formula: death_next_season ~ aqhi_average_og * ndvi_average + (aqhi_average_og|region)


Fitting Models:   9%|▉         | 3/33 [02:12<24:31, 49.04s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average+(aqhi_average_og|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69334.636 	 AIC: 138683.273

Random effects:

                   Name    Var    Std
region      (Intercept)  1.778  1.333
region  aqhi_average_og  0.184  0.429

                IV1              IV2   Corr
region  (Intercept)  aqhi_average_og  0.383

Fixed effects:

Fitting model3 with formula: death_next_season ~ aqhi_average_og + ndvi_average + tavg_average_og + prcp_average + wspd_average_og + (aqhi_average_og|region)


Fitting Models:  12%|█▏        | 4/33 [03:12<25:47, 53.35s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og+ndvi_average+tavg_average_og+prcp_average+wspd_average_og+(aqhi_average_og|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68408.350 	 AIC: 136834.701

Random effects:

                   Name    Var    Std
region      (Intercept)  2.050  1.432
region  aqhi_average_og  0.181  0.426

                IV1              IV2   Corr
region  (Intercept)  aqhi_average_og -0.208

Fixed effects:

Fitting model4 with formula: death_next_season ~ aqhi_average_og * wspd_average_og + ndvi_average + prcp_average + tavg_average_og + (aqhi_average_og|region)


Fitting Models:  15%|█▌        | 5/33 [04:20<27:20, 58.57s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*wspd_average_og+ndvi_average+prcp_average+tavg_average_og+(aqhi_average_og|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68406.866 	 AIC: 136833.732

Random effects:

                   Name    Var    Std
region      (Intercept)  2.046  1.430
region  aqhi_average_og  0.180  0.425

                IV1              IV2   Corr
region  (Intercept)  aqhi_average_og -0.213

Fixed effects:

Fitting model5 with formula: death_next_season ~ aqhi_average_og * ndvi_average + wspd_average_og + (ndvi_average|region)


Fitting Models:  18%|█▊        | 6/33 [05:30<28:00, 62.22s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average+wspd_average_og+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69546.453 	 AIC: 139108.905

Random effects:

                Name    Var    Std
region   (Intercept)  1.946  1.395
region  ndvi_average  0.333  0.577

                IV1           IV2  Corr
region  (Intercept)  ndvi_average  0.56

Fixed effects:

Fitting model6 with formula: death_next_season ~ aqhi_average_og * wspd_average_og * tavg_average_og + (aqhi_average_og|region)


Fitting Models:  21%|██        | 7/33 [07:04<31:32, 72.78s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*wspd_average_og*tavg_average_og+(aqhi_average_og|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68431.613 	 AIC: 136885.226

Random effects:

                   Name    Var    Std
region      (Intercept)  1.926  1.388
region  aqhi_average_og  0.155  0.394

                IV1              IV2   Corr
region  (Intercept)  aqhi_average_og -0.051

Fixed effects:

Fitting model7 with formula: death_next_season ~ aqhi_average_og * wspd_average_og * ndvi_average + (ndvi_average|region)


Fitting Models:  24%|██▍       | 8/33 [08:42<33:39, 80.78s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*wspd_average_og*ndvi_average+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69533.058 	 AIC: 139088.117

Random effects:

                Name    Var    Std
region   (Intercept)  1.943  1.394
region  ndvi_average  0.345  0.588

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.564

Fixed effects:

Fitting model8 with formula: death_next_season ~ aqhi_average_og * ndvi_average + (1|operation_id)


Fitting Models:  27%|██▋       | 9/33 [09:14<26:16, 65.68s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66457.161 	 AIC: 132924.321

Random effects:

                     Name    Var   Std
operation_id  (Intercept)  2.279  1.51

No random effect correlations specified

Fixed effects:

Fitting model9 with formula: death_next_season ~ aqhi_average_og * ndvi_average + (aqhi_average_og|operation_id)


Fitting Models:  30%|███       | 10/33 [10:06<23:32, 61.43s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65665.016 	 AIC: 131344.033

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.213  1.488
operation_id  aqhi_average_og  2.287  1.512

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.323

Fixed effects:

Fitting model10 with formula: death_next_season ~ aqhi_average_og + ndvi_average + wspd_average_og + (1|operation_id)


Fitting Models:  33%|███▎      | 11/33 [10:39<19:20, 52.76s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og+ndvi_average+wspd_average_og+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66477.666 	 AIC: 132965.333

Random effects:

                     Name    Var   Std
operation_id  (Intercept)  2.309  1.52

No random effect correlations specified

Fixed effects:

Fitting model11 with formula: death_next_season ~ aqhi_average_og + ndvi_average + wspd_average_og + (aqhi_average_og|operation_id)


Fitting Models:  36%|███▋      | 12/33 [11:32<18:24, 52.58s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og+ndvi_average+wspd_average_og+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65676.662 	 AIC: 131367.324

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.225  1.492
operation_id  aqhi_average_og  2.185  1.478

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.277

Fixed effects:

Fitting model12 with formula: death_next_season ~ aqhi_average_og * wspd_average_og + ndvi_average + (aqhi_average_og|operation_id)


Fitting Models:  39%|███▉      | 13/33 [12:44<19:31, 58.59s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*wspd_average_og+ndvi_average+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65674.469 	 AIC: 131364.939

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.228  1.493
operation_id  aqhi_average_og  2.180  1.476

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.272

Fixed effects:

Fitting model13 with formula: death_next_season ~ aqhi_average_og * ndvi_average + wspd_average_og + (1|operation_id)


Fitting Models:  42%|████▏     | 14/33 [13:25<16:50, 53.20s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average+wspd_average_og+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66381.713 	 AIC: 132775.426

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  2.393  1.547

No random effect correlations specified

Fixed effects:

Fitting model14 with formula: death_next_season ~ aqhi_average_og * ndvi_average * wspd_average_og + (1|operation_id)


Fitting Models:  45%|████▌     | 15/33 [14:21<16:15, 54.21s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average*wspd_average_og+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66372.323 	 AIC: 132762.646

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  2.395  1.548

No random effect correlations specified

Fixed effects:

Fitting model15 with formula: death_next_season ~ aqhi_average_og * ndvi_average * wspd_average_og + (aqhi_average_og|operation_id)


Fitting Models:  48%|████▊     | 16/33 [16:04<19:30, 68.83s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average*wspd_average_og+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65623.299 	 AIC: 131268.598

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.226  1.492
operation_id  aqhi_average_og  2.260  1.503

                      IV1              IV2  Corr
operation_id  (Intercept)  aqhi_average_og   0.3

Fixed effects:

Fitting model16 with formula: death_next_season ~ aqhi_average_og + ndvi_average + wspd_average_og + tavg_average_og + prcp_average + (aqhi_average_og|operation_id)


Fitting Models:  52%|█████▏    | 17/33 [17:19<18:52, 70.77s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og+ndvi_average+wspd_average_og+tavg_average_og+prcp_average+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64749.028 	 AIC: 129516.056

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  3.119  1.766
operation_id  aqhi_average_og  2.475  1.573

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.285

Fixed effects:

Fitting model17 with formula: death_next_season ~ aqhi_average_og * ndvi_average + tavg_average_og * prcp_average + wspd_average_og + (aqhi_average_og|operation_id)


Fitting Models:  55%|█████▍    | 18/33 [19:25<21:49, 87.30s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_average+tavg_average_og*prcp_average+wspd_average_og+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64744.864 	 AIC: 129511.727

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  3.114  1.765
operation_id  aqhi_average_og  2.445  1.564

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.297

Fixed effects:

Fitting model18 with formula: death_next_season ~ aqhi_skew + ndvi_skew + (aqhi_skew|region)


Fitting Models:  58%|█████▊    | 19/33 [20:00<16:43, 71.66s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew+ndvi_skew+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69909.878 	 AIC: 139831.757

Random effects:

               Name    Var    Std
region  (Intercept)  0.938  0.969
region    aqhi_skew  0.303  0.550

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.309

Fixed effects:

Fitting model19 with formula: death_next_season ~ aqhi_skew * ndvi_skew + (aqhi_skew|region)


Fitting Models:  61%|██████    | 20/33 [20:46<13:48, 63.71s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew*ndvi_skew+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69549.952 	 AIC: 139113.904

Random effects:

               Name    Var    Std
region  (Intercept)  0.929  0.964
region    aqhi_skew  0.317  0.563

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.327

Fixed effects:

Fitting model20 with formula: death_next_season ~ aqhi_skew * ndvi_average + wspd_average_og + (aqhi_skew|region)


Fitting Models:  64%|██████▎   | 21/33 [21:42<12:17, 61.43s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew*ndvi_average+wspd_average_og+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69008.256 	 AIC: 138032.512

Random effects:

               Name    Var    Std
region  (Intercept)  1.064  1.031
region    aqhi_skew  0.288  0.537

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.372

Fixed effects:

Fitting model21 with formula: death_next_season ~ aqhi_skew * ndvi_average * wspd_average_og + (aqhi_skew|region)


Fitting Models:  67%|██████▋   | 22/33 [22:53<11:49, 64.51s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew*ndvi_average*wspd_average_og+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68988.392 	 AIC: 137998.783

Random effects:

               Name    Var    Std
region  (Intercept)  1.069  1.034
region    aqhi_skew  0.267  0.517

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.391

Fixed effects:

Fitting model22 with formula: death_next_season ~ aqhi_max * ndvi_average + wspd_average_og + (ndvi_average|region)


Fitting Models:  70%|██████▉   | 23/33 [23:42<09:56, 59.66s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average+wspd_average_og+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69971.489 	 AIC: 139958.979

Random effects:

                Name    Var    Std
region   (Intercept)  1.571  1.253
region  ndvi_average  0.410  0.640

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.778

Fixed effects:

Fitting model23 with formula: death_next_season ~ aqhi_max * ndvi_average * wspd_average_og + (ndvi_average|region)


Fitting Models:  73%|███████▎  | 24/33 [25:11<10:15, 68.40s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average*wspd_average_og+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69921.390 	 AIC: 139864.781

Random effects:

                Name    Var    Std
region   (Intercept)  1.600  1.265
region  ndvi_average  0.445  0.667

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.779

Fixed effects:

Fitting model24 with formula: death_next_season ~ aqhi_max * ndvi_average + wspd_average_og + (aqhi_average_og|operation_id)


Fitting Models:  76%|███████▌  | 25/33 [26:34<09:42, 72.83s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average+wspd_average_og+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65310.212 	 AIC: 130636.424

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.773  1.665
operation_id  aqhi_average_og  3.428  1.851

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.534

Fixed effects:

Fitting model25 with formula: death_next_season ~ aqhi_max * ndvi_average * wspd_average_og + (aqhi_average_og|operation_id)


Fitting Models:  79%|███████▉  | 26/33 [28:12<09:22, 80.36s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average*wspd_average_og+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65256.332 	 AIC: 130534.664

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.801  1.674
operation_id  aqhi_average_og  3.585  1.894

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.503

Fixed effects:

Fitting model26 with formula: death_next_season ~ aqhi_average_og * ndvi_skew + wspd_average_og + (aqhi_average_og|region)


Fitting Models:  82%|████████▏ | 27/33 [29:27<07:53, 78.93s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_skew+wspd_average_og+(aqhi_average_og|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69690.056 	 AIC: 139396.112

Random effects:

                   Name    Var    Std
region      (Intercept)  1.890  1.375
region  aqhi_average_og  0.173  0.416

                IV1              IV2   Corr
region  (Intercept)  aqhi_average_og  0.521

Fixed effects:

Fitting model27 with formula: death_next_season ~ aqhi_average_og * ndvi_skew * wspd_average_og + (aqhi_average_og|region)


Fitting Models:  85%|████████▍ | 28/33 [31:12<07:12, 86.54s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_skew*wspd_average_og+(aqhi_average_og|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69650.795 	 AIC: 139323.590

Random effects:

                   Name    Var    Std
region      (Intercept)  1.883  1.372
region  aqhi_average_og  0.164  0.405

                IV1              IV2   Corr
region  (Intercept)  aqhi_average_og  0.519

Fixed effects:

Fitting model28 with formula: death_next_season ~ aqhi_average_og * ndvi_skew * wspd_average_og + (aqhi_average_og|operation_id)


Fitting Models:  88%|████████▊ | 29/33 [33:07<06:21, 95.35s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average_og*ndvi_skew*wspd_average_og+(aqhi_average_og|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66063.215 	 AIC: 132148.430

Random effects:

                         Name    Var    Std
operation_id      (Intercept)  2.248  1.499
operation_id  aqhi_average_og  2.254  1.501

                      IV1              IV2   Corr
operation_id  (Intercept)  aqhi_average_og  0.314

Fixed effects:

Fitting model29 with formula: death_next_season ~ ndvi_average + o3_average + (o3_average|operation_id)


Fitting Models:  91%|█████████ | 30/33 [33:56<04:04, 81.41s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average+o3_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64696.648 	 AIC: 129405.296

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.519  1.876
operation_id   o3_average  3.953  1.988

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.682

Fixed effects:

Fitting model30 with formula: death_next_season ~ ndvi_average * o3_average + (o3_average|operation_id)


Fitting Models:  94%|█████████▍| 31/33 [34:42<02:21, 70.75s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64652.788 	 AIC: 129319.575

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.477  1.865
operation_id   o3_average  3.947  1.987

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.691

Fixed effects:

Fitting model31 with formula: death_next_season ~ ndvi_average * o3_average + wspd_average_og + (o3_average|operation_id)


Fitting Models:  97%|█████████▋| 32/33 [35:39<01:06, 66.58s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average+wspd_average_og+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64589.811 	 AIC: 129195.623

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.482  1.866
operation_id   o3_average  3.883  1.970

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.685

Fixed effects:

Fitting model32 with formula: death_next_season ~ ndvi_average * o3_average * wspd_average_og + (o3_average|operation_id)


Fitting Models: 100%|██████████| 33/33 [37:32<00:00, 68.27s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average*wspd_average_og+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64516.287 	 AIC: 129054.575

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.464  1.861
operation_id   o3_average  4.042  2.011

                      IV1         IV2  Corr
operation_id  (Intercept)  o3_average  0.69

Fixed effects:






Models saved to /Users/maximeff-nectar/Documents/ledge-nectar/aqi/outputs/fitted_GLMMs.pkl.
Models saved to /Users/maximeff-nectar/Documents/ledge-nectar/aqi/outputs/summaries_GLMMs.pkl.


### Load models in session

In [10]:
fitted_models = load_fitted_models("fitted_GLMMs.pkl")

Models loaded from /Users/maximeff-nectar/Documents/ledge-nectar/aqi/outputs/fitted_GLMMs.pkl.


In [42]:
# Initialize variables
best_aic = float('inf')
best_model = None
best_model_name = None

# Loop over the fitted models dict
for model_name, model in fitted_models.items():
    
    # Check if the model has warnings
    if len(model.warnings) == 0:
        # Get the AIC for the current model
        current_aic = model.AIC
        
        # Compare with the best AIC found so far
        if current_aic < best_aic:
            print(f"Current best model is {model_name} with an AIC of {current_aic}")
            best_aic = current_aic
            best_model = model
            best_model_name = model_name
    else:
        print(f"Model {model_name} did not converge due to warnings: {model.warnings}")

Current best model is model0 with an AIC of 142319.23490165427
Current best model is model1 with an AIC of 138946.95945308142
Current best model is model2 with an AIC of 138683.27267033744
Current best model is model3 with an AIC of 136834.70097385795
Current best model is model4 with an AIC of 136833.73152819887
Current best model is model8 with an AIC of 132924.32125198081
Current best model is model9 with an AIC of 131344.03276013533
Current best model is model15 with an AIC of 131268.5978089567
Current best model is model16 with an AIC of 129516.05600307278
Current best model is model17 with an AIC of 129511.72722862924
Current best model is model29 with an AIC of 129405.29649029924
Current best model is model30 with an AIC of 129319.57520043233
Current best model is model31 with an AIC of 129195.6229296742
Current best model is model32 with an AIC of 129054.57482102417


In [43]:
print(best_model.fit())

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average*wspd_average_og+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64516.287 	 AIC: 129054.575

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.464  1.861
operation_id   o3_average  4.042  2.011

                      IV1         IV2  Corr
operation_id  (Intercept)  o3_average  0.69

Fixed effects:

                                         Estimate  2.5_ci  97.5_ci     SE  \
(Intercept)                                -0.149  -1.040    0.743  0.455   
ndvi_average                               -0.413  -0.432   -0.393  0.010   
o3_average                                  1.243   0.287    2.200  0.488   
wspd_average_og                            -0.126  -0.142   -0.109  0.009   
ndvi_average:o3_average                    -0.106  -0.126   -0.085  0

Save the best model output

In [44]:
project_root = os.path.dirname(os.getcwd())
output_dir = os.path.join(project_root, 'outputs')    
filepath = os.path.join(output_dir, "GLMM_best_model.joblib")

save_model(best_model, filepath = filepath)