
## DESCRIPTION

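This notebook fits a series of generalized linear mixed models (GLMMs) predicting `death_next_season`, selects the model with the lowest AIC among those that fitted without warnings, and saves it.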

***
### SETUP

Load the setup script

In [1]:
from utils.common_setup import *

Load the functions to:

- Fit the models
- Save the models
- Load the models

In [2]:
from utils.functions import fit_models, save_fitted_models, load_fitted_models

Import the cleaned dataset

In [3]:
# The cleaned dataset lives in the `data` folder, one level above the working directory
data_folder = os.path.join(os.getcwd(), '..', 'data')
clean_data_path = os.path.join(data_folder, 'clean_data.csv')
data = pd.read_csv(clean_data_path)

### IF PackageNotInstalledError: The R package "lme4" is not installed. 


See the README for instructions, or run the following in a code cell:


```
from rpy2.robjects.packages import importr


utils = importr('utils')
utils.chooseCRANmirror(ind=12)
utils.install_packages('lme4')
```

### FIT GLMM

### Define model formulas

There are 33 models, numbered from model0 to model32.

In [21]:
# Candidate GLMM formulas in lme4 syntax: fixed effects followed by one
# random-effects term `(slope|group)`. `a * b` expands to both main effects
# plus their interaction.
# The position in this list matches the `model{i}` label printed in the fit log,
# so inserting or reordering entries renumbers every later model.
model_formulas = [
    
    # Null model ------------------------------------------------

    # model0
    "death_next_season ~ 1 + (1|region)",

    # Models with aqhi average and region -----------------------

    # model1
    "death_next_season ~ aqhi_average + ndvi_average + (aqhi_average|region)",

    # model2
    "death_next_season ~ aqhi_average * ndvi_average + (aqhi_average|region)",

    # model3
    "death_next_season ~ aqhi_average + ndvi_average + tavg_average + prcp_average + wspd_average + (aqhi_average|region)",

    # model4
    "death_next_season ~ aqhi_average * wspd_average + ndvi_average + prcp_average + tavg_average + (aqhi_average|region)",

    # model5
    "death_next_season ~ aqhi_average * ndvi_average + wspd_average + (ndvi_average|region)",

    # model6
    "death_next_season ~ aqhi_average * wspd_average * tavg_average + (aqhi_average|region)",

    # model7
    "death_next_season ~ aqhi_average * wspd_average * ndvi_average + (ndvi_average|region)",

    # Models with aqhi average and operation id -----------------

    # model8
    "death_next_season ~ aqhi_average * ndvi_average + (1|operation_id)",

    # model9
    "death_next_season ~ aqhi_average * ndvi_average + (aqhi_average|operation_id)",

    # model10
    "death_next_season ~ aqhi_average + ndvi_average + wspd_average + (1|operation_id)",

    # model11
    "death_next_season ~ aqhi_average + ndvi_average + wspd_average + (aqhi_average|operation_id)",

    # model12
    "death_next_season ~ aqhi_average * wspd_average + ndvi_average + (aqhi_average|operation_id)",

    # model13
    "death_next_season ~ aqhi_average * ndvi_average + wspd_average + (1|operation_id)",

    # model14
    "death_next_season ~ aqhi_average * ndvi_average * wspd_average + (1|operation_id)",

    # model15
    "death_next_season ~ aqhi_average * ndvi_average * wspd_average + (aqhi_average|operation_id)",

    # model16
    "death_next_season ~ aqhi_average + ndvi_average + wspd_average + tavg_average + prcp_average + (aqhi_average|operation_id)",

    # model17
    "death_next_season ~ aqhi_average * ndvi_average + tavg_average * prcp_average + wspd_average + (aqhi_average|operation_id)",

    # Models with aqhi skew and region --------------------------
    # NOTE(review): the skew columns contain missing values (their describe()
    # counts are lower than the other columns'), so models using them may be fit
    # on fewer rows — confirm before comparing their AIC with the other models.
    
    # model18
    "death_next_season ~ aqhi_skew + ndvi_skew + (aqhi_skew|region)",

    # model19
    "death_next_season ~ aqhi_skew * ndvi_skew + (aqhi_skew|region)",

    # model20
    "death_next_season ~ aqhi_skew * ndvi_average + wspd_average + (aqhi_skew|region)",

    # model21
    "death_next_season ~ aqhi_skew * ndvi_average * wspd_average + (aqhi_skew|region)",

    # Models with aqhi max ---------------------------

    # model22
    "death_next_season ~ aqhi_max * ndvi_average + wspd_average + (ndvi_average|region)",

    # model23
    "death_next_season ~ aqhi_max * ndvi_average * wspd_average + (ndvi_average|region)",

    # NOTE(review): model24 and model25 put the random slope on aqhi_average
    # although the fixed effect is aqhi_max — confirm this is intended
    # (aqhi_max|operation_id may have been meant).

    # model24
    "death_next_season ~ aqhi_max * ndvi_average + wspd_average + (aqhi_average|operation_id)",

    # model25
    "death_next_season ~ aqhi_max * ndvi_average * wspd_average + (aqhi_average|operation_id)",

    # Models with ndvi skew --------------------------
    # NOTE(review): ndvi_skew also contains missing values — same caveat as the
    # aqhi skew section about comparing AIC across models.

    # model26
    "death_next_season ~ aqhi_average * ndvi_skew + wspd_average + (aqhi_average|region)",

    # model27
    "death_next_season ~ aqhi_average * ndvi_skew * wspd_average + (aqhi_average|region)",

    # model28
    "death_next_season ~ aqhi_average * ndvi_skew * wspd_average + (aqhi_average|operation_id)",

    # Models with O3 and operation id ---------------------------

    # model29
    "death_next_season ~ ndvi_average + o3_average + (o3_average|operation_id)",

    # model30
    "death_next_season ~ ndvi_average * o3_average + (o3_average|operation_id)",

    # model31
    "death_next_season ~ ndvi_average * o3_average + wspd_average + (o3_average|operation_id)",

    # model32
    "death_next_season ~ ndvi_average * o3_average * wspd_average + (o3_average|operation_id)"

]

### Standardize all variables

In [5]:
from sklearn.preprocessing import StandardScaler

# Predictors to rescale to mean 0 and unit variance
variables_to_scale = [
    'aqhi_average',
    'ndvi_average',
    'wspd_average',
    'tavg_average',
    'prcp_average',
    'o3_average',
    'aqhi_skew',
    'aqhi_max',
    'ndvi_skew',
]

# Fit the scaler on the selected columns and compute their standardized values
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[variables_to_scale])

# Write the standardized values back under the original column names
data[variables_to_scale] = scaled_values

In [15]:
# Sanity-check the scaling: every column should show mean ≈ 0 and std ≈ 1.
# A `count` lower than the other columns' means that column contains missing values.
print(data[variables_to_scale].describe())

       aqhi_average  ndvi_average  wspd_average  tavg_average  prcp_average  \
count  1.126260e+05  1.126260e+05  1.126260e+05  1.126260e+05  1.126260e+05   
mean  -2.099592e-16 -1.736201e-16 -3.391648e-16 -3.109011e-15  4.037676e-17   
std    1.000004e+00  1.000004e+00  1.000004e+00  1.000004e+00  1.000004e+00   
min   -4.424029e+00 -8.433916e+00 -3.346216e+01 -1.436220e+01 -5.548384e+00   
25%   -7.151574e-01 -4.662709e-01 -2.370080e-01 -6.814093e-01 -9.297429e-01   
50%   -2.315185e-01 -1.513841e-01  1.031090e-01 -1.269333e-01  4.509853e-01   
75%    9.832993e-01  3.347263e-01  4.154009e-01  6.577897e-01  7.345448e-01   
max    2.308546e+00  1.472707e+01  2.951177e+00  3.108767e+00  2.473830e+00   

         o3_average     aqhi_skew      aqhi_max     ndvi_skew  
count  1.126260e+05  1.121070e+05  1.126260e+05  1.121570e+05  
mean   4.602951e-16 -5.678916e-17  1.049796e-16  3.745400e-16  
std    1.000004e+00  1.000004e+00  1.000004e+00  1.000004e+00  
min   -3.246371e+01 -4.412670e+0

Check how many model formulas are defined

In [19]:
# Number of formulas in the list defined above.
# NOTE(review): the saved output of this cell reads 32, while the list holds 33
# formulas and the fit log iterates over 33 — the cell was presumably last run
# before a formula was added; re-run it to refresh the output.
len(model_formulas)

32

In [7]:
# List the dataset's columns, e.g. to confirm that every variable named in the formulas exists
data.columns

Index(['hid', 'creation_date', 'operation_id', 'death_date', 'season',
       'season_start_month', 'season_start_day', 'death_next_season',
       'hive_age_next_season', 'aqhi_average', 'prcp_average', 'wspd_average',
       'ndvi_average', 'tavg_average', 'o3_average', 'aqhi_skew', 'prcp_skew',
       'wspd_skew', 'ndvi_skew', 'tavg_skew', 'o3_skew', 'aqhi_max',
       'prcp_max', 'wspd_max', 'ndvi_max', 'tavg_max', 'o3_max', 'region',
       'wspd_average_og', 'tavg_average_og', 'prcp_average_og',
       'aqhi_average_og'],
      dtype='object')

### Fit the models and save (**run once**)

Fitting all models takes **approximately 1.2 hours.**

Run this cell once; the results are saved in the ```aqi/outputs``` folder so they can be reloaded in later sessions.

The fitted models are saved as ```fitted_GLMMs.pkl``` and their summaries as ```summaries_GLMMs.pkl```.

In [22]:
# Fit one model per formula; returns the fitted models and their summaries
fitted_models, model_summaries = fit_models(data=data, formulas=model_formulas)

# Persist both results (models first, then summaries) so later sessions can reload them
for models_to_save, pickle_name in (
    (fitted_models, "fitted_GLMMs.pkl"),
    (model_summaries, "summaries_GLMMs.pkl"),
):
    save_fitted_models(fitted_models=models_to_save, filename=pickle_name)

Fitting Models:   0%|          | 0/33 [00:00<?, ?it/s]

Fitting model0 with formula: death_next_season ~ 1 + (1|region)


Fitting Models:   3%|▎         | 1/33 [00:14<07:37, 14.30s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~1+(1|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -71157.617 	 AIC: 142319.235

Random effects:

               Name    Var    Std
region  (Intercept)  0.979  0.989

No random effect correlations specified

Fixed effects:

Fitting model1 with formula: death_next_season ~ aqhi_average + ndvi_average + (aqhi_average|region)


Fitting Models:   6%|▌         | 2/33 [00:49<13:38, 26.39s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average+ndvi_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69483.983 	 AIC: 138979.966

Random effects:

                Name    Var    Std
region   (Intercept)  1.598  1.264
region  aqhi_average  0.205  0.452

                IV1           IV2   Corr
region  (Intercept)  aqhi_average -0.088

Fixed effects:

Fitting model2 with formula: death_next_season ~ aqhi_average * ndvi_average + (aqhi_average|region)


Fitting Models:   9%|▉         | 3/33 [01:40<18:58, 37.95s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69346.746 	 AIC: 138707.492

Random effects:

                Name    Var    Std
region   (Intercept)  1.694  1.302
region  aqhi_average  0.182  0.426

                IV1           IV2   Corr
region  (Intercept)  aqhi_average  0.047

Fixed effects:

Fitting model3 with formula: death_next_season ~ aqhi_average + ndvi_average + tavg_average + prcp_average + wspd_average + (aqhi_average|region)


Fitting Models:  12%|█▏        | 4/33 [02:32<20:53, 43.24s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average+ndvi_average+tavg_average+prcp_average+wspd_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68446.787 	 AIC: 136911.575

Random effects:

                Name    Var    Std
region   (Intercept)  1.916  1.384
region  aqhi_average  0.209  0.457

                IV1           IV2   Corr
region  (Intercept)  aqhi_average -0.313

Fixed effects:

Fitting model4 with formula: death_next_season ~ aqhi_average * wspd_average + ndvi_average + prcp_average + tavg_average + (aqhi_average|region)


Fitting Models:  15%|█▌        | 5/33 [03:33<23:15, 49.84s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*wspd_average+ndvi_average+prcp_average+tavg_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68439.579 	 AIC: 136899.158

Random effects:

                Name    Var    Std
region   (Intercept)  1.908  1.381
region  aqhi_average  0.205  0.453

                IV1           IV2   Corr
region  (Intercept)  aqhi_average -0.331

Fixed effects:

Fitting model5 with formula: death_next_season ~ aqhi_average * ndvi_average + wspd_average + (ndvi_average|region)


Fitting Models:  18%|█▊        | 6/33 [04:26<22:48, 50.68s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average+wspd_average+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69466.084 	 AIC: 138948.167

Random effects:

                Name    Var    Std
region   (Intercept)  1.979  1.407
region  ndvi_average  0.316  0.562

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.568

Fixed effects:

Fitting model6 with formula: death_next_season ~ aqhi_average * wspd_average * tavg_average + (aqhi_average|region)


Fitting Models:  21%|██        | 7/33 [05:41<25:28, 58.78s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*wspd_average*tavg_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68338.861 	 AIC: 136699.722

Random effects:

                Name    Var    Std
region   (Intercept)  1.821  1.350
region  aqhi_average  0.362  0.601

                IV1           IV2   Corr
region  (Intercept)  aqhi_average -0.177

Fixed effects:

Fitting model7 with formula: death_next_season ~ aqhi_average * wspd_average * ndvi_average + (ndvi_average|region)


Fitting Models:  24%|██▍       | 8/33 [07:09<28:19, 67.97s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*wspd_average*ndvi_average+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69439.475 	 AIC: 138900.951

Random effects:

                Name    Var    Std
region   (Intercept)  1.980  1.407
region  ndvi_average  0.335  0.579

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.562

Fixed effects:

Fitting model8 with formula: death_next_season ~ aqhi_average * ndvi_average + (1|operation_id)


Fitting Models:  27%|██▋       | 9/33 [07:31<21:28, 53.69s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66436.358 	 AIC: 132882.716

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  2.286  1.512

No random effect correlations specified

Fixed effects:

Fitting model9 with formula: death_next_season ~ aqhi_average * ndvi_average + (aqhi_average|operation_id)


Fitting Models:  30%|███       | 10/33 [08:23<20:23, 53.21s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65733.251 	 AIC: 131480.502

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  1.962  1.401
operation_id  aqhi_average  1.942  1.394

                      IV1           IV2  Corr
operation_id  (Intercept)  aqhi_average  0.13

Fixed effects:

Fitting model10 with formula: death_next_season ~ aqhi_average + ndvi_average + wspd_average + (1|operation_id)


Fitting Models:  33%|███▎      | 11/33 [08:46<16:05, 43.87s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average+ndvi_average+wspd_average+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66515.599 	 AIC: 133041.198

Random effects:

                     Name    Var  Std
operation_id  (Intercept)  2.249  1.5

No random effect correlations specified

Fixed effects:

Fitting model11 with formula: death_next_season ~ aqhi_average + ndvi_average + wspd_average + (aqhi_average|operation_id)


Fitting Models:  36%|███▋      | 12/33 [09:35<15:55, 45.51s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average+ndvi_average+wspd_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65762.770 	 AIC: 131539.541

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  1.952  1.397
operation_id  aqhi_average  1.868  1.367

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.084

Fixed effects:

Fitting model12 with formula: death_next_season ~ aqhi_average * wspd_average + ndvi_average + (aqhi_average|operation_id)


Fitting Models:  39%|███▉      | 13/33 [10:31<16:13, 48.66s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*wspd_average+ndvi_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65762.762 	 AIC: 131541.524

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  1.953  1.397
operation_id  aqhi_average  1.868  1.367

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.084

Fixed effects:

Fitting model13 with formula: death_next_season ~ aqhi_average * ndvi_average + wspd_average + (1|operation_id)


Fitting Models:  42%|████▏     | 14/33 [10:58<13:18, 42.01s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average+wspd_average+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66387.695 	 AIC: 132787.390

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  2.346  1.532

No random effect correlations specified

Fixed effects:

Fitting model14 with formula: death_next_season ~ aqhi_average * ndvi_average * wspd_average + (1|operation_id)


Fitting Models:  45%|████▌     | 15/33 [11:41<12:41, 42.32s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average*wspd_average+(1|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66377.312 	 AIC: 132772.625

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  2.352  1.534

No random effect correlations specified

Fixed effects:

Fitting model15 with formula: death_next_season ~ aqhi_average * ndvi_average * wspd_average + (aqhi_average|operation_id)


Fitting Models:  48%|████▊     | 16/33 [12:42<13:35, 47.97s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average*wspd_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65703.364 	 AIC: 131428.727

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  1.981  1.407
operation_id  aqhi_average  1.907  1.381

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.115

Fixed effects:

Fitting model16 with formula: death_next_season ~ aqhi_average + ndvi_average + wspd_average + tavg_average + prcp_average + (aqhi_average|operation_id)


Fitting Models:  52%|█████▏    | 17/33 [13:47<14:11, 53.20s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average+ndvi_average+wspd_average+tavg_average+prcp_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64778.182 	 AIC: 129574.364

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  2.690  1.640
operation_id  aqhi_average  2.121  1.456

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.165

Fixed effects:

Fitting model17 with formula: death_next_season ~ aqhi_average * ndvi_average + tavg_average * prcp_average + wspd_average + (aqhi_average|operation_id)


Fitting Models:  55%|█████▍    | 18/33 [15:13<15:44, 62.98s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_average+tavg_average*prcp_average+wspd_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64757.014 	 AIC: 129536.028

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  2.688  1.639
operation_id  aqhi_average  2.076  1.441

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.182

Fixed effects:

Fitting model18 with formula: death_next_season ~ aqhi_skew + ndvi_skew + (aqhi_skew|region)


Fitting Models:  58%|█████▊    | 19/33 [15:41<12:14, 52.44s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew+ndvi_skew+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69909.878 	 AIC: 139831.757

Random effects:

               Name    Var    Std
region  (Intercept)  0.938  0.969
region    aqhi_skew  0.303  0.550

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.309

Fixed effects:

Fitting model19 with formula: death_next_season ~ aqhi_skew * ndvi_skew + (aqhi_skew|region)


Fitting Models:  61%|██████    | 20/33 [16:15<10:11, 47.06s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew*ndvi_skew+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69549.952 	 AIC: 139113.904

Random effects:

               Name    Var    Std
region  (Intercept)  0.929  0.964
region    aqhi_skew  0.317  0.563

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.327

Fixed effects:

Fitting model20 with formula: death_next_season ~ aqhi_skew * ndvi_average + wspd_average + (aqhi_skew|region)


Fitting Models:  64%|██████▎   | 21/33 [46:51<1:56:46, 583.88s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew*ndvi_average+wspd_average+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69010.985 	 AIC: 138037.969

Random effects:

               Name    Var    Std
region  (Intercept)  1.072  1.035
region    aqhi_skew  0.282  0.531

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.391

Fixed effects:

Fitting model21 with formula: death_next_season ~ aqhi_skew * ndvi_average * wspd_average + (aqhi_skew|region)


Fitting Models:  67%|██████▋   | 22/33 [1:00:39<2:00:31, 657.38s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_skew*ndvi_average*wspd_average+(aqhi_skew|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -68988.409 	 AIC: 137998.817

Random effects:

               Name    Var    Std
region  (Intercept)  1.080  1.039
region    aqhi_skew  0.255  0.505

                IV1        IV2   Corr
region  (Intercept)  aqhi_skew -0.413

Fixed effects:

Fitting model22 with formula: death_next_season ~ aqhi_max * ndvi_average + wspd_average + (ndvi_average|region)


Fitting Models:  70%|██████▉   | 23/33 [1:01:40<1:19:41, 478.12s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average+wspd_average+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69955.917 	 AIC: 139927.834

Random effects:

                Name    Var    Std
region   (Intercept)  1.577  1.256
region  ndvi_average  0.404  0.635

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.794

Fixed effects:

Fitting model23 with formula: death_next_season ~ aqhi_max * ndvi_average * wspd_average + (ndvi_average|region)


Fitting Models:  73%|███████▎  | 24/33 [1:03:42<55:42, 371.37s/it]  

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average*wspd_average+(ndvi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69912.271 	 AIC: 139846.541

Random effects:

                Name    Var    Std
region   (Intercept)  1.601  1.265
region  ndvi_average  0.425  0.652

                IV1           IV2   Corr
region  (Intercept)  ndvi_average  0.798

Fixed effects:

Fitting model24 with formula: death_next_season ~ aqhi_max * ndvi_average + wspd_average + (aqhi_average|operation_id)


Fitting Models:  76%|███████▌  | 25/33 [1:04:57<37:39, 282.43s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average+wspd_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65344.031 	 AIC: 130704.061

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  2.204  1.485
operation_id  aqhi_average  2.780  1.667

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.329

Fixed effects:

Fitting model25 with formula: death_next_season ~ aqhi_max * ndvi_average * wspd_average + (aqhi_average|operation_id)


Fitting Models:  79%|███████▉  | 26/33 [1:06:31<26:21, 225.98s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_max*ndvi_average*wspd_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -65325.250 	 AIC: 130672.499

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  2.260  1.503
operation_id  aqhi_average  2.811  1.677

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.296

Fixed effects:

Fitting model26 with formula: death_next_season ~ aqhi_average * ndvi_skew + wspd_average + (aqhi_average|region)


Fitting Models:  82%|████████▏ | 27/33 [1:07:24<17:25, 174.19s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_skew+wspd_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69708.179 	 AIC: 139432.359

Random effects:

                Name    Var    Std
region   (Intercept)  1.739  1.319
region  aqhi_average  0.170  0.412

                IV1           IV2   Corr
region  (Intercept)  aqhi_average  0.071

Fixed effects:

Fitting model27 with formula: death_next_season ~ aqhi_average * ndvi_skew * wspd_average + (aqhi_average|region)


Fitting Models:  85%|████████▍ | 28/33 [1:08:48<12:14, 146.96s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_skew*wspd_average+(aqhi_average|region)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'region': 9.0}

Log-likelihood: -69691.481 	 AIC: 139404.962

Random effects:

                Name    Var    Std
region   (Intercept)  1.738  1.318
region  aqhi_average  0.157  0.397

                IV1           IV2   Corr
region  (Intercept)  aqhi_average  0.066

Fixed effects:

Fitting model28 with formula: death_next_season ~ aqhi_average * ndvi_skew * wspd_average + (aqhi_average|operation_id)


Fitting Models:  88%|████████▊ | 29/33 [1:10:11<08:31, 127.87s/it]

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~aqhi_average*ndvi_skew*wspd_average+(aqhi_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -66149.218 	 AIC: 132320.436

Random effects:

                      Name    Var    Std
operation_id   (Intercept)  1.982  1.408
operation_id  aqhi_average  1.850  1.360

                      IV1           IV2   Corr
operation_id  (Intercept)  aqhi_average  0.096

Fixed effects:

Fitting model29 with formula: death_next_season ~ ndvi_average + o3_average + (o3_average|operation_id)


Fitting Models:  91%|█████████ | 30/33 [1:11:04<05:15, 105.22s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average+o3_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64696.648 	 AIC: 129405.296

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.519  1.876
operation_id   o3_average  3.953  1.988

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.682

Fixed effects:

Fitting model30 with formula: death_next_season ~ ndvi_average * o3_average + (o3_average|operation_id)


Fitting Models:  94%|█████████▍| 31/33 [1:11:55<02:58, 89.19s/it] 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64652.788 	 AIC: 129319.575

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.477  1.865
operation_id   o3_average  3.947  1.987

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.691

Fixed effects:

Fitting model31 with formula: death_next_season ~ ndvi_average * o3_average + wspd_average + (o3_average|operation_id)


Fitting Models:  97%|█████████▋| 32/33 [1:13:01<01:22, 82.18s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average+wspd_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64613.069 	 AIC: 129242.139

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.479  1.865
operation_id   o3_average  3.880  1.970

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.688

Fixed effects:

Fitting model32 with formula: death_next_season ~ ndvi_average * o3_average * wspd_average + (o3_average|operation_id)


Fitting Models: 100%|██████████| 33/33 [1:14:40<00:00, 135.77s/it]

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average*wspd_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64579.417 	 AIC: 129180.833

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.432  1.853
operation_id   o3_average  3.920  1.980

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.687

Fixed effects:






Models saved to /Users/maximeff-nectar/Documents/ledge-nectar/aqi/outputs/fitted_GLMMs.pkl.
Models saved to /Users/maximeff-nectar/Documents/ledge-nectar/aqi/outputs/summaries_GLMMs.pkl.


### Load models in session

In [10]:
# Reload the models saved by the fitting cell, so the long fit does not have to be repeated
fitted_models = load_fitted_models("fitted_GLMMs.pkl")

Models loaded from /Users/maximeff-nectar/Documents/ledge-nectar/aqi/outputs/fitted_GLMMs.pkl.


In [23]:
# No candidate yet: start from +infinity so the first warning-free model becomes the best
best_aic, best_model, best_model_name = float('inf'), None, None

# Walk through the fitted models in dict order
for model_name, model in fitted_models.items():

    # Models that produced warnings while fitting are reported and left out of the comparison
    if len(model.warnings) != 0:
        print(f"Model {model_name} did not converge due to warnings: {model.warnings}")
        continue

    current_aic = model.AIC

    # Only a strictly lower AIC replaces the current best
    if not (current_aic < best_aic):
        continue

    print(f"Current best model is {model_name} with an AIC of {current_aic}")
    best_aic = current_aic
    best_model = model
    best_model_name = model_name

Current best model is model0 with an AIC of 142319.23490165427
Current best model is model1 with an AIC of 138979.96571818696
Current best model is model2 with an AIC of 138707.4923684611
Current best model is model3 with an AIC of 136911.5749373824
Current best model is model4 with an AIC of 136899.1582601408
Current best model is model6 with an AIC of 136699.7219109801
Current best model is model8 with an AIC of 132882.71574743759
Current best model is model9 with an AIC of 131480.50204320857
Current best model is model15 with an AIC of 131428.72718470442
Current best model is model16 with an AIC of 129574.36442449133
Current best model is model17 with an AIC of 129536.02753766163
Current best model is model29 with an AIC of 129405.29649029924
Current best model is model30 with an AIC of 129319.57520043233
Current best model is model31 with an AIC of 129242.13857908593
Current best model is model32 with an AIC of 129180.83310119298


In [24]:
# Show the report of the selected model.
# The model is already fitted, so use `.summary()`: it prints the header (formula,
# family, log-likelihood, random effects) and returns the fixed-effects table.
# Calling `.fit()` here would re-run the whole estimation just to print the same report.
print(best_model.summary())

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: death_next_season~ndvi_average*o3_average*wspd_average+(o3_average|operation_id)

Family: binomial	 Inference: parametric

Number of observations: 112626	 Groups: {'operation_id': 17.0}

Log-likelihood: -64579.417 	 AIC: 129180.833

Random effects:

                     Name    Var    Std
operation_id  (Intercept)  3.432  1.853
operation_id   o3_average  3.920  1.980

                      IV1         IV2   Corr
operation_id  (Intercept)  o3_average  0.687

Fixed effects:

                                      Estimate  2.5_ci  97.5_ci     SE     OR  \
(Intercept)                             -0.149  -1.033    0.735  0.451  0.862   
ndvi_average                            -0.403  -0.422   -0.384  0.010  0.668   
o3_average                               1.229   0.291    2.166  0.478  3.417   
wspd_average                            -0.078  -0.093   -0.064  0.007  0.925   
ndvi_average:o3_average                 -0.098  -0

Save the best model output

In [25]:
# The project root is the parent of the working directory; outputs go in <root>/outputs
project_root = os.path.dirname(os.getcwd())
output_dir = os.path.join(project_root, 'outputs')

# Destination file for the selected model
filepath = os.path.join(output_dir, "GLMM_best_model.joblib")

# Write the best model to disk
save_model(best_model, filepath=filepath)