In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the project code take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the directory two levels above the working directory to the import path
# so the `config` and `src` packages imported by this notebook can be found.
# NOTE(review): the path is relative, so this assumes the kernel's working
# directory is the notebook's own folder — confirm.
sys.path.append("../../")

In [9]:
import arviz as az
import cloudpickle
import geopandas as gpd
import matplotlib.pyplot as plt
import pymc as pm
from sklearn.preprocessing import StandardScaler

from config.config import BASE_PATH, PATH_TO_PATH_CONFIG_FILE
from src.utils import load_paths_from_yaml, replace_base_path
from src.modeling.encodings import convert_aspect_to_cardinal_direction, convert_canopy_cover_to_classes, convert_ffmc_to_classes
from src.modeling.bayesian_models import (create_model_ffmc_adjustment_aspect,
                                          create_model_ffmc_adjustment_foresttype,
                                          create_model_ffmc_adjustment_canopy_cover,
                                          create_model_ffmc_adjustment_all)
from src.modeling.utils import temporal_train_test_split

In [4]:

# Load the path configuration from the YAML file and pass it through
# replace_base_path together with BASE_PATH.
paths = replace_base_path(
    load_paths_from_yaml(PATH_TO_PATH_CONFIG_FILE),
    BASE_PATH,
)

In [5]:
# Load the training samples and keep only the columns this notebook uses:
# the predictors, the `fire` label and `date` (needed for the temporal split).
train_data = gpd.read_file(paths["training_data"]).loc[
    :, ["ffmc", "aspect", "foresttype", "canopy_cov", "fire", "date"]
]


In [6]:
# Preprocessing: drop rows with missing values, cast the forest type to int,
# and derive integer class columns for aspect and canopy cover using the
# project's encoding helpers.
train_data = train_data.dropna().assign(
    foresttype=lambda df: df["foresttype"].astype("int"),
    aspect_categorized=lambda df: df["aspect"]
    .apply(convert_aspect_to_cardinal_direction)
    .astype("int"),
    canopy_cover_categorized=lambda df: df["canopy_cov"]
    .apply(convert_canopy_cover_to_classes)
    .astype("int"),
)


In [7]:
# Group indices handed to the model constructors as coordinates:
# 8 aspect classes, 7 forest type classes and 5 canopy cover classes.
coords = {
    "aspect_groups": list(range(8)),
    "foresttype_groups": list(range(7)),
    "canopy_cover_groups": list(range(5)),
}

In [8]:
# Split data temporally
# Older samples (70%) will be used for training; newer samples (30%) will be used for evaluation
train_df, test_df = temporal_train_test_split(train_data, "date", 0.7)
relevant_columns = ["ffmc", "foresttype", "aspect_categorized", "canopy_cover_categorized", "date"]

# .copy() makes the feature frames independent of train_df / test_df, so the
# scaled ffmc column written below never touches the split frames.
X_train, y_train = train_df.loc[:, relevant_columns].copy(), train_df.loc[:, "fire"]
# The evaluation labels must come from test_df. They were previously read from
# train_df, which left y_test misaligned with X_test.
X_test, y_test = test_df.loc[:, relevant_columns].copy(), test_df.loc[:, "fire"]

# Guard against feature/label misalignment in either split.
assert len(X_train) == len(y_train), "training features and labels differ in length"
assert len(X_test) == len(y_test), "test features and labels differ in length"

# Standardize ffmc with statistics fitted on the training split only, then
# apply the same transformation to the test split.
scaler = StandardScaler()
X_train["ffmc"] = scaler.fit_transform(X_train[["ffmc"]])
X_test["ffmc"] = scaler.transform(X_test[["ffmc"]])


In [10]:
# Build the model from the training features, the training labels and the
# group coordinates; it is sampled with PyMC in the next cell.
# NOTE(review): the "_all" variant presumably combines the aspect, forest type
# and canopy cover groupings — confirm in src/modeling/bayesian_models.py.
model_all = create_model_ffmc_adjustment_all(X_train, y_train, coords)

In [11]:
# Draw posterior samples for model_all using PyMC's default sampler settings.
# A fixed seed makes the draws reproducible across kernel restarts.
# NOTE(review): the recorded output of this cell reports only 206 draw
# iterations per chain after 2336 seconds, so that run appears to have been
# cut short — re-run to completion before relying on idata_all.
RANDOM_SEED = 42

with model_all:
    idata_all = pm.sample(random_seed=RANDOM_SEED)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 206 draw iterations (2_000 + 412 draws total) took 2336 seconds.


In [36]:
# Fixed seed so the posterior draws (and every statistic derived from them)
# are reproducible across kernel restarts.
RANDOM_SEED = 42

# One model per grouping factor, each built from the same training split and
# the same group coordinates.
model_aspect = create_model_ffmc_adjustment_aspect(X_train, y_train, coords)
model_foresttype = create_model_ffmc_adjustment_foresttype(X_train, y_train, coords)
model_canopy_cover = create_model_ffmc_adjustment_canopy_cover(X_train, y_train, coords)

# Sample each model with PyMC's default sampler settings.
with model_aspect:
    idata_aspect = pm.sample(random_seed=RANDOM_SEED)

with model_foresttype:
    idata_foresttype = pm.sample(random_seed=RANDOM_SEED)

with model_canopy_cover:
    idata_canopy_cover = pm.sample(random_seed=RANDOM_SEED)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 161 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 129 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 320 seconds.


In [37]:
# Save each fitted model together with its posterior samples so both can be
# reloaded later without re-sampling.
# The files are pickles: only load them back from a trusted location.
MODEL_DIR = "../../models/ffmc_adjustment"
fitted_models = {
    "model_aspect": (model_aspect, idata_aspect),
    "model_foresttype": (model_foresttype, idata_foresttype),
    "model_canopy_cover": (model_canopy_cover, idata_canopy_cover),
}

for file_stem, (fitted_model, fitted_idata) in fitted_models.items():
    # Payload layout: a dict with the keys 'model' and 'idata'.
    with open(f"{MODEL_DIR}/{file_stem}.pkl", "wb") as buff:
        cloudpickle.dump({"model": fitted_model, "idata": fitted_idata}, buff)

In [38]:
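# Interpretation of the contrast between the group-level coefficients and the population-level coefficient of ffmc:
# The population-level beta specifies the impact of ffmc on forest fire ignition (theoretically this should be positive: with increasing ffmc, the danger of forest fire ignition increases).
# A positive contrast means the group-level coefficient is larger than the population-level one (ffmc has a stronger effect in that group); a negative contrast means it is smaller.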

In [58]:
# Posterior-mean contrast per aspect class: group-level beta_ffmc minus
# population-level mu_b1, averaged over the first two posterior axes
# (presumably chain and draw).
# Values are printed in class-index order:
#   0 = N, 1 = NE, 2 = E, 3 = SE, 4 = S, 5 = SW, 6 = W, 7 = NW
aspect_posterior = idata_aspect.posterior
aspect_contrast = aspect_posterior.beta_ffmc - aspect_posterior.mu_b1
print(aspect_contrast.mean(axis=(0, 1)).values)

[ 0.12928946 -0.29584086 -0.80555764 -0.01410498  0.58031471  0.10422119
  0.04206697  0.24718826]


In [41]:
# Posterior-mean contrast per forest type class: group-level beta_ffmc minus
# population-level mu_b1, averaged over the first two posterior axes
# (presumably chain and draw).
# Values are printed in class-index order:
#   0 = coniferous non pine
#   1 = coniferous with mixed pine
#   2 = pine pure
#   3 = coniferous deciduous mixed with pine
#   4 = coniferous_deciduous_mixed_non_pine
#   5 = deciduous pure
#   6 = low and no vegetation
foresttype_posterior = idata_foresttype.posterior
foresttype_contrast = foresttype_posterior.beta_ffmc - foresttype_posterior.mu_b1
print(foresttype_contrast.mean(axis=(0, 1)).values)

[ 0.5920794  -0.01073765  0.21567111 -0.33980355 -0.17765833 -0.26721027
  0.55218895]


In [42]:
# Posterior-mean contrast per canopy cover class: group-level beta_ffmc minus
# population-level mu_b1, averaged over the first two posterior axes
# (presumably chain and draw).
# Values are printed in class-index order:
#   0 = ≤20%
#   1 = 21-40%
#   2 = 41-60%
#   3 = 61-80%
#   4 = >80%
canopy_cover_posterior = idata_canopy_cover.posterior
canopy_cover_contrast = canopy_cover_posterior.beta_ffmc - canopy_cover_posterior.mu_b1
print(canopy_cover_contrast.mean(axis=(0, 1)).values)

[48.91624464  0.68849201  0.12935038  0.09047279 -0.40873806]


In [56]:
# Summary statistics and sampling diagnostics for the per-class beta_ffmc of
# the canopy cover model.
# NOTE(review): in the recorded output beta_ffmc[0] has r_hat 1.1 and an
# ess_bulk of 15, which suggests the estimate for class 0 is unreliable.
canopy_cover_beta_ffmc = idata_canopy_cover.posterior["beta_ffmc"]
az.summary(canopy_cover_beta_ffmc)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta_ffmc[0],50.902,140.397,0.658,255.596,48.98,35.944,15.0,13.0,1.1
beta_ffmc[1],2.674,0.756,1.348,4.036,0.028,0.021,815.0,777.0,1.0
beta_ffmc[2],2.115,0.386,1.452,2.895,0.014,0.01,870.0,1029.0,1.0
beta_ffmc[3],2.076,0.257,1.6,2.541,0.009,0.006,851.0,657.0,1.0
beta_ffmc[4],1.577,0.124,1.343,1.799,0.004,0.003,1044.0,970.0,1.0


In [50]:
# Fire / no-fire sample counts per canopy cover class (rows: class, columns:
# fire label). Counted on the full train_data, i.e. before the temporal split,
# so the models were fitted on fewer samples per class than shown here.
(
    train_data
    .groupby("canopy_cover_categorized")["fire"]
    .value_counts()
    .unstack(fill_value=0)
)

fire,0,1
canopy_cover_categorized,Unnamed: 1_level_1,Unnamed: 2_level_1
0,10,9
1,36,26
2,105,62
3,210,158
4,751,823


In [59]:
# Total number of samples per canopy cover class in the full train_data.
canopy_cover_classes = train_data.loc[:, "canopy_cover_categorized"]
canopy_cover_classes.value_counts()

4    1574
3     368
2     167
1      62
0      19
Name: canopy_cover_categorized, dtype: int64

In [51]:
# Fire / no-fire sample counts per aspect class (rows: class, columns: fire
# label), counted on the full train_data before the temporal split.
(
    train_data
    .groupby("aspect_categorized")["fire"]
    .value_counts()
    .unstack(fill_value=0)
)

fire,0,1
aspect_categorized,Unnamed: 1_level_1,Unnamed: 2_level_1
0,11,7
1,94,53
2,165,127
3,172,229
4,227,287
5,206,217
6,149,112
7,88,46


In [52]:
# Fire / no-fire sample counts per forest type class (rows: class, columns:
# fire label), counted on the full train_data before the temporal split.
(
    train_data
    .groupby("foresttype")["fire"]
    .value_counts()
    .unstack(fill_value=0)
)

fire,0,1
foresttype,Unnamed: 1_level_1,Unnamed: 2_level_1
0,288,293
1,44,21
2,134,168
3,62,52
4,191,128
5,275,281
6,118,135
