In [114]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [115]:
import sys
sys.path.append("../../")

In [127]:
import pandas as pd
import geopandas as gpd
import pymc as pm
import arviz as az

from config.config import BASE_PATH, PATH_TO_PATH_CONFIG_FILE
from src.utils import load_paths_from_yaml, replace_base_path
from src.ml.encodings import convert_aspect_to_cardinal_direction
from src.ml.bayesian_models import create_blr_partial_pooling_for_ffmc_adjustment


In [117]:

# Read the path configuration from YAML and resolve it against BASE_PATH in one step.
paths = replace_base_path(
    load_paths_from_yaml(PATH_TO_PATH_CONFIG_FILE),
    BASE_PATH,
)

In [118]:
def temporal_train_test_split(train_data: pd.DataFrame, date_col: str, train_size: float) -> tuple:
    """Split a frame chronologically into an older and a newer part.

    The input is copied, ``date_col`` is parsed to datetimes, and the rows are
    sorted ascending by that column. The first ``int(train_size * len(df))``
    rows form the training part; the remaining rows form the test part.

    Args:
        train_data: Frame containing at least the column ``date_col``.
            It is not modified.
        date_col: Name of the column holding the dates to sort and split on.
        train_size: Fraction of rows (expected in [0, 1]) assigned to the
            older/training part. The row count is truncated towards zero.

    Returns:
        ``(train_df, test_df)`` - both sorted by ``date_col``, which is
        returned as a datetime column; original index labels are preserved.

    Raises:
        KeyError: If ``date_col`` is not a column of ``train_data``.
    """
    df = train_data.copy()
    # Parse the column named by `date_col`; the previous version always read
    # `df.date`, which broke for any other column name.
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.sort_values(by=date_col)
    split_index = int(train_size * len(df))
    train_df = df.iloc[:split_index]
    test_df = df.iloc[split_index:]
    return train_df, test_df

In [119]:
# Load the training subset and keep only the columns used further below
training_columns = ["ffmc", "aspect", "foresttype", "fire", "date"]
train_data = gpd.read_file(paths["training_data"]["subset"]).loc[:, training_columns]


In [120]:
# cleaning: discard every row that has a missing value in any column
train_data = train_data.dropna()

In [121]:
# feature engineering: map aspect through convert_aspect_to_cardinal_direction
# and store both grouping columns as integers
aspect_codes = train_data["aspect"].apply(convert_aspect_to_cardinal_direction)
train_data["aspect"] = aspect_codes.astype("int")
train_data["foresttype"] = train_data["foresttype"].astype("int")

<class 'pandas.core.frame.DataFrame'>
Int64Index: 886 entries, 0 to 920
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ffmc        886 non-null    float64
 1   aspect      886 non-null    int32  
 2   foresttype  886 non-null    int32  
 3   fire        886 non-null    int64  
 4   date        886 non-null    object 
dtypes: float64(1), int32(2), int64(1), object(1)
memory usage: 34.6+ KB


In [122]:
# Group labels for the model: distinct aspect / forest type values,
# in order of first appearance in train_data.
aspect_levels = list(train_data["aspect"].unique())
foresttype_levels = list(train_data["foresttype"].unique())
coords = dict(aspect_groups=aspect_levels, foresttype_groups=foresttype_levels)

In [123]:
# Split data temporally
# Older samples (70%) will be used for training; newer samples (30%) will be used for evaluation
feature_cols = ["ffmc", "foresttype", "aspect", "date"]
train_df, test_df = temporal_train_test_split(train_data, "date", 0.7)
X_train, y_train = train_df.loc[:, feature_cols], train_df.loc[:, "fire"]
# The test labels must come from test_df so they line up with X_test;
# previously they were taken from train_df (wrong rows and wrong length).
X_test, y_test = test_df.loc[:, feature_cols], test_df.loc[:, "fire"]

In [125]:
# Build the model from the training features, the training labels and the
# group coordinates. The returned object is entered as a model context
# (`with model:`) when fitting.
# NOTE(review): X_train still contains the "date" column - confirm the helper ignores it.
model = create_blr_partial_pooling_for_ffmc_adjustment(X_train, y_train, coords)

In [133]:
# Approximate the posterior of the logistic regression weights with
# variational inference (pm.fit uses ADVI by default), then draw posterior
# samples from the fitted approximation.
# An MCMC alternative (pm.find_MAP start + pm.Metropolis step with pm.sample)
# was tried earlier and is not used here.
FIT_RANDOM_SEED = 42  # fixed so that the fit and the draws are reproducible on re-run
with model:
    approx = pm.fit(random_seed=FIT_RANDOM_SEED)
    idata = approx.sample(random_seed=FIT_RANDOM_SEED)

 |████████████████████████████████████████| 100.00% [10000/10000 00:10<00:00 Average Loss = 2,017]

Finished [100%]: Average Loss = 2,013.9


In [134]:
az.summary(idata, var_names=["beta_ffmc"])



Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"beta_ffmc[5, 2]",0.171,0.214,-0.198,0.593,0.011,0.007,417.0,471.0,
"beta_ffmc[5, 6]",0.139,0.647,-1.023,1.304,0.029,0.02,496.0,463.0,
"beta_ffmc[5, 4]",-0.505,0.366,-1.102,0.227,0.018,0.013,402.0,359.0,
"beta_ffmc[5, 0]",0.108,0.63,-1.085,1.206,0.029,0.022,480.0,470.0,
"beta_ffmc[5, 5]",0.53,0.395,-0.281,1.218,0.02,0.014,403.0,421.0,
"beta_ffmc[5, 3]",0.134,0.624,-0.971,1.27,0.027,0.019,528.0,424.0,
"beta_ffmc[5, 1]",0.071,0.624,-1.124,1.146,0.028,0.023,486.0,468.0,
"beta_ffmc[2, 2]",-0.044,0.09,-0.199,0.129,0.004,0.003,462.0,463.0,
"beta_ffmc[2, 6]",0.09,0.632,-1.071,1.277,0.028,0.02,509.0,563.0,
"beta_ffmc[2, 4]",-0.047,0.09,-0.212,0.112,0.004,0.003,554.0,515.0,


In [130]:
X_train

Unnamed: 0,ffmc,foresttype,aspect,date
742,84.866463,5,5,2012-03-04
741,85.928877,5,5,2012-03-05
740,85.047707,0,5,2012-03-08
739,85.140994,5,3,2012-03-09
738,86.769089,5,4,2012-03-10
...,...,...,...,...
764,85.092341,4,7,2019-03-28
250,86.965689,0,4,2019-03-29
249,86.977560,5,3,2019-03-29
248,86.014187,6,4,2019-03-30
