In [44]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [45]:
import sys
sys.path.append("../../../")

In [100]:
import geopandas as gpd
import pandas as pd
import pymc as pm
import arviz as az
from sklearn.preprocessing import StandardScaler

from config.config import BASE_PATH, PATH_TO_PATH_CONFIG_FILE
from src.utils import load_paths_from_yaml, replace_base_path
from src.modeling.encodings import convert_aspect_to_cardinal_direction, convert_canopy_cover_to_classes, convert_ffmc_to_classes
from src.modeling.bayesian_models import (create_model_ffmc_adjustment_aspect, 
                                          create_model_ffmc_adjustment_foresttype,
                                          create_model_ffmc_adjustment_canopy_cover)
from src.modeling.utils import temporal_train_test_split, save_model

In [47]:
paths = load_paths_from_yaml(PATH_TO_PATH_CONFIG_FILE)
paths = replace_base_path(paths, BASE_PATH)

### Using all data

In [48]:
# read training data
train_data = gpd.read_file(paths["training_data"])
train_data = train_data.loc[:, ["ffmc", "aspect", "foresttype", "canopy_cov", "fire", "date"]]


In [6]:
# data preprocessing
# remove entries with missing values in forest type & canopy cover
train_data.dropna(inplace=True)
train_data["foresttype"] = train_data.foresttype.astype("int")
train_data["aspect_categorized"] = train_data.aspect.apply(convert_aspect_to_cardinal_direction).astype("int")
train_data["canopy_cover_categorized"] = train_data.canopy_cov.apply(convert_canopy_cover_to_classes).astype("int")
train_data["ffmc_categorized"] = train_data.ffmc.apply(convert_ffmc_to_classes).astype("int")


In [7]:
coords = {"aspect_groups": [0, 1, 2, 3, 4, 5, 6, 7], 
          "foresttype_groups": [0, 1, 2, 3, 4, 5, 6], 
          "canopy_cover_groups": [0, 1, 2, 3, 4], 
          "ffmc_groups": [0, 1, 2, 3, 4]}

In [8]:
# Split data temporally 
# Older samples (70%) will be used for training; newer samples (30%) will be used for evaluation
train_df, test_df = temporal_train_test_split(train_data, "date", 0.7)
relevant_columns = ["ffmc", "foresttype", "aspect_categorized", "canopy_cover_categorized", "ffmc_categorized", "date"]
X_train, y_train = train_df.loc[:, relevant_columns], train_df.loc[:, "fire"]
X_test, y_test = test_df.loc[:, relevant_columns], test_df.loc[:, "fire"]

scaler = StandardScaler()
X_train["ffmc"] = scaler.fit_transform(X_train[["ffmc"]])
X_test["ffmc"] = scaler.transform(X_test[["ffmc"]])


In [9]:
model_ffmc_aspect = create_model_ffmc_adjustment_aspect(X_train, y_train, coords)
model_ffmc_canopy_cover = create_model_ffmc_adjustment_canopy_cover(X_train, y_train, coords)
model_ffmc_forest_type = create_model_ffmc_adjustment_foresttype(X_train, y_train, coords)

In [10]:
with model_ffmc_aspect:
    idata_ffmc_aspect = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_canopy_cover:
    idata_ffmc_canopy_cover = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_forest_type:
    idata_ffmc_forest_type = pm.sample(2000, target_accept=0.9, random_seed=0)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 408 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 1247 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 418 seconds.


In [15]:
abs_diff_pop_group_aspect = idata_ffmc_aspect.posterior.beta_ffmc[0, :] - idata_ffmc_aspect.posterior.mu_b1[0, :]
rel_diff_pop_group_aspect = idata_ffmc_aspect.posterior.beta_ffmc[0, :] / idata_ffmc_aspect.posterior.mu_b1[0, :]

abs_diff_pop_group_canopy_cover = idata_ffmc_canopy_cover.posterior.beta_ffmc[0, :] - idata_ffmc_canopy_cover.posterior.mu_b1[0, :]
rel_diff_pop_group_canopy_cover = idata_ffmc_canopy_cover.posterior.beta_ffmc[0, :] / idata_ffmc_canopy_cover.posterior.mu_b1[0, :]

abs_diff_pop_group_forest_type = idata_ffmc_forest_type.posterior.beta_ffmc[0, :] - idata_ffmc_forest_type.posterior.mu_b1[0, :]
rel_diff_pop_group_forest_type = idata_ffmc_forest_type.posterior.beta_ffmc[0, :] / idata_ffmc_forest_type.posterior.mu_b1[0, :]

### Using only data with location uncertainty < 250m

In [51]:
# read training data
train_data = gpd.read_file(paths["training_data"])
train_data = train_data.loc[:, ["ffmc", "aspect", "foresttype", "canopy_cov", "fire", "date", "Pufferradi"]]

In [53]:
# Using only fire samples with low location uncertainty
fire_samples = train_data[train_data.fire == 1]
non_fire_samples = train_data[train_data.fire == 0]
fire_samples.Pufferradi = fire_samples.Pufferradi.astype("int")
fire_samples = fire_samples[fire_samples.Pufferradi <= 250]
non_fire_samples = non_fire_samples.sample(len(fire_samples))
train_data = pd.concat([fire_samples, non_fire_samples])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_samples.Pufferradi = fire_samples.Pufferradi.astype("int")


In [57]:
# data preprocessing
# remove entries with missing values in forest type & canopy cover
train_data.dropna(subset=("foresttype", "canopy_cov"), inplace=True)
train_data["foresttype"] = train_data.foresttype.astype("int")
train_data["aspect_categorized"] = train_data.aspect.apply(convert_aspect_to_cardinal_direction).astype("int")
train_data["canopy_cover_categorized"] = train_data.canopy_cov.apply(convert_canopy_cover_to_classes).astype("int")
train_data["ffmc_categorized"] = train_data.ffmc.apply(convert_ffmc_to_classes).astype("int")

In [60]:
coords = {"aspect_groups": [0, 1, 2, 3, 4, 5, 6, 7], 
          "foresttype_groups": [0, 1, 2, 3, 4, 5, 6], 
          "canopy_cover_groups": [0, 1, 2, 3, 4], 
          "ffmc_groups": [0, 1, 2, 3, 4]}

In [61]:
# Split data temporally 
# Older samples (70%) will be used for training; newer samples (30%) will be used for evaluation
train_df, test_df = temporal_train_test_split(train_data, "date", 0.7)
relevant_columns = ["ffmc", "foresttype", "aspect_categorized", "canopy_cover_categorized", "ffmc_categorized", "date"]
X_train, y_train = train_df.loc[:, relevant_columns], train_df.loc[:, "fire"]
X_test, y_test = test_df.loc[:, relevant_columns], test_df.loc[:, "fire"]

scaler = StandardScaler()
X_train["ffmc"] = scaler.fit_transform(X_train[["ffmc"]])
X_test["ffmc"] = scaler.transform(X_test[["ffmc"]])


In [62]:
model_ffmc_aspect = create_model_ffmc_adjustment_aspect(X_train, y_train, coords)
model_ffmc_canopy_cover = create_model_ffmc_adjustment_canopy_cover(X_train, y_train, coords)
model_ffmc_forest_type = create_model_ffmc_adjustment_foresttype(X_train, y_train, coords)

In [64]:
with model_ffmc_aspect:
    idata_ffmc_aspect = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_canopy_cover:
    idata_ffmc_canopy_cover = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_forest_type:
    idata_ffmc_forest_type = pm.sample(2000, target_accept=0.9, random_seed=0)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 232 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 824 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 268 seconds.


In [66]:
abs_diff_pop_group_aspect = idata_ffmc_aspect.posterior.beta_ffmc[0, :] - idata_ffmc_aspect.posterior.mu_b1[0, :]
rel_diff_pop_group_aspect = idata_ffmc_aspect.posterior.beta_ffmc[0, :] / idata_ffmc_aspect.posterior.mu_b1[0, :]

abs_diff_pop_group_canopy_cover = idata_ffmc_canopy_cover.posterior.beta_ffmc[0, :] - idata_ffmc_canopy_cover.posterior.mu_b1[0, :]
rel_diff_pop_group_canopy_cover = idata_ffmc_canopy_cover.posterior.beta_ffmc[0, :] / idata_ffmc_canopy_cover.posterior.mu_b1[0, :]

abs_diff_pop_group_forest_type = idata_ffmc_forest_type.posterior.beta_ffmc[0, :] - idata_ffmc_forest_type.posterior.mu_b1[0, :]
rel_diff_pop_group_forest_type = idata_ffmc_forest_type.posterior.beta_ffmc[0, :] / idata_ffmc_forest_type.posterior.mu_b1[0, :]

### Use forest type, canopy cover & aspect as additional covariates (<250)

In [83]:
model_all_cov = create_model_ffmc_adjustment_exp2(X_train, y_train, coords)

In [101]:
with model_all_cov:
    idata_all_cov = pm.sample(2000, target_accept=0.9, random_seed=0)


KeyboardInterrupt: 

In [102]:
save_model("../../../models/ffmc_adjustment/model_all_cov.pkl", model_all_cov, idata_all_cov)

In [96]:
abs_diff_pop_group_aspect = idata_all_cov.posterior.beta_aspect[0, :] - idata_all_cov.posterior.mu_b1[0, :]
rel_diff_pop_group_aspect = idata_all_cov.posterior.beta_aspect[0, :] / idata_all_cov.posterior.mu_b1[0, :]

abs_diff_pop_group_foresttype = idata_all_cov.posterior.beta_foresttype[0, :] - idata_all_cov.posterior.mu_b2[0, :]
rel_diff_pop_group_foresttype = idata_all_cov.posterior.beta_foresttype[0, :] / idata_all_cov.posterior.mu_b2[0, :]

abs_diff_pop_group_canopy_cover = idata_all_cov.posterior.beta_canopy_cover[0, :] - idata_all_cov.posterior.mu_b3[0, :]
rel_diff_pop_group_canopy_cover = idata_all_cov.posterior.beta_canopy_cover[0, :] / idata_all_cov.posterior.mu_b3[0, :]


In [99]:
print(rel_diff_pop_group_aspect.mean(axis=0).values)
print(rel_diff_pop_group_foresttype.mean(axis=0).values)
print(rel_diff_pop_group_canopy_cover.mean(axis=0).values)

[-1.92482847  6.08290909  9.83965438 -4.74073033 -5.45313243  1.65161698
  2.32705053 17.33314128]
[0.9294754  0.88480581 0.83056425 1.06081811 1.15476427 0.96500433
 1.06257585]
[-0.73648449 -0.78196761  1.70983021  1.13866236  0.439639  ]
