In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../../")

In [3]:
import pandas as pd
import geopandas as gpd
import pymc as pm
from sklearn.preprocessing import StandardScaler

from config.config import BASE_PATH, PATH_TO_PATH_CONFIG_FILE
from src.utils import load_paths_from_yaml, replace_base_path
from src.modeling.encodings import (convert_aspect_to_cardinal_direction, 
                                    convert_canopy_cover_to_classes, 
                                    convert_ffmc_to_classes)
from src.modeling.bayesian_models import (create_model_ffmc_adjustment_aspect, 
                                          create_model_ffmc_adjustment_foresttype,
                                          create_model_ffmc_adjustment_canopy_cover)
from src.modeling.utils import temporal_train_test_split, save_model, load_model
from src.modeling.predictions import BinaryClassification



In [4]:
paths = load_paths_from_yaml(PATH_TO_PATH_CONFIG_FILE)
paths = replace_base_path(paths, BASE_PATH)

In [5]:
def calculate_diff_beta_ffmc(idata):
    abs_diff = idata.posterior.beta_ffmc[0, :] - idata.posterior.mu_b1[0, :]
    rel_diff = idata.posterior.beta_ffmc[0, :] / idata.posterior.mu_b1[0, :]
    return abs_diff, rel_diff

coords = {"aspect_groups": [0, 1, 2, 3, 4, 5, 6, 7], 
          "foresttype_groups": [0, 1, 2, 3, 4, 5, 6], 
          "canopy_cover_groups": [0, 1, 2, 3, 4], 
          "ffmc_groups": [0, 1, 2, 3, 4]}

### Using all data

In [6]:
# read training data
train_data = gpd.read_file(paths["training_data"])
train_data = train_data.loc[:, ["ffmc", "aspect", "foresttype", "canopy_cov", "fire", "date"]]

# data preprocessing
# remove entries with missing values in forest type & canopy cover
train_data.dropna(inplace=True)
train_data["foresttype"] = train_data.foresttype.astype("int")
train_data["aspect_categorized"] = train_data.aspect.apply(convert_aspect_to_cardinal_direction).astype("int")
train_data["canopy_cover_categorized"] = train_data.canopy_cov.apply(convert_canopy_cover_to_classes).astype("int")
train_data["ffmc_categorized"] = train_data.ffmc.apply(convert_ffmc_to_classes).astype("int")

# Split data temporally 
# Older samples (70%) will be used for training; newer samples (30%) will be used for evaluation
train_df, test_df = temporal_train_test_split(train_data, "date", 0.7)
relevant_columns = ["ffmc", "foresttype", "aspect_categorized", "canopy_cover_categorized", "ffmc_categorized", "date"]
X_train, y_train = train_df.loc[:, relevant_columns], train_df.loc[:, "fire"]
X_test, y_test = test_df.loc[:, relevant_columns], test_df.loc[:, "fire"]

scaler = StandardScaler()
X_train["ffmc"] = scaler.fit_transform(X_train[["ffmc"]])
X_test["ffmc"] = scaler.transform(X_test[["ffmc"]])


In [10]:
model_ffmc_aspect_all_data = create_model_ffmc_adjustment_aspect(X_train, y_train, coords)
model_ffmc_canopy_cover_all_data = create_model_ffmc_adjustment_canopy_cover(X_train, y_train, coords)
model_ffmc_forest_type_all_data = create_model_ffmc_adjustment_foresttype(X_train, y_train, coords)

with model_ffmc_aspect_all_data:
    idata_ffmc_aspect_all_data = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_canopy_cover_all_data:
    idata_ffmc_canopy_cover_all_data = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_forest_type_all_data:
    idata_ffmc_forest_type_all_data = pm.sample(2000, target_accept=0.9, random_seed=0)

# save models
save_model("../../../models/ffmc_adjustment/blr_aspect_all.pkl", model_ffmc_aspect_all_data, idata_ffmc_aspect_all_data)
save_model("../../../models/ffmc_adjustment/blr_canopy_cover_all.pkl", model_ffmc_canopy_cover_all_data, idata_ffmc_canopy_cover_all_data)
save_model("../../../models/ffmc_adjustment/blr_forest_type_all.pkl", model_ffmc_forest_type_all_data, idata_ffmc_forest_type_all_data)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 426 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 1892 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 359 seconds.


In [12]:
# load models
model_ffmc_aspect_all_data, idata_ffmc_aspect_all_data = load_model("../../../models/ffmc_adjustment/blr_aspect_all.pkl")
model_ffmc_canopy_cover_all_data, idata_ffmc_canopy_cover_all_data = load_model("../../../models/ffmc_adjustment/blr_canopy_cover_all.pkl")
model_ffmc_forest_type_all_data, idata_ffmc_forest_type_all_data = load_model("../../../models/ffmc_adjustment/blr_forest_type_all.pkl")

In [16]:
# Model evaluation
y_dummy = [0 for i in y_test]
X_new_blr = {
    "aspect_groups_idx": X_test.aspect_categorized,
    "ffmc": X_test.ffmc,
    "fire": y_dummy
    }

seed = 0

# predictions for blr model
blr_prediction_obj = BinaryClassification(model_ffmc_aspect_all_data, idata_ffmc_aspect_all_data, X_new_blr, seed, "y_pred", "p")
blr_prediction_obj.extend_trace()
blr_preds = blr_prediction_obj.predict()

Sampling: [y_pred]


In [17]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, blr_preds.y_pred)

0.7168949771689498

### Using only data with location uncertainty < 250m

In [70]:
# read training data
train_data = gpd.read_file(paths["training_data"])
train_data = train_data.loc[:, ["ffmc", "aspect", "foresttype", "canopy_cov", "fire", "date", "Pufferradi"]]

# Using only fire samples with low location uncertainty
fire_samples = train_data[train_data.fire == 1]
non_fire_samples = train_data[train_data.fire == 0]
fire_samples.Pufferradi = fire_samples.Pufferradi.astype("int")
fire_samples = fire_samples[fire_samples.Pufferradi <= 250]
non_fire_samples = non_fire_samples.sample(len(fire_samples))
train_data = pd.concat([fire_samples, non_fire_samples])

# data preprocessing
# remove entries with missing values in forest type & canopy cover
train_data.dropna(subset=("foresttype", "canopy_cov"), inplace=True)
train_data["foresttype"] = train_data.foresttype.astype("int")
train_data["aspect_categorized"] = train_data.aspect.apply(convert_aspect_to_cardinal_direction).astype("int")
train_data["canopy_cover_categorized"] = train_data.canopy_cov.apply(convert_canopy_cover_to_classes).astype("int")
train_data["ffmc_categorized"] = train_data.ffmc.apply(convert_ffmc_to_classes).astype("int")

# Split data temporally 
# Older samples (70%) will be used for training; newer samples (30%) will be used for evaluation
train_df, test_df = temporal_train_test_split(train_data, "date", 0.7)
relevant_columns = ["ffmc", "foresttype", "aspect_categorized", "canopy_cover_categorized", "ffmc_categorized", "date"]
X_train, y_train = train_df.loc[:, relevant_columns], train_df.loc[:, "fire"]
X_test, y_test = test_df.loc[:, relevant_columns], test_df.loc[:, "fire"]

scaler = StandardScaler()
X_train["ffmc"] = scaler.fit_transform(X_train[["ffmc"]])
X_test["ffmc"] = scaler.transform(X_test[["ffmc"]])

In [27]:
model_ffmc_aspect_250m = create_model_ffmc_adjustment_aspect(X_train, y_train, coords)
model_ffmc_canopy_cover_250m = create_model_ffmc_adjustment_canopy_cover(X_train, y_train, coords)
model_ffmc_forest_type_250m = create_model_ffmc_adjustment_foresttype(X_train, y_train, coords)

with model_ffmc_aspect_250m:
    idata_ffmc_aspect_250m = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_canopy_cover_250m:
    idata_ffmc_canopy_cover_250m = pm.sample(2000, target_accept=0.9, random_seed=0)

with model_ffmc_forest_type_250m:
    idata_ffmc_forest_type_250m = pm.sample(2000, target_accept=0.9, random_seed=0)

save_model("../../../models/ffmc_adjustment/blr_aspect_250m.pkl", model_ffmc_aspect_250m, idata_ffmc_aspect_250m)
save_model("../../../models/ffmc_adjustment/blr_canopy_cover_250m.pkl", model_ffmc_canopy_cover_250m, idata_ffmc_canopy_cover_250m)
save_model("../../../models/ffmc_adjustment/blr_forest_type_250m.pkl", model_ffmc_forest_type_250m, idata_ffmc_forest_type_250m)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 214 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 597 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu_b1, sigma_b1, intercept, beta_ffmc, error_beta]


Sampling 2 chains for 1_000 tune and 2_000 draw iterations (2_000 + 4_000 draws total) took 232 seconds.


In [76]:
model_ffmc_aspect_250m, idata_ffmc_aspect_250m = load_model("../../../models/ffmc_adjustment/blr_aspect_250m.pkl")
model_ffmc_canopy_cover_250m, idata_ffmc_canopy_cover_250m = load_model("../../../models/ffmc_adjustment/blr_canopy_cover_250m.pkl")
model_ffmc_forest_type_250m, idata_ffmc_forest_type_250m = load_model("../../../models/ffmc_adjustment/blr_forest_type_250m.pkl")

In [83]:
# Model evaluation
y_dummy = [0 for i in y_test]
X_new_blr = {
    "aspect_categorized": X_test.aspect_categorized,
    "canopy_cover_categorized": X_test.canopy_cover_categorized,
    "foresttype": X_test.foresttype,
    "ffmc": X_test.ffmc,
    "fire": y_dummy
    }

seed = 0

# predictions for blr model
# blr_prediction_obj = BinaryClassification(model_ffmc_aspect_250m, idata_ffmc_aspect_250m, X_new_blr, seed, "y_pred", "p")
# blr_prediction_obj.extend_trace()
# blr_preds = blr_prediction_obj.predict()

# predictions for blr model
blr_prediction_obj = BinaryClassification(model_ffmc_forest_type_250m, idata_ffmc_forest_type_250m, X_new_blr, seed, "y_pred", "p")
blr_prediction_obj.extend_trace()
blr_preds = blr_prediction_obj.predict()

KeyError: 'aspect_categorized'

### FFMC Adjustment

In [82]:
abs_diff_ffmc_beta_cc_all, abs_diff_ffmc_beta_cc_rel = calculate_diff_beta_ffmc(idata_ffmc_canopy_cover_all_data)
abs_diff_ffmc_beta_aspect_all, abs_diff_ffmc_beta_aspect_rel = calculate_diff_beta_ffmc(idata_ffmc_aspect_all_data)
abs_diff_ffmc_beta_ft_all, abs_diff_ffmc_beta_ft_rel = calculate_diff_beta_ffmc(idata_ffmc_forest_type_all_data)

abs_diff_ffmc_beta_cc_250m,  rel_diff_ffmc_beta_cc_250m = calculate_diff_beta_ffmc(idata_ffmc_canopy_cover_250m)
abs_diff_ffmc_beta_aspect_250m, rel_diff_ffmc_beta_aspect_250m = calculate_diff_beta_ffmc(idata_ffmc_aspect_250m)
abs_diff_ffmc_beta_ft_250m, rel_diff_ffmc_beta_ft_250m = calculate_diff_beta_ffmc(idata_ffmc_forest_type_250m)