In [1]:
import sys
sys.path.append("../..")

In [2]:
import numpy as np
import rasterio
import pandas as pd 
import matplotlib.pyplot as plt 

from src.modeling.encodings import (convert_aspect_to_cardinal_direction, 
                                    convert_population_to_classes, 
                                    convert_elevation_to_classes, 
                                    convert_slope_to_classes, 
                                    map_to_binary)
from src.modeling.utils import load_model
from src.modeling.predictions import BinaryClassification


In [3]:
def load_geotiff(file_path):
    """Read a GeoTIFF and return its pixel data together with its metadata.

    Parameters
    ----------
    file_path
        Location of the raster; passed unchanged to ``rasterio.open``.

    Returns
    -------
    tuple
        ``(data, metadata)`` where ``data`` is the result of ``dataset.read()``
        and ``metadata`` is the dataset's ``meta`` attribute.
    """
    # Both values are fetched while the dataset is still open; the context
    # manager closes the file when the function returns.
    with rasterio.open(file_path) as src:
        return src.read(), src.meta

def rasters_to_dataframe(rasters: list, feature_names: list):
    """Flatten rasters into the columns of a single DataFrame.

    Parameters
    ----------
    rasters : list
        Arrays exposing ``.flatten()``; each one becomes one column. All
        rasters must flatten to the same length (pandas enforces this).
    feature_names : list
        Column name for each raster, in the same order as ``rasters``.

    Returns
    -------
    pd.DataFrame
        One row per raster cell, one column per feature.

    Raises
    ------
    ValueError
        If the number of names differs from the number of rasters, or if a
        name is repeated (which would silently overwrite a column).
    """
    if len(rasters) != len(feature_names):
        raise ValueError(
            f"Got {len(rasters)} rasters but {len(feature_names)} feature names; "
            "each raster needs exactly one name."
        )
    if len(set(feature_names)) != len(feature_names):
        raise ValueError("feature_names must be unique; a repeated name would drop a raster.")

    # zip pairs each name with its raster directly instead of indexing by position.
    columns = {name: raster.flatten() for name, raster in zip(feature_names, rasters)}
    return pd.DataFrame(columns)

def standard_scale(value: float, mean: float, sd: float):
    """Return ``value`` expressed as its distance from ``mean`` in units of ``sd``."""
    deviation = value - mean
    return deviation / sd

def prepare_inference_data(df: pd.DataFrame):
    """Build the dictionary of model inputs from a preprocessed feature frame.

    Each output key is paired with one column of ``df`` (see the table below).
    The ``"fire"`` entry is a list of zeros with one element per row of ``df``
    (presumably a placeholder for the outcome variable — confirm against the
    model definition).

    Returns
    -------
    dict
        Input name -> column of ``df`` (or the zero list for ``"fire"``).
    """
    # (model input name, column of df that feeds it)
    input_to_column = (
        ("elevation", "elevation_encoded"),
        ("slope", "slope_encoded"),
        ("aspect", "aspect_encoded"),
        ("forestroad_density", "forestroad_density_bin"),
        ("railway_density", "railway_density_bin"),
        ("hikingtrail_density", "hikingtrail_density_bin"),
        ("farmyard_density", "farmyard_density_bin"),
        ("population", "population_encoded"),
        ("forest_type", "forest_type"),
        ("ffmc", "ffmc"),
    )
    X_new = {input_name: getattr(df, column) for input_name, column in input_to_column}
    X_new["fire"] = [0] * len(df)
    return X_new

def minmax_scale(value: float, min_value: float, max_value: float):
    """Rescale ``value`` linearly so ``min_value`` maps to 0 and ``max_value`` to 1."""
    value_range = max_value - min_value
    return (value - min_value) / value_range

In [4]:
# Input rasters (paths are relative to this notebook's directory).
path_to_elevation_layer = "../../data/processed/topographical_data/elevation_layer.tif"
path_to_slope_layer = "../../data/processed/topographical_data/slope_layer.tif"
path_to_aspect_layer = "../../data/processed/topographical_data/aspect_layer.tif"
path_to_foresttype_layer = "../../data/processed/forest_type/forest_type_layer.tif"
path_to_population_layer = "../../data/processed/population_data/population_2021_layer.tif"
path_to_forestroad_layer = "../../data/processed/road_density_layers/forestroad_density_layer.tif"
path_to_railway_layer = "../../data/processed/road_density_layers/railway_density_layer.tif"
path_to_hikingtrail_layer = "../../data/processed/road_density_layers/hikingtrail_density_layer.tif"
path_to_farmyard_layer = "../../data/processed/farmyard_density_layer/farmyard_density_layer.tif"
path_to_ref_grid = "../../data/processed/reference_grid/INCA_ref_raster_since_2013_100m.tif"

# Each call returns the raster's pixel array and its metadata.
elevation_layer, elevation_layer_meta = load_geotiff(path_to_elevation_layer)
slope_layer, slope_layer_meta = load_geotiff(path_to_slope_layer)
aspect_layer, aspect_layer_meta = load_geotiff(path_to_aspect_layer)
forest_layer, forest_layer_meta = load_geotiff(path_to_foresttype_layer)
population_layer, population_layer_meta = load_geotiff(path_to_population_layer)
forestroad_layer, forestroad_layer_meta = load_geotiff(path_to_forestroad_layer)
railway_layer, railway_layer_meta = load_geotiff(path_to_railway_layer)
hikingtrail_layer, hikingtrail_layer_meta = load_geotiff(path_to_hikingtrail_layer)
farmyard_layer, farmyard_layer_meta = load_geotiff(path_to_farmyard_layer)
ref_grid, ref_grid_meta = load_geotiff(path_to_ref_grid)

# The layers are flattened side by side into one DataFrame further down, so
# every raster has to share the reference grid's shape. Check it here so a
# misaligned layer is reported by name instead of failing inside pandas.
_layers_to_check = {
    "elevation": elevation_layer,
    "slope": slope_layer,
    "aspect": aspect_layer,
    "forest_type": forest_layer,
    "population": population_layer,
    "forestroad": forestroad_layer,
    "railway": railway_layer,
    "hikingtrail": hikingtrail_layer,
    "farmyard": farmyard_layer,
}
_mismatched_layers = {
    name: layer.shape
    for name, layer in _layers_to_check.items()
    if layer.shape != ref_grid.shape
}
if _mismatched_layers:
    raise ValueError(
        f"Layers not aligned with the reference grid {ref_grid.shape}: {_mismatched_layers}"
    )

In [5]:
# Constant ffmc value assigned to every cell for this prediction run.
FFMC_SCENARIO_VALUE = 85

# Column name -> raster. Keeping each name next to its layer prevents the two
# lists below from drifting out of order.
layers_by_feature = {
    "id": ref_grid,
    "elevation": elevation_layer,
    "slope": slope_layer,
    "aspect": aspect_layer,
    "forest_type": forest_layer,
    "population_density": population_layer,
    "forestroad_density": forestroad_layer,
    "railway_density": railway_layer,
    "hikingtrail_density": hikingtrail_layer,
    "farmyard_density": farmyard_layer,
}
feature_names = list(layers_by_feature)
feature_layers = list(layers_by_feature.values())

df = rasters_to_dataframe(feature_layers, feature_names)
df["ffmc"] = FFMC_SCENARIO_VALUE

# Drop cells whose forest_type is -1.0 (presumably the no-data marker of the
# forest-type layer — confirm). `.copy()` makes the result an independent
# frame, so the column assignments in the preprocessing cell do not operate on
# a slice of the unfiltered frame (avoids SettingWithCopyWarning).
df = df[df.forest_type != -1.0].copy()

In [6]:
# preprocessing of features

# Centre and scale applied to ffmc — presumably the mean and standard deviation
# of the training data; TODO confirm against the model's training pipeline.
FFMC_SCALING_MEAN = 84.9
FFMC_SCALING_SD = 6.6

# Encoders imported from src.modeling.encodings are applied value by value.
df["elevation_encoded"] = df["elevation"].apply(convert_elevation_to_classes)
df["slope_encoded"] = df["slope"].apply(convert_slope_to_classes)
df["aspect_encoded"] = df["aspect"].apply(convert_aspect_to_cardinal_direction)
df["population_encoded"] = df["population_density"].apply(convert_population_to_classes)
df["farmyard_density_bin"] = df["farmyard_density"].apply(map_to_binary)
df["forestroad_density_bin"] = df["forestroad_density"].apply(map_to_binary)
df["railway_density_bin"] = df["railway_density"].apply(map_to_binary)
df["hikingtrail_density_bin"] = df["hikingtrail_density"].apply(map_to_binary)
df["forest_type"] = df["forest_type"].astype(int)

# standard_scale is plain arithmetic, so it is applied to the whole column at
# once instead of through a per-row lambda (same values, far fewer Python calls).
# NOTE: this overwrites "ffmc" in place — re-running this cell without rebuilding
# `df` first would standardise the already standardised values again.
df["ffmc"] = standard_scale(df["ffmc"], FFMC_SCALING_MEAN, FFMC_SCALING_SD)

In [11]:
# Load the two objects stored for the "blr" model; they are passed together to
# BinaryClassification in the prediction cell.
# NOTE(review): the ".pkl" suffix suggests load_model unpickles the file — only
# load model files from a trusted source; confirm in src.modeling.utils.
model_blr, idata_blr = load_model("../../models/blr.pkl")

In [10]:
# Number of row batches that are predicted separately (presumably to bound
# memory use during posterior-predictive sampling — confirm).
N_PREDICTION_CHUNKS = 20

# NOTE(review): the traceback saved with this cell reports a shape mismatch
# (273254 vs 1565) inside the model graph, which suggests one model input was
# not replaced by the values in X_new. Verify the keys returned by
# prepare_inference_data against the data containers the model expects.

df = df.reset_index(drop=True)
preds_ls = []

# Split row positions rather than the DataFrame itself: np.array_split on a
# DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning on recent pandas. The chunk boundaries are the same.
for row_positions in np.array_split(np.arange(len(df)), N_PREDICTION_CHUNKS):
    if row_positions.size == 0:
        # Fewer rows than chunks: nothing to predict for this batch.
        continue
    chunk = df.iloc[row_positions]
    ids = chunk.id.values
    X_new = prepare_inference_data(chunk)
    blr_prediction_obj = BinaryClassification(model_blr, idata_blr, X_new, 0, "y_pred", "p", "z")
    blr_prediction_obj.extend_trace()
    blr_preds = blr_prediction_obj.predict()
    # Attach the reference-grid cell ids so predictions can be mapped back onto the raster.
    blr_preds["id"] = ids
    preds_ls.append(blr_preds)
preds_df = pd.concat(preds_ls)

preds_df.to_csv("../../results/predictions_blr_austria_ffmc85.csv")

  return bound(*args, **kwds)
Sampling: [y_pred]


Output()

ValueError: Input dimension mismatch: (input[1].shape[0] = 273254, input[2].shape[0] = 1565)
Apply node that caused the error: Composite{...}(ExpandDims{axis=0}.0, ffmc, AdvancedSubtensor.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, AdvancedSubtensor1.0, ExpandDims{axis=0}.0)
Toposort index: 12
Inputs types: [TensorType(float64, shape=(1,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,)), TensorType(float64, shape=(1,))]
Inputs shapes: [(1,), (273254,), (1565,), (273254,), (273254,), (273254,), (273254,), (273254,), (273254,), (273254,), (273254,), (273254,), (1,)]
Inputs strides: [(8,), (8,), (8,), (8,), (8,), (8,), (8,), (8,), (8,), (8,), (8,), (8,), (8,)]
Inputs values: [array([1.93237573]), 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', array([0.14382157])]
Outputs clients: [['output'], ['output', bernoulli_rv{0, (0,), int64, True}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x71A354C32960>), MakeVector{dtype='int64'}.0, 4, p)]]

HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.

In [5]:
# Reload the predictions from the CSV written by the prediction cell, using the
# first CSV column as the index. This lets the cells below run without
# repeating the prediction step.
preds_df = pd.read_csv("../../results/predictions_blr_austria_ffmc85.csv", index_col=0)

In [37]:
# Mean and standard deviation over all values of `z` in the posterior group;
# the next cell uses them to standardise the predicted z columns.
z_posterior_values = idata_blr.posterior.z.values
z_pred_train_mean, z_pred_train_sd = z_posterior_values.mean(), z_posterior_values.std()
print(z_pred_train_mean, z_pred_train_sd)

-0.2873966046730663 2.635496079313077


In [46]:

# Standardise the predicted z and its HDI bounds with the posterior mean/sd of z.
# standard_scale is element-wise arithmetic, so it is applied to whole columns
# instead of calling a Python lambda once per row.
preds_df["z_pred_norm"] = standard_scale(preds_df["z_pred"], z_pred_train_mean, z_pred_train_sd).round(3)
preds_df["z_hdi_upper_norm"] = standard_scale(preds_df["z_hdi_upper"], z_pred_train_mean, z_pred_train_sd)
preds_df["z_hdi_lower_norm"] = standard_scale(preds_df["z_hdi_lower"], z_pred_train_mean, z_pred_train_sd)
# Width of the standardised HDI, used as the uncertainty measure.
preds_df["z_hdi_width_norm"] = preds_df["z_hdi_upper_norm"] - preds_df["z_hdi_lower_norm"]

In [49]:
# Display the predictions ordered from highest to lowest p_pred (returns a new
# frame; preds_df itself is not reordered).
preds_df.sort_values(by="p_pred", ascending=False)

Unnamed: 0,y_pred,p_pred,z_pred,p_hdi_lower,p_hdi_upper,p_hdi_width,z_hdi_lower,z_hdi_upper,z_hdi_width,id,z_pred_norm,z_hdi_upper_norm,z_hdi_lower_norm,z_hdi_width_norm,z_hdi_width_minmax_scaled
214861,1,0.998720,7.264423,0.995063,0.999961,0.004899,5.127100,9.366878,4.239778,16609505,2.865,3.663172,2.054451,1.608721,0.457946
142486,1,0.998675,7.435603,0.994941,0.999979,0.005037,4.982720,9.687740,4.705020,10641939,2.930,3.784918,1.999668,1.785250,0.548277
103376,1,0.998597,7.168217,0.995009,0.999977,0.004968,4.821476,9.100360,4.278884,16413193,2.829,3.562046,1.938486,1.623559,0.465539
93029,1,0.998508,7.073553,0.995169,0.999957,0.004789,5.021206,9.116352,4.095146,13363741,2.793,3.568113,2.014271,1.553843,0.429864
197088,1,0.998508,7.073553,0.995169,0.999957,0.004789,5.021206,9.116352,4.095146,9532915,2.793,3.568113,2.014271,1.553843,0.429864
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154322,0,0.039503,-3.345170,0.006754,0.084947,0.078193,-4.550475,-2.270680,2.279795,14335910,-1.160,-0.752528,-1.617562,0.865034,0.077394
138401,0,0.039503,-3.345170,0.006754,0.084947,0.078193,-4.550475,-2.270680,2.279795,17507301,-1.160,-0.752528,-1.617562,0.865034,0.077394
241717,0,0.039503,-3.345170,0.006754,0.084947,0.078193,-4.550475,-2.270680,2.279795,15673064,-1.160,-0.752528,-1.617562,0.865034,0.077394
241716,0,0.039503,-3.345170,0.006754,0.084947,0.078193,-4.550475,-2.270680,2.279795,15673063,-1.160,-0.752528,-1.617562,0.865034,0.077394


In [48]:
# Create a raster of the uncertainty width (z_hdi_width_norm) on the reference grid.
# Only the HDI width is written here, although the output file name also
# mentions z_pred_norm.

# Lookup from reference-grid cell id to score; if an id occurs more than once
# the last occurrence wins (dict semantics).
id_to_score = pd.Series(preds_df['z_hdi_width_norm'].values, index=preds_df['id']).to_dict()

def map_id_to_score(id_val):
    """Return the score stored for ``id_val``, or NaN when the id has no prediction."""
    return id_to_score.get(id_val, np.nan)

# Use the NaN-default helper and pin the output dtype: without `otypes`,
# np.vectorize infers the dtype from the first cell, which yields an object
# array of None values whenever that cell has no prediction.
score_grid = np.vectorize(map_id_to_score, otypes=[float])(ref_grid)

score_grid_meta = ref_grid_meta.copy()
score_grid_meta.update({
    'dtype': 'float32',
    'count': 1,
    # Cells without a prediction are written as NaN, so declare NaN as nodata
    # instead of inheriting the reference grid's nodata value.
    'nodata': np.nan,
})

# Write the score grid to a new GeoTIFF
with rasterio.open('../../results/z_pred_norm_hdi_width_layer.tif', 'w', **score_grid_meta) as dst:
    dst.write(score_grid.astype(np.float32))