In [1]:
import sys
sys.path.append("../")

In [2]:
import pandas as pd
import cloudpickle
import joblib
import rasterio

from config.config import BASE_PATH, PATH_TO_PATH_CONFIG_FILE
from src.utils import load_paths_from_yaml, replace_base_path
from src.modeling.encodings import convert_aspect_to_cardinal_direction
from src.modeling.predictions import BinaryClassification



In [3]:
# Load the path configuration and run it through replace_base_path with BASE_PATH.
paths = replace_base_path(load_paths_from_yaml(PATH_TO_PATH_CONFIG_FILE), BASE_PATH)
path_to_blr_model = "../models/blr_pickle.pkl"

# (column name, raster path) pairs; the names become the DataFrame column names
# when the layers are loaded. Written as an ordered mapping and converted to
# the list of tuples the loader iterates over.
feature_layers = list({
    "population_density": paths["population_layers"]["2021"]["final"],
    "farmyard_density": paths["farmyard_density"]["final"],
    "hikingtrail_density": paths["roads"]["hikingtrails"]["final"],
    "forestroad_density": paths["roads"]["forestroads"]["final"],
    "railway_density": paths["railways"]["final"],
    "elevation": paths["topographical_layers"]["elevation"]["final"],
    "slope": paths["topographical_layers"]["slope"]["final"],
    "aspect": paths["topographical_layers"]["aspect"]["final"],
    "forest_type": paths["forest_type"]["final"],
}.items())

In [5]:

def load_pymc_model(path_to_model: str):
    """Unpickle a saved model bundle and return ``(model, idata)``.

    The file at ``path_to_model`` is read with ``cloudpickle`` and must contain
    a mapping with the keys ``'idata'`` and ``'model'``.

    Args:
        path_to_model: Path of the pickled bundle on disk.

    Returns:
        Tuple ``(model, idata)`` taken from the bundle.

    Raises:
        KeyError: If the bundle lacks ``'idata'`` or ``'model'``.
    """
    # NOTE: unpickling executes arbitrary code; only load files from a trusted source.
    with open(path_to_model, mode='rb') as pickle_file:
        bundle = cloudpickle.load(pickle_file)

    # 'idata' is looked up first so a bundle missing both keys reports the same key as before.
    idata, model = bundle['idata'], bundle['model']
    return model, idata


def load_static_layers_into_df(feature_layers: list) -> pd.DataFrame:
    """Build a DataFrame with one flattened raster band per feature layer.

    Args:
        feature_layers: Iterable of ``(column_name, raster_path)`` tuples.

    Returns:
        DataFrame whose columns are the given names, each holding band 1 of
        the corresponding raster flattened to one dimension.
    """
    columns = {}
    for layer_name, raster_path in feature_layers:
        with rasterio.open(raster_path) as raster:
            first_band = raster.read(1)
        # Collapse the band array into a single 1-D column.
        columns[layer_name] = first_band.flatten()

    return pd.DataFrame(columns)

def preprocess_data(features_df: pd.DataFrame,
                    path_to_preprocessor: str = '../models/blr_preprocessor.pkl') -> pd.DataFrame:
    """Encode, transform and reassemble the feature frame for prediction.

    The numeric columns are selected in a fixed order and passed through the
    fitted preprocessor loaded with ``joblib``; ``forest_type`` and the encoded
    aspect are appended to the result untransformed.

    Args:
        features_df: Frame containing ``aspect``, ``forest_type`` and every
            column listed in ``training_order_columns`` below.
        path_to_preprocessor: Location of the pickled preprocessor. Defaults
            to the path that used to be hard-coded here.

    Returns:
        New DataFrame, indexed like ``features_df``, with the transformed
        numeric columns followed by ``forest_type`` and ``aspect_encoded``.

    Raises:
        KeyError: If a required column is missing from ``features_df``.

    Side effects:
        As before, ``features_df`` itself gains an ``aspect_encoded`` column
        and its ``forest_type`` column is rewritten (-1 replaced by 6, cast to int).
    """
    # Column order handed to the preprocessor.
    # NOTE(review): presumably this must match the order used when it was fitted — confirm.
    training_order_columns = ['ffmc', 'farmyard_density', 'hikingtrail_density', 'forestroad_density',
                            'railway_density', 'elevation', 'slope', 'population_density']

    features_df["aspect_encoded"] = features_df["aspect"].apply(
        convert_aspect_to_cardinal_direction)

    # Assign the result back instead of calling Series.replace(..., inplace=True)
    # on a column selection: with pandas Copy-on-Write that chained in-place call
    # only touches a temporary object and would silently leave the -1 values.
    # NOTE(review): -1 looks like the raster nodata value being mapped to class 6 — confirm.
    features_df["forest_type"] = features_df["forest_type"].replace(-1, 6).astype(int)

    features_df_reordered = features_df[training_order_columns]

    preprocessor = joblib.load(path_to_preprocessor)
    features_transformed = preprocessor.transform(features_df_reordered)

    # Reuse the input index so the two column assignments below align row by row
    # even when features_df does not carry a default 0..n-1 index.
    # Assumes transform() yields one output column per input column — TODO confirm.
    features_transformed_df = pd.DataFrame(
        features_transformed, columns=training_order_columns, index=features_df.index)
    features_transformed_df["forest_type"] = features_df["forest_type"]
    features_transformed_df["aspect_encoded"] = features_df["aspect_encoded"]

    return features_transformed_df


def make_predictions(path_to_model: str, features_df: pd.DataFrame,
                     n_samples: int = 10000, random_state=None):
    """Predict on a random sample of the preprocessed feature rows.

    Args:
        path_to_model: Path of the pickled model bundle passed to
            ``load_pymc_model``.
        features_df: Preprocessed features; must provide the columns read
            below (``elevation``, ``slope``, ``aspect_encoded``, the density
            columns, ``population_density``, ``forest_type`` and ``ffmc``).
        n_samples: Number of rows to draw. Capped at the number of available
            rows so small frames no longer raise. Defaults to the previously
            hard-coded 10000.
        random_state: Forwarded to ``DataFrame.sample``; pass a value to make
            the draw reproducible. ``None`` keeps the former unseeded behaviour.

    Returns:
        Whatever ``BinaryClassification.predict()`` returns for the sample.

    Side effects:
        As before, ``forest_type`` is overwritten with 1 on ``features_df`` itself.
    """
    # NOTE(review): this replaces every row's forest_type with the constant 1 on
    # the caller's frame before sampling — looks like a temporary override;
    # confirm it is intended.
    features_df["forest_type"] = 1

    # DataFrame.sample raises when asked for more rows than exist, so cap the size.
    sample_size = min(n_samples, len(features_df))
    X_test = features_df.sample(sample_size, random_state=random_state)

    # Placeholder labels, one zero per sampled row, passed under the "fire" key.
    y_dummy = [0] * len(X_test)
    X_new_blr = {
        "elevation": X_test.elevation,
        "slope": X_test.slope,
        "aspect": X_test.aspect_encoded,
        "forestroad_density": X_test.forestroad_density,
        "railway_density": X_test.railway_density,
        "hikingtrail_density": X_test.hikingtrail_density,
        "farmyard_density": X_test.farmyard_density,
        "population": X_test.population_density,
        "forest_type": X_test.forest_type,
        "ffmc": X_test.ffmc,
        "fire": y_dummy
        }

    model, idata = load_pymc_model(path_to_model)
    blr_prediction_obj = BinaryClassification(model, idata, X_new_blr, 0, "y_pred", "p")
    blr_prediction_obj.extend_trace()
    preds = blr_prediction_obj.predict()
    return preds

In [8]:
# Read band 1 of every raster listed in feature_layers into one flat column per layer.
features_df = load_static_layers_into_df(feature_layers)


In [9]:
# Preview of the loaded frame. The rows shown in the output contain -3.402823e+38,
# -9999.0 and -1.0, which look like raster nodata sentinels carried through
# unmasked — TODO confirm and decide whether they should be filtered before prediction.
features_df

Unnamed: 0,population_density,farmyard_density,hikingtrail_density,forestroad_density,railway_density,elevation,slope,aspect,forest_type
0,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
1,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
2,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
3,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
4,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
...,...,...,...,...,...,...,...,...,...
28090067,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
28090068,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
28090069,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0
28090070,0.0,0.0,0.0,0.0,0.0,-3.402823e+38,-9999.0,-9999.0,-1.0


In [None]:

# Every row gets the same constant ffmc value; the column is added in place to features_df.
# NOTE(review): 85 is a magic number — confirm what scenario it represents and
# consider moving it to a named constant in the configuration cell.
features_df["ffmc"] = 85
# preprocess_data also adds/rewrites columns on features_df as a side effect.
features_df_preprocessed = preprocess_data(features_df)
# make_predictions draws a random sample of rows without a fixed seed here,
# so `preds` differs between runs.
preds = make_predictions(path_to_blr_model, features_df_preprocessed)