# ABOUT
__Author__: Pat McCornack

__Date__: 3/21/2024

__Purpose:__ Script to apply a trained model to predict FBFM40 values. Outputs a raster of predicted FBFM40 values across the BPA service territory.

----

In [1]:
import os
import glob

import numpy as np
import pandas as pd

import rasterio
from rasterio.merge import merge
from rasterio.windows import Window

import datetime as dt

from joblib import load
from joblib import Parallel, delayed

from IPython.display import clear_output


In [2]:
# Define filepaths
# Project root on the local machine (C: drive)
local_root_dir = r"C:\Users\mcco573\OneDrive - PNNL\Documents\_Projects\BPA Wildfire\F40_modeling"
# Project root on the \\pnl network share
pnnl_root_dir = r"\\pnl\projects\BPAWildfire\data\Landfire\fuels_modeling\F40_modeling"

# Choose whether to work from local or drive.
# Both roots keep their own name so that switching is a one-line change
# (set this to local_root_dir to work from the local copy).
active_root_dir = pnnl_root_dir

paths_dict = {
    # Base directory under which the timestamped output directory is created
    'output_dir' : os.path.join(active_root_dir, r'model_outputs\geospatial\_bpa_service_territory'),
    # Directory holding the input rasters
    'data_dir' : os.path.join(active_root_dir, r'..\LF_raster_data\bpa_service_territory'),
    # Directory and file name of the trained model to load
    'model_dir' : os.path.join(active_root_dir, 'models'),
    'model_fname' : 'LF22_F40_model_2024-05-15_15-21-33',
    # Prefix of the output directory name (a datetime is appended to it)
    'out_fname' : "LF22_F40_Pred_with_Pred_FVT" + "_"
    }

# Functions

-----

## Make Directory
Creates a directory where data will be output - labeled with the datetime that the script is run. Returns name of directory.

## Preprocess window dataframe
Prepares the data to be run through the model. Separates out null and non-burnable values. Returns a dictionary with: 
1. A clean dataframe to be run through the model. 
2. A dataframe of the dropped observations to be rejoined to model predictions. This allows for the data to be reshaped to a 2D numpy array and written as a raster. 

In [3]:
def make_dir(base_dir, file_name):
    """
    Create a new directory whose name ends with the current date and time.

    Inputs:
    - base_dir: Directory in which the new directory is created.
    - file_name: Prefix of the new directory's name. The timestamp
      (formatted as YYYY-MM-DD_HH-MM-SS) is appended directly to it.

    Returns:
    - Path of the created directory as a string
    """
    timestamp = dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    dir_name = file_name + timestamp
    new_dir = os.path.join(base_dir, dir_name)

    # os.makedirs is called without exist_ok, so it raises if the directory already exists
    os.makedirs(new_dir)

    return new_dir

In [4]:
def data_prep(df, target='LF22_F40'):
    """
    Splits a window dataframe into rows the model can be run on and rows containing null data.

    A row is treated as null when any of its columns equals -9999 or -1111.
    Only null values are filtered here; no other classes are removed by this function.

    Input:
    - df: A dataframe created using the flattened numpy arrays returned from reading in the raster chunks.
    - target: Not used by this function; kept so that existing calls keep working.

    Returns a dictionary with:
    - "clean_df": The rows of df without NULL data, keeping their original index.
    - "dropped_obs": A Series named 'F40_Pred' holding -9999 for each dropped row, indexed by the
      dropped rows' original index - to be reappended after prediction.
    """

    # Flag rows with -1111/-9999 (Null values) in any column
    is_null_row = df.isin([-1111, -9999]).any(axis=1)

    # Build the placeholder predictions for the null rows directly from their index.
    # This avoids assigning a new column onto a slice of df (which triggers pandas'
    # SettingWithCopyWarning) and avoids copying every column of the dropped rows.
    dropped = pd.Series(-9999, index=df.index[is_null_row], name='F40_Pred', dtype='int64')

    return {"clean_df": df[~is_null_row],
            "dropped_obs": dropped}


## Window Read Function
This function is used to read in chunks of the full raster to be processed. The raster data is stored as blocks with height=1 and width=41854 (the raster width), so we read in chunks composed of these blocks (e.g. 1000 blocks at a time). This corresponds to the row_slice argument.

In [5]:
def windowed_read(ras, row_slice):
    """
    Reads a full-width band of rows (a window) from a raster file.

    Inputs:
    - ras: Path of the raster to read; it is opened with rasterio inside this function.
    - row_slice: Rows covered by the window - in the form (row_start, row_end).

    Returns:
    - data: The data in the window as a numpy array. No band index is passed to read(),
      so the array keeps a leading band axis (the caller takes element 0 of that axis).
    - win: The Window object used to define the subset of the data.
    - win_transform: The affine transform associated with the window. Used to update the
      metadata of the output of that chunk.
    """
    with rasterio.open(ras) as src:
        # The window always spans every column of the raster
        full_width = (0, src.width)
        win = Window.from_slices(row_slice, full_width)

        win_transform = src.window_transform(win)
        data = src.read(window=win)

    return data, win, win_transform

## Predict using trained model
Applies a trained model to the prepared data to predict F40 for each observation (i.e. pixel). After making predictions, rejoins the previously dropped observations so that the dataframe can be reshaped to a 2D numpy array and written to a raster. The index is used to keep track of relative pixel locations in the dataframe. This function returns a dataframe with predictions for each non-null and burnable observation. 

The model was not trained using null (-9999, -1111) or non-burnable classes. It is assumed that pixels with these values are static and would not be updated as a result of disturbance - hence removing them from the data passed to the model. 

In [6]:
def predict_F40(model, df, target='LF22_F40'):
    """
    Predicts F40 class given a trained model and data to predict on.

    Inputs:
    - model: Trained model object; only its predict() method is used.
    - df: Dataframe with one row per pixel. Every column is passed to the model as a predictor.
    - target: Not used by this function; kept so that existing calls keep working.

    Returns:
    - A dataframe with a single 'F40_Pred' column, sorted by index, holding the predicted value
      for each row that was run through the model and -9999 for each row dropped by data_prep.
      A dataframe is returned in every case, including when all rows were dropped.
    """
    # Prep the data - get a clean dataframe (i.e. no NULL data) and the dropped observations
    prep_data = data_prep(df)
    clean_df = prep_data['clean_df']

    # Convert the dropped observations to a one-column dataframe up front so that
    # (a) both return paths hand back the same type, and
    # (b) the concat below joins two dataframes with the same column, rather than relying on
    #     how pandas aligns a Series with a DataFrame.
    dropped = prep_data['dropped_obs'].to_frame(name='F40_Pred')

    # If clean_df is empty, then all values were NULL and are in dropped
    if clean_df.shape[0] == 0:
        return dropped

    # Run model to predict. clean_df is already a separate object produced by data_prep,
    # so it is passed as-is instead of making another full copy of the window.
    y_pred = model.predict(clean_df)

    # Join the dropped observations back in
    # This allows the result dataframe to be reshaped back to a raster
    predicted = pd.DataFrame({"F40_Pred": y_pred}, index=clean_df.index)
    result = pd.concat([dropped, predicted])

    # Sorting by index puts every pixel back at its original position
    result.sort_index(inplace=True)

    # Return predictions
    return result


In [7]:
def window_predict(row, model_fpath, win_height, raster_dict, ras_shape, out_dir, out_meta):
    """
    Predicts F40 for one window of rows and writes the result out as a raster chunk.

    Inputs:
    - row: First row of the window.
    - model_fpath: Path of the trained model; it is loaded with joblib on every call.
    - win_height: Number of rows in a window. The last window is shortened to fit the raster.
    - raster_dict: Dictionary of {variable name: raster file path}. The variable names become
      the dataframe columns passed to the model.
    - ras_shape: Shape of the full raster; only the number of rows (ras_shape[0]) is used.
    - out_dir: Directory the chunk is written to.
    - out_meta: Raster metadata used as the template for the chunk. It is not modified.

    Returns:
    - Path of the chunk file that was written.
    """
    if not raster_dict:
        raise ValueError("raster_dict must contain at least one raster path.")

    model = load(model_fpath)

    row_start = row
    # make sure slice doesn't exceed row dims
    row_end = min(row + win_height, ras_shape[0])

    # Define the window to be processed
    row_slice = (row_start, row_end)

    # For the current window, load data from each raster
    # NOTE(review): the chunk shape and transform of the last raster read are used for the
    # output, which assumes every raster is on the same grid - confirm.
    data_dict = {}
    for var, fpath in raster_dict.items():
        data_chunk, _, data_transform = windowed_read(fpath, row_slice)
        data_dict[var] = data_chunk.ravel()

    # Create a dataframe from the dictionary of datachunks
    df = pd.DataFrame(data_dict)

    clear_output()

    # Run model to predict F40 Classes for window
    out_arr = predict_F40(model, df)

    # Reshape to the shape of the data that was read, then take the first element of the
    # leading axis to get a 2D array
    out_arr_2D = out_arr.to_numpy().reshape(data_chunk.shape)[0]

    # Build the chunk's metadata on a copy so the caller's dictionary is left untouched
    # NOTE(review): the array is written with whatever dtype the predictions have; confirm it
    # is compatible with the 'dtype' entry of out_meta.
    chunk_meta = dict(out_meta)
    chunk_meta.update({
        'height': out_arr_2D.shape[0],
        'width': out_arr_2D.shape[1],
        'transform': data_transform
    })

    # Name the chunk after the rows it covers. Unlike a wall-clock timestamp, this cannot
    # collide between windows that are processed at the same time.
    out_file = f"data_chunk_rows_{row_start}_{row_end}.tif"
    out_fpath = os.path.join(out_dir, out_file)

    # Write chunk out
    with rasterio.open(out_fpath, 'w+', **chunk_meta) as out:
        out.write(out_arr_2D, indexes=1)

    return out_fpath

# Run Model Predictions
----

In [8]:
# Define target name
target = 'LF22_F40'

# Define the model to import
model_fpath = os.path.join(paths_dict['model_dir'], paths_dict['model_fname'])

# Define the source raster file names
pred_FVT_fpath = r"\\pnl\projects\BPAWildfire\data\Landfire\fuels_modeling\LF_raster_data\bpa_service_territory\_predicted_rasters\LF22_Pred_FVT_2024-05-15_14-29-34.tif"
raster_paths = {
    "LF22_FVT" : pred_FVT_fpath,
    #"LF22_FVT" : os.path.join(paths_dict['data_dir'], "LC22_FVT_230_bpa.tif"),
    "LF22_FVC" : os.path.join(paths_dict['data_dir'], "LC22_FVC_230_bpa.tif"),
    "LF22_FVH" : os.path.join(paths_dict['data_dir'], "LC22_FVH_230_bpa.tif"),
    "LF22_FDST" : os.path.join(paths_dict['data_dir'], "LC22_FDst_230_bpa.tif"),
    "ZONE" : os.path.join(paths_dict['data_dir'], "us_lf_zones_bpa.tif"),
    "BPS_FRG_NE" : os.path.join(paths_dict['data_dir'], "BPS_FRG_bpa.tif")
}

# Grab the metadata (template for the output metadata) and the shape from the FVT raster.
# Both are read inside a single with-block so the file is closed again afterwards, instead
# of keeping a second, never-closed handle open just to look up the shape.
with rasterio.open(raster_paths['LF22_FVT']) as src:
    out_meta = src.meta.copy()
    ras_shape = src.shape

# Create directory using current datetime to output data chunks to
datetime = dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')  # Used to name output file
out_dir = make_dir(base_dir=paths_dict['output_dir'], file_name=paths_dict['out_fname'])

# Define window height and number of parallel jobs
win_height = 1000  # Number of rows to process at once
n_jobs = 24  # Number of windows processed in parallel

# Process one window per task, stepping through the raster win_height rows at a time
chunk_results = Parallel(n_jobs=n_jobs)(
    delayed(window_predict)(row, model_fpath, win_height, raster_paths, ras_shape, out_dir, out_meta)
    for row in range(0, ras_shape[0], win_height)
)

## Mosaic the Data Chunks
Combines the chunks generated above into a single raster. 

In [None]:
from contextlib import ExitStack

# Get the file paths of the generated data chunks (sorted so the order is reproducible)
raster_fpaths = sorted(glob.glob(os.path.join(out_dir, "*.tif")))
if not raster_fpaths:
    raise FileNotFoundError(f"No data chunks (*.tif) found in {out_dir}")

# Open every chunk inside an ExitStack so that all of them are closed again once the
# mosaic has been built, even if the merge fails.
with ExitStack() as stack:
    # Get the rasterio dataset objects corresponding to each path
    src_files_to_mosaic = [stack.enter_context(rasterio.open(fpath)) for fpath in raster_fpaths]

    # Merge the data chunks into a single raster
    mosaic, out_trans = merge(src_files_to_mosaic)

    # Get the metadata for writing - taken from one of the chunks, then the size and
    # transform are replaced with those of the mosaic
    out_meta = src_files_to_mosaic[-1].meta.copy()

out_meta.update({
    "driver" : "GTiff",
    "height" : mosaic.shape[1],
    "width" : mosaic.shape[2],
    "transform" : out_trans
})

# Write out the mosaic raster.
# The file is written next to the chunk directory, as "<out_dir>_<datetime>.tif". This is the
# path the previous os.path.join(out_dir, fname) resolved to whenever out_dir is absolute,
# because fname itself started with out_dir. Writing it outside of out_dir also keeps the
# mosaic from being picked up as a chunk if this cell is run again.
mosaic_fpath = f"{out_dir}_{datetime}.tif"
with rasterio.open(mosaic_fpath, "w", **out_meta) as dest:
    dest.write(mosaic)

print(f"Raster written to {mosaic_fpath}")