This is the code for LSTM (long short-term memory) classifier training by Raymond Xu.

 In this edition, the longitude and latitude of the original wildfire dataset are used as inputs.

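 Here the data are sorted by date and split chronologically (earliest rows for training, latest rows for testing); each sample is fed to the model as a single time step, so no multi-step temporal sequence is built.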


In [None]:
''' This is the Research Project titled ML Algorithms for Alberta Forest Occurrence Prediction.
    This is the 8th Engineering Research Project, and is hereby declared as

                            Project Christopher

    Version 1.0 - Long Short-Term Memory Classifier
    Data Source: European Space Agency - ERA5
                 Government of Alberta - Historical Wildfire registry and Fire Weather Indices
                 Natural Resources Canada - Vegetation Classification of Canada

    AI Diederik - Hongik Ingan, For the Benefits of All Mankind
'''

import math
import numpy as np
import pandas as pd
from math import floor
from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM,Dense
from sklearn import set_config
from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OneHotEncoder,LabelEncoder
set_config(transform_output = "pandas")

In [None]:
# Load wildfire and non-wildfire datasets.
# NOTE: this is an absolute shared-drive location; edit DATA_DIR to run elsewhere.
DATA_DIR = "G:/Shared drives/MECE 788 - Forest Fire Prediction/04_Preprocessing/Cleanup_and_point_selection"
wf_df = pd.read_csv(f"{DATA_DIR}/wf_original lon and lat.csv", index_col=0)
nwf_df = pd.read_csv(f"{DATA_DIR}/downsampled_df.csv", index_col=0)

# Remove the first unnamed column (first remaining column once index_col=0 is consumed)
wf_df = wf_df.iloc[:, 1:]

# No label column is created here: the 'fire' column used by the later cells is
# presumably already present in both CSV files -- TODO confirm.

# Wildfire rows are expected to keep their own 'latitude'/'longitude' columns
# (presumably the original ignition coordinates -- verify), so the ERA5 grid
# coordinates are dropped from that frame.
wf_df = wf_df.drop(columns=['latitude_ERA5', 'longitude_ERA5'])

# Non-wildfire rows only have the ERA5 coordinates; rename them so both frames
# share the same coordinate columns.  `columns=` is required: a bare mapping
# passed to DataFrame.rename relabels the *index*, which would leave the columns
# untouched and make concat() produce four half-empty coordinate columns.
nwf_df = nwf_df.rename(columns={
    'latitude_ERA5': 'latitude',
    'longitude_ERA5': 'longitude',
})

combined_data = pd.concat([wf_df, nwf_df], ignore_index=True)

# max() of the date column is the most recent date, not the oldest one.
print(f"The latest date in the original dataset is:{combined_data['date'].max()}")

The oldest date in the original dataset is:2019-10-29


Proceed to split the data

In [None]:
# Chronological hold-out split: rows are ordered by date, the earliest
# `train_ratio` share is used for training and everything after it for testing.
train_ratio = 0.7
test_ratio = 1 - train_ratio

# Stable sort keeps rows sharing a date in their original order, so the split
# is reproducible from run to run.
combined_data = combined_data.sort_values(by='date', kind='stable')

train_size = int(train_ratio * len(combined_data))
# Derive the test size from the remainder.  Computing int(test_ratio * n) and
# slicing up to train_size + test_size silently drops trailing rows, because
# both sizes are truncated and 1 - 0.7 is not exactly 0.3 in floating point.
test_size = len(combined_data) - train_size

train_data = combined_data[:train_size]
test_data = combined_data[train_size:]

# 'fire' is the label; 'date' is only used for ordering and is not a model input.
X_train = train_data.drop(columns=['fire', 'date'])
X_test = test_data.drop(columns=['fire', 'date'])
y_train = train_data['fire']
y_test = test_data['fire']


In [None]:
# Manually separate the columns that are standardised from those passed through.
# Vegetation columns are forwarded to the model without scaling.
pass_features = [
    'high_vegetation_cover',
    'low_vegetation_cover',
    'type_of_high_vegetation',
    'type_of_low_vegetation',
]

# Every other column except the label, the date and '24hr_max_temperature'
# is treated as numeric and standardised.
numeric_features = (
    train_data.drop(columns=pass_features)
    .keys()
    .drop(['fire', 'date'])
    .drop(['24hr_max_temperature'])
)
# Reserved for future development:
# numeric_features = numeric_features.insert(-1, 'distance_to_road')
feature_names = {'reserved'}  # Columns to keep

# Standardise the numeric columns.
scale = ColumnTransformer(
    [('scale_transformer', StandardScaler(), numeric_features)],
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# One-hot encoding is suspended for now:
# cate = ColumnTransformer([('categorical_transformer', OneHotEncoder(sparse_output=False), categorical_features)], verbose_feature_names_out=False).set_output(transform='pandas')

# Forward the pass-through columns unchanged.
pss = ColumnTransformer(
    [('Pass_transformer', 'passthrough', pass_features)],
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Placeholder selector built from `feature_names`; it is not part of Data_pipeline.
Drop_transformer = ColumnTransformer(
    [('Drop_transformer', 'passthrough', feature_names)],
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Final preprocessing: scaled numeric columns followed by the pass-through columns.
Data_pipeline = Pipeline(
    steps=[
        ('Feature Union', FeatureUnion([('numeric', scale), ('pass', pss)])),
    ]
)

In [None]:
# Fit the preprocessing on the training rows only, then apply it to the test rows.
train_matrix = np.asarray(Data_pipeline.fit_transform(X_train))
test_matrix = np.asarray(Data_pipeline.transform(X_test))

# Add a length-1 time axis: (samples, features) -> (samples, 1, features),
# matching the (1, n_features) input shape used by the LSTM model.
# reshape() is used instead of np.resize(): resize silently repeats or truncates
# data when the element counts differ, whereas reshape raises on any mismatch.
X_train_processed = train_matrix.reshape(train_matrix.shape[0], 1, train_matrix.shape[1])
X_test_processed = test_matrix.reshape(test_matrix.shape[0], 1, test_matrix.shape[1])


Set up an LSTM model that is compatible with hyperparameter tuning

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input

from keras_tuner.tuners import Hyperband
import keras_tuner as kt
import tensorflow.keras.backend as K
from keras.metrics import AUC

def recall_m(y_true, y_pred):
    """Return the recall of a batch, computed with backend tensor ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are clipped
    to [0, 1] and rounded before counting, so predictions are effectively
    thresholded at 0.5 (assumes 0/1 labels and scores in [0, 1] -- not enforced).

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted scores.

    Returns:
        true positives / (actual positives + epsilon); epsilon avoids a
        division by zero when the batch holds no positive label.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    hit_count = K.sum(hit_mask)
    positive_count = K.sum(positive_mask)
    return hit_count / (positive_count + K.epsilon())

def precision_m(y_true, y_pred):
    """Return the precision of a batch, computed with backend tensor ops.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are clipped
    to [0, 1] and rounded before counting, so predictions are effectively
    thresholded at 0.5 (assumes 0/1 labels and scores in [0, 1] -- not enforced).

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted scores.

    Returns:
        true positives / (predicted positives + epsilon); epsilon avoids a
        division by zero when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged_mask = K.round(K.clip(y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    flagged_count = K.sum(flagged_mask)
    return hit_count / (flagged_count + K.epsilon())

def f1_m(y_true, y_pred):
    """Return the batch F1 score: the harmonic mean of precision and recall.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted scores.

    Returns:
        2 * (p * r) / (p + r + epsilon), where p and r come from
        ``precision_m`` and ``recall_m``; epsilon guards the p + r == 0 case.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_ratio = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_ratio

def LSTM_classifier(hp, input_shape=(1, 25)):
    """Build and compile a tunable LSTM binary classifier for Keras Tuner.

    Search space:
      * ``num_layers``     - number of stacked LSTM layers (1 or 2)
      * ``units_<i>``      - width of LSTM layer ``i`` (2x to 15x the feature count)
      * ``activation_<i>`` - activation of LSTM layer ``i``
      * ``dropout``        - whether a 20% dropout precedes the output layer
      * ``lr``             - Adam learning rate, log-sampled in [1e-4, 1e-2]

    Args:
        hp: ``keras_tuner.HyperParameters`` instance supplied by the tuner.
        input_shape: ``(timesteps, features)`` of a single sample.  Defaults to
            ``(1, 25)``, the shape previously hard-coded in this function.

    Returns:
        A compiled ``Sequential`` model with one sigmoid output, trained with
        binary cross-entropy and reporting binary accuracy and AUC.
    """
    n_features = input_shape[-1]
    activation_choices = ['relu', 'tanh', 'sigmoid', 'swish', 'linear']

    model = Sequential()
    # Declare the input with a real Input layer; assigning `model.Input_shape`
    # only sets an unused attribute and does not define the model input.
    model.add(Input(shape=input_shape))

    # Tune the number of layers.  Layers are numbered from 1 so that every
    # requested layer is actually created (a 0-based loop guarded by `i == 1` /
    # `i == 2` builds no LSTM when num_layers == 1 and never reaches layer 2).
    num_layers = hp.Int("num_layers", 1, 2)
    for i in range(1, num_layers + 1):
        model.add(LSTM(
            units=hp.Int(f'units_{i}',
                         min_value=n_features * 2,
                         max_value=n_features * 15,
                         step=n_features),
            activation=hp.Choice(f'activation_{i}', activation_choices),
            # Every LSTM except the last must emit the full sequence so the
            # next LSTM receives 3-D input.
            return_sequences=(i < num_layers),
        ))

    # Tune whether to use dropout.  It is applied to the LSTM features before
    # the output layer; dropping the sigmoid output itself would zero/rescale
    # the predicted probability during training.
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.20))
    model.add(Dense(1, activation='sigmoid'))

    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        # Name the AUC metric explicitly so the tuner objective "val_auc"
        # resolves for every model built in the session.
        metrics=[keras.metrics.BinaryAccuracy(), keras.metrics.AUC(name="auc")],
    )
    return model


In [None]:
# Sanity check: display the shape of the training label vector.
y_train.shape

(53334,)

Hyperband tuning set up below:

In [None]:
# Hyperband search over the LSTM_classifier search space, maximising validation AUC.
# NOTE: overwrite=False reloads any earlier results stored under
# LSTM_Hyperband_tuning/LSTM; delete that folder (or set overwrite=True) after
# changing the search space, otherwise stale trials are reused.
tuner = kt.Hyperband(LSTM_classifier,
                     objective=kt.Objective("val_auc", direction="max"),
                     max_epochs=10,
                     factor=3,
                     executions_per_trial=2,
                     seed=42,  # reproducible hyperparameter sampling
                     overwrite=False,
                     directory='LSTM_Hyperband_tuning',
                     project_name='LSTM')

# Select hyperparameters on a validation slice taken from the training data
# instead of the held-out test set; tuning against the test set leaks it into
# model selection and inflates the final test scores.  Keras takes the
# validation slice from the end of the arrays, i.e. the most recent 20% of the
# date-sorted training rows.
tuner.search(X_train_processed, np.asarray(y_train), epochs=10, validation_split=0.2)

Reloading Tuner from LSTM_Hyperband_tuning\LSTM\tuner0.json


KeyboardInterrupt: 

In [None]:
# A Keras Tuner object has no `history` attribute, so rebuild the best
# configuration found by the search and train it once to get a History object.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_hp)
history = best_model.fit(X_train_processed, np.asarray(y_train),
                         epochs=50,
                         validation_data=(X_test_processed, np.asarray(y_test)))

# One column per logged metric, one row per epoch.
losses_f = pd.DataFrame(history.history)

ax = losses_f.plot(figsize=(10, 6))
ax.set(title="LSTM training history", xlabel="epoch", ylabel="metric value");

Epoch 1/50


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - f1_score: 0.7993 - loss: 0.6462 - val_f1_score: 0.7877 - val_loss: 0.5671
Epoch 2/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - f1_score: 0.7975 - loss: 0.5278 - val_f1_score: 0.7877 - val_loss: 0.5175
Epoch 3/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - f1_score: 0.8014 - loss: 0.4821 - val_f1_score: 0.7877 - val_loss: 0.4972
Epoch 4/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - f1_score: 0.8036 - loss: 0.4645 - val_f1_score: 0.7877 - val_loss: 0.4912
Epoch 5/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - f1_score: 0.8001 - loss: 0.4656 - val_f1_score: 0.7877 - val_loss: 0.4884
Epoch 6/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - f1_score: 0.7998 - loss: 0.4607 - val_f1_score: 0.7877 - val_loss: 0.4862
Epoch 7/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1b426efa640>