This is the code for training artificial neural networks, by Raymond Xu.

 In this edition, the longitude and latitude of the original wildfire dataset are used as inputs.

 Here the temporal sequence is not considered in the data splitting.


In [None]:
''' This is the Research Project titled ML Algorithms for Alberta Forest Occurrence Prediction.
    This is the 8th Engineering Research Project, and is hereby declared as

                            Project Christopher

    Version 1.0 - Artificial Neural Network
    Data Source: European Space Agency - ERA5
                 Government of Alberta - Historical Wildfire registry and Fire Weather Indices
                 Natural Resources Canada - Vegetation Classification of Canada

    AI Diederik - Hongik Ingan, For the Benefits of All Mankind
'''

import math
import numpy as np
import pandas as pd
from math import floor
from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM,Dense
from sklearn import set_config
from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OneHotEncoder,LabelEncoder
set_config(transform_output = "pandas")

In [None]:

# Read the down-sampled dataset. `index_col=0` turns the first CSV column into
# the index, so it does not show up as an extra unnamed feature column.
main_df = pd.read_csv(
    "G:/Shared drives/MECE 788 - Forest Fire Prediction/04_Preprocessing/Cleanup_and_point_selection/downsampled_df.csv",
    index_col=0,
)
# Local alternative: main_df = pd.read_csv('downsampled_df.csv')

# Partition the rows by label: fire == 1 (wildfire) and fire == 0 (no wildfire).
wf_df = main_df.loc[main_df['fire'] == 1]
nwf_df = main_df.loc[main_df['fire'] == 0]


Proceed with splitting the data

In [None]:

# Split the dataset into train / test / validation subsets.
# `main_df` is expected to have been loaded by an earlier cell.

# Handle the two classes separately so that each subset receives the same
# fraction of wildfire (fire == 1) and non-wildfire (fire == 0) rows.
wf_df = main_df[main_df['fire'] == 1]
nwf_df = main_df[main_df['fire'] == 0]

# Fractions of each class that go to the train, test and validation subsets,
# in that order.
split = [0.6, 0.2, 0.2]

# Order each class by date so the slices below are consecutive in time.
# NOTE(review): the notebook header says the temporal sequence is not
# considered in the split, but the rows are sorted by date here - confirm
# which behaviour is intended.
wf_sort = wf_df.sort_values(by='date')
nwf_sort = nwf_df.sort_values(by='date')


def _consecutive_slices(frame, ratios):
    """Cut *frame* into consecutive, non-overlapping positional slices.

    Every ratio except the last yields ``int(ratio * len(frame))`` rows; the
    last slice takes all remaining rows, so no row is lost to rounding and
    no row appears in more than one slice.

    Args:
        frame: DataFrame whose row order defines the slicing order.
        ratios: Sequence of fractions, one per slice.

    Returns:
        A list with ``len(ratios)`` DataFrames.
    """
    pieces = []
    start = 0
    for ratio in ratios[:-1]:
        stop = start + int(ratio * len(frame))
        pieces.append(frame.iloc[start:stop])
        # Carry the cumulative offset forward; restarting from 0 would make
        # later slices overlap the earlier ones (data leakage).
        start = stop
    pieces.append(frame.iloc[start:])
    return pieces


wf_train, wf_test, wf_val = _consecutive_slices(wf_sort, split)
nwf_train, nwf_test, nwf_val = _consecutive_slices(nwf_sort, split)

train_data = pd.concat([wf_train, nwf_train])
test_data = pd.concat([wf_test, nwf_test])
val_data = pd.concat([wf_val, nwf_val])

# Features: everything except the label ('fire') and the 'date' column.
X_train = train_data.drop(columns=['fire', 'date'])
X_test = test_data.drop(columns=['fire', 'date'])
X_val = val_data.drop(columns=['fire', 'date'])

# Labels.
y_train = train_data['fire']
y_test = test_data['fire']
y_val = val_data['fire']


In [None]:
# Inspect the feature columns that remain after dropping 'fire' and 'date'.
X_train.keys()

Index(['latitude', 'longitude', 'high_vegetation_cover',
       'leaf_area_index_high_vegetation', 'leaf_area_index_low_vegetation',
       'low_vegetation_cover', 'slope_of_sub_gridscale_orography',
       'type_of_high_vegetation', 'type_of_low_vegetation',
       '24hr_accumulated_precipitation', '24hr_max_temperature',
       'global_noon_LST_2m_temperature', 'global_noon_LST_relative_humidity',
       'global_noon_LST_wind_speed', 'BUI', 'DC', 'DMC', 'FFMC', 'FWI',
       'fire_count_past_3Days', 'fire_count_past_7Days',
       'fire_count_past_10Days', 'fire_count_past_30Days',
       '24hr_max_temperature_1dayLag', '24hr_max_temperature_2dayLag',
       'global_noon_LST_2m_temperature_1dayLag',
       'global_noon_LST_2m_temperature_2dayLag'],
      dtype='object')

In [None]:
# Manually group the columns by the preprocessing they receive.
categorical_features = ['type_of_high_vegetation', 'type_of_low_vegetation']
pass_features = [
    'leaf_area_index_high_vegetation',
    'leaf_area_index_low_vegetation',
    'slope_of_sub_gridscale_orography',
]
# Every remaining column, except the label and the date, is treated as numeric.
non_numeric_columns = pass_features + categorical_features + ['fire', 'date']
numeric_features = train_data.columns.drop(non_numeric_columns)
# TODO: add 'distance_to_road' to numeric_features (reserved for future development).

# Columns kept by the final selection step, in output order.
# NOTE(review): the 'numeric__' / 'pass__' prefixes presumably come from the
# FeatureUnion member names used below - confirm against the transformed frame.
feature_names = [
    'numeric__fire_count_past_30Days',
    'numeric__DMC',
    'numeric__global_noon_LST_2m_temperature',
    'numeric__BUI',
    'numeric__FWI',
    'numeric__latitude',
    'numeric__FFMC',
    'numeric__global_noon_LST_relative_humidity',
    'numeric__24hr_max_temperature',
    'numeric__global_noon_LST_2m_temperature_1dayLag',
    'pass__leaf_area_index_high_vegetation',
    'numeric__global_noon_LST_2m_temperature_2dayLag',
    'numeric__high_vegetation_cover',
    'numeric__24hr_max_temperature_1dayLag',
    'numeric__low_vegetation_cover',
    'pass__leaf_area_index_low_vegetation',
    'numeric__24hr_accumulated_precipitation',
]


def _single_step_transformer(step_name, transformer, columns):
    """Wrap one transformer in a pandas-output ColumnTransformer over *columns*."""
    wrapped = ColumnTransformer(
        [(step_name, transformer, columns)],
        verbose_feature_names_out=False,
    )
    return wrapped.set_output(transform='pandas')


# One transformer per column group, plus the final column selection.
scale = _single_step_transformer('scale_transformer', StandardScaler(), numeric_features)
cate = _single_step_transformer(
    'categorical_transformer', OneHotEncoder(sparse_output=False), categorical_features
)
pss = _single_step_transformer('Pass_transformer', 'passthrough', pass_features)
Drop_transformer = _single_step_transformer('Drop_transformer', 'passthrough', feature_names)

# Full preprocessing: transform each column group side by side, then keep only
# the columns listed in feature_names.
feature_union = FeatureUnion([('numeric', scale), ('categorical', cate), ('pass', pss)])
Data_pipeline = Pipeline(
    steps=[
        ('Feature Union', feature_union),
        ('Drop Columns', Drop_transformer),
    ]
)

In [None]:
# Fit the preprocessing pipeline on the training features only, then reuse the
# fitted pipeline to transform the test features.
# NOTE(review): X_val is not transformed in this cell.
X_train_processed = Data_pipeline.fit_transform(X_train)
X_test_processed = Data_pipeline.transform(X_test)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense

from keras_tuner import Hyperband
import keras_tuner as kt
import tensorflow.keras.backend as K
from keras.metrics import AUC



In [None]:

def ANN_classifier(hp):
    """Build and compile a tunable feed-forward binary classifier.

    Intended as the model-building function handed to a KerasTuner tuner.
    The input width is read from the module-level ``X_train_processed``.

    Search space:
        * ``num_layers``: number of hidden Dense layers, 1 to 3.
        * ``units_{i}``: width of hidden layer ``i``, from 2x to 15x the
          number of input features, in steps of the number of input features.
        * ``activation_{i}``: activation of hidden layer ``i``.
        * ``dropout`` / ``dropout{i}``: whether to add Dropout after the third
          hidden layer, and its rate (0.2 to 0.5).
        * ``lr``: Adam learning rate, log-sampled between 1e-4 and 1e-2.

    Args:
        hp: KerasTuner ``HyperParameters`` object used to sample the values.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        binary cross-entropy loss, and binary-accuracy and AUC metrics.
    """
    num_input_val = X_train_processed.shape[1]
    activation_choices = ['relu', 'tanh', 'sigmoid', 'swish', 'linear']

    model = Sequential()
    # Declare the input width with an explicit Input layer. The previous code
    # set an unused attribute and built a Dense layer that was never added to
    # the model (and referenced an undefined name `X`).
    model.add(keras.Input(shape=(num_input_val,)))

    # Tune the number of hidden layers. The bounds are named explicitly:
    # passed positionally, (1, 2, 3) meant min=1, max=2, step=3, which only
    # ever produced one layer and made the dropout branch unreachable.
    for i in range(hp.Int("num_layers", min_value=1, max_value=3, step=1)):
        model.add(Dense(
            units=hp.Int(f'units_{i}',
                         min_value=num_input_val * 2,
                         max_value=num_input_val * 15,
                         step=num_input_val),
            # f-string so every layer gets its own activation hyperparameter.
            activation=hp.Choice(f'activation_{i}', activation_choices),
        ))
        # Tune whether to use dropout (third hidden layer onwards only).
        if i >= 2 and hp.Boolean("dropout"):
            # A dropout rate is fractional, so it is sampled with hp.Float.
            model.add(layers.Dropout(
                rate=hp.Float(f'dropout{i}', min_value=0.2, max_value=0.5, step=0.1)))

    # Define output layer
    model.add(Dense(1, activation='sigmoid'))

    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        # Pin the metric names so the logged keys stay stable across repeated
        # model builds; an objective such as "val_auc" relies on the AUC
        # metric being called exactly "auc".
        metrics=[keras.metrics.BinaryAccuracy(name='binary_accuracy'),
                 keras.metrics.AUC(name='auc')],
    )
    return model



Setup Hyperband Tuning

In [None]:
# Hyperband search over the ANN_classifier search space, ranking trials by the
# highest validation AUC.
auc_objective = kt.Objective("val_auc", direction="max")
tuner = kt.Hyperband(
    ANN_classifier,
    objective=auc_objective,
    max_epochs=10,
    factor=3,
    executions_per_trial=2,
    overwrite=False,
    directory='ANN_Hyperband_tuning',
    project_name='ANN_Raymond_Xu',
)

# Run the search: train on the processed training set and score every trial
# on the processed test set.
tuner.search(
    X_train_processed,
    y_train,
    epochs=10,
    validation_data=(X_test_processed, y_test),
)

NameError: name 'ANN_classifier' is not defined

In [None]:
# Ask the tuner for its single best model; the result is indexed as a list below.
best_classifiers = tuner.get_best_models(num_models=1)

In [None]:
# Take the top-ranked model and train it further on the processed training
# set for 5 epochs, with a 0.2 fraction of that data held out for validation.
best_classifier=best_classifiers[0]
best_classifier.fit(x=X_train_processed, y=y_train, batch_size=64, epochs=5, verbose=1, validation_split=0.2)


In [None]:
# Save the trained model to an .h5 file in the current working directory.
best_classifier.save('ANN_selected parameters_RX.h5')

In [None]:
# load fitted model
# best_classifier = keras.models.load_model('ANN_selected parameters_RX.h5')