In [15]:
''' 
    Version 2.0 - Support Vector Classifier
    Data Source: European Space Agency - ERA5
                 Government of Alberta - Historical Wildfire registry and Fire Weather Indices
                 Natural Resources Canada - Vegetation Classification of Canada
'''
# General imports
import pandas as pd

# SVM specific functions
from SVM_functions import SVM_test_train_validation_split, SVM_pipeline

# Other imports appear where needed. Specifically, imports joblib and parts of sklearn.

In [17]:
## Test, Train, Validation Splits ##

# Load wildfire dataframe and convert date column to datetime
main_df = pd.read_csv("downsampled_df.csv",index_col=0)
main_df['date'] = pd.to_datetime(main_df['date'])

# Create training and validations dataframes
validation_df = main_df[main_df['date'] > pd.Timestamp('2019-01-01')]
test_train_df = main_df[main_df['date'] < pd.Timestamp('2019-01-01')]

# Get splits
X_train, X_test, X_validation, y_train, y_test, y_validation = SVM_test_train_validation_split(validation_df,test_train_df)

In [21]:
## Pipeline ##

pass_features = [ 'leaf_area_index_high_vegetation']
categorical_features = []
numeric_features = ['fire_count_past_3Days', 'global_noon_LST_2m_temperature', 'FFMC', 'DMC', 'FWI', 'BUI', 'global_noon_LST_relative_humidity', '24hr_max_temperature']
Data_pipeline = SVM_pipeline(pass_features,categorical_features,numeric_features)

# Prepare the train and test data
X_train_processed=Data_pipeline.fit_transform(X_train)
X_test_processed=Data_pipeline.transform(X_test)

In [24]:
## Grid Search for Current Model ##

from sklearn.metrics import accuracy_score, f1_score, make_scorer
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

#F1 scorer
f1 = make_scorer(f1_score , average='macro')

# Parameters
p_grid = {"C": [6, 8, 10],
          "gamma": ['scale', 0.01, 0.1]
}

sv_classifier = SVC(kernel="rbf",random_state=42)
grid_search = GridSearchCV(sv_classifier, p_grid, cv=4, scoring=f1,n_jobs=-1,verbose=1)
grid_search.fit(X_train_processed, y_train)

# Best parameters and best score
print("Best parameters:", grid_search.best_params_)
print(f"F1 Score: {100*grid_search.best_score_:.2f}%")

best_classifier = grid_search.best_estimator_

# Fitting 4 folds for each of 9 candidates, totalling 36 fits
# Best parameters: {'C': 10, 'gamma': 0.1}
# F1 Score: 74.14%

Fitting 4 folds for each of 9 candidates, totalling 36 fits
Best parameters: {'C': 10, 'gamma': 0.1}
F1 Score: 74.14%


In [25]:
## Save the model ##

import joblib
joblib.dump(best_classifier, 'svc_model_reduced_features_V1.joblib')

['svc_model_reduced_features_V1.joblib']

In [26]:
## Performance on Validation Set ##

# Predict on the validation set
y_test_pred = best_classifier.predict(X_test_processed)

# Evaluate the accuracy of the model on the validation set
accuracy = accuracy_score(y_test, y_test_pred)
print("Validation set accuracy:", accuracy)
print('f1 score: ',f1_score(y_test, y_test_pred))

Validation set accuracy: 0.8240497250776955
f1 score:  0.6003800732998507
