In [1]:
import pickle

import numpy as np
import pandas as pd

from xgboost import XGBClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import cross_val_score, cross_validate

# Load Dataset

In [2]:
# Read the fire-classification table from the CSV that sits next to this notebook.
fires = pd.read_csv(filepath_or_buffer="./fires_classif.csv")

In [3]:
# Target is the "fire" column; every remaining column is used as a feature.
y = fires["fire"]
X = fires.drop(columns="fire")

# Evaluate Models

In [4]:
def evaluate(model, X=X, y=y, cv=10):
    """Cross-validate ``model`` and print its mean accuracy, precision, recall and F1.

    All four metrics are collected in a single ``cross_validate`` pass, so each
    fold is fitted once (instead of once per metric) and the four reported
    numbers are computed from the same fitted models.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn compatible classifier.
    X, y : feature table and target, optional
        Default to the notebook-level ``X`` and ``y``. Python binds default
        values when the ``def`` runs, so re-run this cell after rebinding
        ``X`` or ``y`` if the new data should become the default.
    cv : int or cross-validation splitter, default 10
        Forwarded unchanged to ``cross_validate``.

    Returns
    -------
    None
        The averaged scores are printed, not returned.
    """
    scores = cross_validate(
        model, X, y, cv=cv, scoring=("accuracy", "precision", "recall", "f1")
    )

    # cross_validate stores per-fold test scores under "test_<metric name>".
    accuracy = scores["test_accuracy"].mean()
    precision = scores["test_precision"].mean()
    recall = scores["test_recall"].mean()
    f1 = scores["test_f1"].mean()

    print(f"Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1: {f1}")

## Logistic Regression

In [5]:
# Linear baseline: logistic regression fitted with the L-BFGS solver.
evaluate(model=LogisticRegression(solver="lbfgs"))

Accuracy: 0.8638513513513513
Precision: 0.8232549593550604
Recall: 0.9007918552036198
F1: 0.8581266353615327


## Random Forest

In [6]:
# Fixed seed: bootstrap sampling and per-split feature sub-sampling are random,
# so without it the printed scores change on every run.
evaluate(RandomForestClassifier(n_estimators=200, random_state=42))

Accuracy: 0.9686212998713
Precision: 0.9671166722864836
Recall: 0.9591628959276018
F1: 0.9613499947885596


## XGBoost

In [21]:
# Gradient-boosted trees with the library's default hyper-parameters.
evaluate(model=XGBClassifier())

Accuracy: 0.9686212998713
Precision: 0.9584189318272017
Recall: 0.9746983408748114
F1: 0.9661556227879942


## SVMs

In [22]:
# Support vector classifier; gamma="auto" sets the kernel coefficient to 1 / n_features.
evaluate(model=SVC(gamma="auto"))

Accuracy: 0.9363256113256113
Precision: 0.9704842376312964
Recall: 0.88736802413273
F1: 0.9245580220367259


## Neural Network

In [23]:
# Fixed seed: weight initialisation and batch shuffling are random, so without
# it the printed scores change on every run. max_iter is raised to 1000 to give
# the optimiser more iterations to converge.
evaluate(MLPClassifier(max_iter=1000, random_state=42))

Accuracy: 0.8997265122265121
Precision: 0.9018554352732616
Recall: 0.8932126696832577
F1: 0.8931529790299221


# Save the Model

In [24]:
# Train the final classifier on the full dataset (no hold-out) before saving it.
model = XGBClassifier()
model.fit(X, y)

In [25]:
# Serialise the fitted classifier with pickle into the shared models directory.
with open("../../models/fire_proba.b", mode="wb") as model_file:
    pickle.dump(model, model_file)