In [202]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report

In [203]:
# Load the data
file_path = Path("Resources/FW_Veg_Rem_Combined.csv")
fires_df = pd.read_csv(file_path)
fires_df = fires_df[["fire_size","fire_size_class","state","Vegetation","Temp_pre_30","Temp_pre_15","Temp_pre_7","Temp_cont","Wind_pre_30","Wind_pre_15","Wind_pre_7","Wind_cont","Hum_pre_30","Hum_pre_15","Hum_pre_7","Hum_cont","Prec_pre_30","Prec_pre_15","Prec_pre_7","Prec_cont","remoteness"]].copy()
fires_df.head(10)

Unnamed: 0,fire_size,fire_size_class,state,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,Temp_cont,Wind_pre_30,Wind_pre_15,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,10.0,C,PR,12,24.480974,24.716923,24.902597,24.527961,4.341807,3.492857,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,3.0,B,TN,15,7.553433,7.01,0.343529,10.448298,2.709764,2.881707,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,60.0,C,MS,16,4.97193,5.782766,5.55875,13.6966,3.364499,2.92383,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,1.0,B,NV,0,16.275967,18.996181,18.142564,0.0,4.054982,3.398329,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,2.0,B,LA,12,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633
5,1.0,B,GA,12,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.139643
6,5.2,B,GA,12,14.877341,16.409326,0.0,0.0,2.000214,1.727202,...,0.0,79.896679,73.431818,0.0,0.0,26.0,0.0,0.0,0.0,0.148904
7,1.0,B,TX,12,16.851939,16.997783,20.434783,11.98556,1.331257,1.472949,...,2.148857,72.899478,75.061381,77.924623,70.732911,28.4,27.5,1.2,55.4,0.241894
8,1.0,B,AR,15,26.655241,27.26487,28.968064,28.682688,1.768074,1.705297,...,2.10309,68.319022,67.575419,65.077844,60.196858,6.6,3.3,0.0,46.4,0.224629
9,1.0,B,ND,15,4.60095,6.861878,6.053333,0.0,6.38076,6.334254,...,0.0,64.606509,55.943038,54.337838,0.0,12.3,1.8,0.0,0.0,0.291683


In [204]:
# Filter Out unusable data
fires_df = fires_df[fires_df["state"]=="CA"]
fires_df = fires_df[fires_df["Temp_pre_30"]>0]
fires_df = fires_df[fires_df["Temp_pre_15"]>0]
fires_df = fires_df[fires_df["Temp_pre_7"]>0]
fires_df = fires_df[fires_df["Temp_cont"]>0]
fires_df = fires_df[fires_df["Wind_pre_30"]>0]
fires_df = fires_df[fires_df["Wind_pre_15"]>0]
fires_df = fires_df[fires_df["Wind_pre_7"]>0]
fires_df = fires_df[fires_df["Wind_cont"]>0]
fires_df = fires_df[fires_df["Hum_pre_30"]>0]
fires_df = fires_df[fires_df["Hum_pre_15"]>0]
fires_df = fires_df[fires_df["Hum_pre_7"]>0]
fires_df = fires_df[fires_df["Hum_cont"]>0]
fires_df = fires_df[fires_df["Prec_pre_30"]>-1]
fires_df = fires_df[fires_df["Prec_pre_15"]>-1]
fires_df = fires_df[fires_df["Prec_pre_7"]>-1]
fires_df = fires_df[fires_df["Prec_cont"]>-1]
fires_df.head(10)

Unnamed: 0,fire_size,fire_size_class,state,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,Temp_cont,Wind_pre_30,Wind_pre_15,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
78,450.0,E,CA,16,27.850483,28.165097,28.499405,33.9,2.58,2.664543,...,2.2375,44.706207,42.983379,49.0,39.375,0.0,0.0,0.0,0.0,0.47215
88,3.0,B,CA,15,19.685119,19.242174,19.495294,19.063744,3.893413,3.755263,...,3.751422,67.688935,68.16055,64.461538,66.811705,0.0,0.0,0.0,0.0,0.50175
192,1.0,B,CA,14,11.748991,13.265223,15.781065,14.75,1.548318,1.497429,...,1.102083,61.600837,60.522481,58.560897,62.888889,2.5,2.5,0.0,0.0,0.501108
274,1.3,B,CA,16,18.601678,17.351402,17.417453,13.176536,1.93557,2.238318,...,2.894382,58.164848,52.572127,50.336788,68.926554,0.0,0.0,0.0,27.9,0.461236
302,5.0,B,CA,12,17.951613,17.726636,18.071066,19.908578,2.254147,2.099766,...,1.995372,73.609502,74.295508,73.903553,79.30603,2.9,1.5,0.6,2.1,0.466788
309,2.0,B,CA,0,4.610849,5.639908,7.474468,8.600308,1.652524,1.36789,...,2.131963,72.852975,70.987654,76.985714,78.735746,12.2,3.8,3.8,49.1,0.499333
394,1.0,B,CA,0,16.092461,19.91453,21.184049,23.756098,3.233091,3.683086,...,4.05686,57.811463,45.103343,34.831081,43.1712,29.2,0.0,0.0,0.0,0.501295
436,1.0,B,CA,0,26.329749,28.340288,30.689062,28.071181,2.667626,2.358993,...,2.127778,33.115108,29.446043,27.703125,41.237762,0.0,0.0,0.0,0.0,0.497625
442,2.0,B,CA,0,22.488692,23.002949,20.030556,16.693987,2.714033,2.576944,...,2.174346,51.244202,50.099462,60.648045,55.608639,12.0,12.0,12.0,8.8,0.499643
453,1.0,B,CA,14,23.426667,27.206389,25.695833,26.551489,3.217222,2.839167,...,3.064965,43.815385,39.416667,42.446429,39.747518,21.0,0.0,0.0,3.0,0.520804


In [205]:
# Modify Fire size class column for binary classifier
fires_df["fire_size_class"] = fires_df["fire_size_class"].replace('G',1)
fires_df["fire_size_class"] = fires_df["fire_size_class"].replace('B',0)
fires_df["fire_size_class"] = fires_df["fire_size_class"].replace('C',0)
fires_df["fire_size_class"] = fires_df["fire_size_class"].replace('D',0)
fires_df["fire_size_class"] = fires_df["fire_size_class"].replace('E',0)
fires_df["fire_size_class"] = fires_df["fire_size_class"].replace('F',0)
fires_df["fire_size_class"] = fires_df["fire_size_class"].astype(int)

In [206]:
fires_df.dtypes

fire_size          float64
fire_size_class      int64
state               object
Vegetation           int64
Temp_pre_30        float64
Temp_pre_15        float64
Temp_pre_7         float64
Temp_cont          float64
Wind_pre_30        float64
Wind_pre_15        float64
Wind_pre_7         float64
Wind_cont          float64
Hum_pre_30         float64
Hum_pre_15         float64
Hum_pre_7          float64
Hum_cont           float64
Prec_pre_30        float64
Prec_pre_15        float64
Prec_pre_7         float64
Prec_cont          float64
remoteness         float64
dtype: object

In [207]:
# Separate the Features (X) from the Target (y)
y = fires_df["fire_size_class"]
X = fires_df.drop(columns=["fire_size_class","fire_size","state"])

In [208]:
# Split our data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=5, 
                                                    stratify=y)
X_train.shape


(1357, 18)

In [209]:
# Create a Logistic Regression Model
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=1000,
                                random_state=5)

In [210]:
# Fit (train) or model using the training data
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=1000, random_state=5)

In [211]:
# Make predictions
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
results.head(20)

Unnamed: 0,Prediction,Actual
0,0,0
1,0,1
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,1,0
8,0,0
9,0,0


In [212]:
# Print the balanced accuracy score
print(accuracy_score(y_test, y_pred))

0.9315673289183223


In [213]:
# Print the confusion matrix
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[379  11]
 [ 20  43]]


In [214]:
# Print the classification report
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.97      0.96       390
           1       0.80      0.68      0.74        63

    accuracy                           0.93       453
   macro avg       0.87      0.83      0.85       453
weighted avg       0.93      0.93      0.93       453

