# Wildfire Prediction with Logistic Regression


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV   
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

## Load the Data


In [None]:
train_df = pd.read_csv("wildfires_training.csv")
test_df = pd.read_csv("wildfires_test.csv")

print(train_df)
print(test_df)

    fire  year  temp  humidity  rainfall  drought_code  buildup_index  day  \
0     no  2015    28        59       0.0          8.06           3.47    1   
1     no  2010    30        61       1.3          8.17           4.03    2   
2     no  2009    26        83      13.1          8.08           3.59    3   
3     no  2017    25        87       2.5          7.18           2.42    4   
4     no  2014    28        77       0.0         14.98           4.63    5   
..   ...   ...   ...       ...       ...           ...            ...  ...   
149  yes  2017    31        67       0.0         45.15          17.89   26   
150   no  2017    29        89       4.4          8.74           6.52   27   
151   no  2009    27        88       0.5          8.87           3.71   28   
152   no  2016    25        56       0.1         15.54           6.10   29   
153   no  2012    24        62       0.2         16.72           5.75   30   

     month  wind_speed  
0        6          19  
1        6   

## Preprocess the Data


In [36]:
X_train = train_df.drop("fire", axis=1)
Y_train = train_df["fire"]

X_test = test_df.drop("fire", axis=1)
Y_test = test_df["fire"]


## Encode labels

In [37]:
encoder = LabelEncoder()
Y_train = encoder.fit_transform(Y_train)
Y_test = encoder.transform(Y_test)


## Define model and train



In [None]:
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train,Y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [40]:
model_accuracy  = accuracy_score(Y_test, log_model.predict(X_test))
print("The Accuracy of simple logistic regression without hyperparameter:", model_accuracy)

The Accuracy of simple logistic regression without hyperparameter: 0.9


## Parameter grid for hyperparameter tuning

In [55]:
penalties = ['l2', 'none']
C_values = [0.1, 1, 10]
solvers = ['lbfgs', 'newton-cg']

## Apply best Hyperparameter to Log regression

In [56]:
best_score = 0
best_params = None

for penalty in penalties:
    for C in C_values:
        for solver in solvers:
            try:
                model = LogisticRegression(
                    penalty=penalty, C=C, solver=solver, max_iter=1000
                )
                model.fit(X_train, y_train)
                score = accuracy_score(y_test, model.predict(X_test))

                if score > best_score:
                    best_score = score
                    best_params = (penalty, C, solver)

                print(f"penalty={penalty}, C={C}, solver={solver}, acc={score:.3f}")
            except Exception:
                continue

penalty=l2, C=0.1, solver=lbfgs, acc=0.820
penalty=l2, C=0.1, solver=newton-cg, acc=0.820
penalty=l2, C=1, solver=lbfgs, acc=0.900
penalty=l2, C=1, solver=newton-cg, acc=0.900
penalty=l2, C=10, solver=lbfgs, acc=0.880
penalty=l2, C=10, solver=newton-cg, acc=0.880


Evaludate

In [57]:
print("\nBest params:", best_params)
print("Best accuracy:", best_score)



Best params: ('l2', 1, 'lbfgs')
Best accuracy: 0.9
