In [1443]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split

In [1444]:
# Read the forest-fires CSV from the current working directory into `df`
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='forestfires.csv')
df.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [1445]:
# Remove the X, Y, month and day columns, which are not used as features
# in this notebook. The drop happens in place, so re-running this cell on
# the already-trimmed frame raises KeyError.
df.drop(labels=['X', 'Y', 'month', 'day'], axis='columns', inplace=True)

# Print the first rows to confirm which columns remain.
print(df.head(n=5))

   FFMC   DMC     DC  ISI  temp  RH  wind  rain  area
0  86.2  26.2   94.3  5.1   8.2  51   6.7   0.0   0.0
1  90.6  35.4  669.1  6.7  18.0  33   0.9   0.0   0.0
2  90.6  43.7  686.9  6.7  14.6  33   1.3   0.0   0.0
3  91.7  33.3   77.5  9.0   8.3  97   4.0   0.2   0.0
4  89.3  51.3  102.2  9.6  11.4  99   1.8   0.0   0.0


In [1446]:
# Create a binary target column `fire_occurrence` derived from the `area` column.

# Only derive the target when `area` is present; otherwise `df` is left untouched.
if 'area' in df.columns:
    # Vectorised replacement for the row-by-row loop:
    #   1 where area > 0 (a fire occurred), 0 otherwise.
    # A missing value compares as False, so NaN areas map to 0, matching the
    # behaviour of the original `if i > 0 ... else ...` loop.
    # The explicit 'int64' keeps the same dtype the list-of-ints assignment produced.
    df['fire_occurrence'] = (df['area'] > 0).astype('int64')

# Preview the frame to confirm the new column has been added.
df.head()

Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,fire_occurrence
0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,0
1,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,0
2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,0
3,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,0
4,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,0


In [1447]:
# `area` has already been encoded into `fire_occurrence`, so remove it in place
# to keep it out of the feature set.
df.drop(labels=['area'], axis='columns', inplace=True)

# Preview the frame to confirm `area` is gone.
df.head(n=5)

Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,fire_occurrence
0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0
1,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0
2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0
3,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0
4,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0


In [1448]:
# Separate the target variable from the features.

# Label: the binary `fire_occurrence` column.
Y = df['fire_occurrence']

# Features: every remaining column (a new frame; `df` itself is not modified).
X = df.drop(labels=['fire_occurrence'], axis='columns')



In [1449]:
# Display the feature matrix X (all columns of df except `fire_occurrence`).
X


Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain
0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0
1,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0
2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0
3,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2
4,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0
...,...,...,...,...,...,...,...,...
512,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0
513,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0
514,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0
515,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0


In [1450]:
# Display the label Series Y (the `fire_occurrence` column of df).
Y

0      0
1      0
2      0
3      0
4      0
      ..
512    1
513    1
514    1
515    0
516    0
Name: fire_occurrence, Length: 517, dtype: int64

In [1451]:
# Split data into training (90%) and testing (10%) sets.
# A fixed `random_state` makes the shuffle deterministic, so the split -- and
# every metric computed from it further down -- is reproducible after
# Restart Kernel -> Run All instead of changing on every execution.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42
)


In [1452]:
# Standardize the numerical features before fitting the model.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits; the test split is never used for fitting.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [1453]:
# Build a logistic-regression classifier with scikit-learn's default settings.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

# Fit on the standardized training features and their labels; the fitted
# estimator is the cell's last expression, so the notebook displays it.
model.fit(X=X_train_scaled, y=Y_train)

In [1454]:
# Predict class labels for the standardized held-out test features.
Y_pred = model.predict(X=X_test_scaled)

In [1455]:
from sklearn.metrics import accuracy_score, classification_report

# Compare the test-set predictions against the true labels:
# overall accuracy plus a per-class precision / recall / F1 text report.
report = classification_report(y_true=Y_test, y_pred=Y_pred)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)

# Show the accuracy rounded to two decimals, followed by the full report.
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", report)

Model Accuracy: 0.48

Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.33      0.40        27
           1       0.47      0.64      0.54        25

    accuracy                           0.48        52
   macro avg       0.49      0.49      0.47        52
weighted avg       0.49      0.48      0.47        52



In [1456]:
# Saving the model (currently disabled: every line below is commented out).
# NOTE(review): when re-enabled, this writes both the fitted model and the
# fitted scaler, since predictions above are made on scaler-transformed features.
# import joblib

# joblib.dump(model, 'ml_wildfire_73.joblib')
# joblib.dump(scaler, 'scaler_73.joblib')