In [11]:
import pickle

import numpy as np
import pandas as pd
from prophet import Prophet
from prophet.serialize import model_to_json
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


Importing plotly failed. Interactive plots will not work.


In [12]:
# Read the monthly feature table from the working directory and display it.
csv_path = 'Feature_Extraction-Dataset.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,year_month,average_temperature_median,maximum_temperature_median,minimum_temperature_median,precipitation_lag_median,snow_depth_lag_median,wind_speed_lag_median,maximum_sustained_wind_speed_lag_median,wind_gust_lag_median,dew_point_lag_median,fog_lag_mean,thunder_lag_mean,lat_lag_median,lon_lag_median,is_fire
0,2013-01,55.614471,64.960743,44.885953,0.011162,999.900000,7.991097,13.838489,956.092650,41.105964,0,0,32.669667,-6.103159,1
1,2013-02,54.778911,66.249974,42.401767,1.201047,999.899998,7.141677,20.923666,920.017515,52.711859,0,0,32.641867,-6.160246,0
2,2013-03,59.054068,68.776137,48.846444,1.082615,999.900009,9.067693,16.035876,822.054271,46.756065,0,0,32.383879,-6.408526,0
3,2013-04,65.017165,75.533435,53.436463,0.502933,999.900011,7.435010,15.745283,892.987485,49.527429,1,0,32.524870,-6.464255,0
4,2013-05,65.352257,76.215955,53.523060,0.252309,999.900019,8.493570,17.927379,865.032257,59.992219,0,0,32.973982,-5.748396,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,2022-08,84.000238,96.288051,93.613272,0.035227,999.900009,7.882804,15.680300,864.674894,56.863913,0,0,31.967810,-6.822561,0
116,2022-09,78.637368,90.033046,67.355292,0.147708,999.900014,7.430511,24.306215,838.256282,56.468733,0,1,32.025639,-6.742323,0
117,2022-10,73.483634,84.170288,61.543757,0.260507,999.900005,6.773829,15.716851,704.311435,53.248464,0,0,32.302431,-6.871057,0
118,2022-11,69.442692,82.068617,81.818989,0.035346,999.900010,5.928829,31.482264,829.438659,42.927102,0,0,31.846114,-7.196780,0


## Intermediate Feature: Forecasting with Prophet

In [15]:
# Parse the 'YYYY-MM' labels once, then split them into numeric calendar parts.
year_month_ts = pd.to_datetime(df['year_month'])
df['year'] = year_month_ts.dt.year
df['month'] = year_month_ts.dt.month

In [16]:
# The text label is redundant now that 'year' and 'month' exist as separate columns.
df = df.drop('year_month', axis='columns')

In [17]:
# Check the schema: 'year_month' should be gone and 'year' / 'month' should be present.
print(df.columns)

Index(['average_temperature_median', 'maximum_temperature_median',
       'minimum_temperature_median', 'precipitation_lag_median',
       'snow_depth_lag_median', 'wind_speed_lag_median',
       'maximum_sustained_wind_speed_lag_median', 'wind_gust_lag_median',
       'dew_point_lag_median', 'fog_lag_mean', 'thunder_lag_mean',
       'lat_lag_median', 'lon_lag_median', 'is_fire', 'year', 'month'],
      dtype='object')


In [18]:
# Inputs for the classifier: calendar parts first, then the weather and location aggregates.
feature_columns = [
    'year',
    'month',
    'average_temperature_median',
    'maximum_temperature_median',
    'minimum_temperature_median',
    'precipitation_lag_median',
    'snow_depth_lag_median',
    'wind_speed_lag_median',
    'maximum_sustained_wind_speed_lag_median',
    'wind_gust_lag_median',
    'dew_point_lag_median',
    'fog_lag_mean',
    'thunder_lag_mean',
    'lat_lag_median',
    'lon_lag_median',
]
# Series that Prophet models over time (the "intermediate feature").
intermediate_feature = 'average_temperature_median'
# Label column the classifier is trained to predict.
target_column = 'is_fire'


In [19]:
# Build a first-of-month timestamp from the numeric year/month parts, then keep only
# the date, the model features and the label, in that order.
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))
df = df[['date', *feature_columns, target_column]]

In [20]:
# Prophet expects the timestamp column to be named 'ds' and the observed value 'y'.
prophet_columns = {'date': 'ds', intermediate_feature: 'y'}
prophet_df = df[list(prophet_columns)].rename(columns=prophet_columns)

## Train The Prophet Model

In [21]:
# Fit Prophet with its default configuration on the ds/y frame built from the
# intermediate feature.
model_prophet = Prophet()
model_prophet.fit(prophet_df)


13:31:21 - cmdstanpy - INFO - Chain [1] start processing
13:31:21 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x117ecdf4d30>

In [22]:
# periods=0 adds no future rows, so `future` holds only the dates the model was fitted on
# and `forecast['yhat']` is an in-sample fit rather than an out-of-sample prediction.
future = model_prophet.make_future_dataframe(periods=0)  # Use existing dates only
forecast = model_prophet.predict(future)

In [23]:
# `forecast` is produced by Prophet, not derived from `df`, so match rows on the date
# itself instead of relying on both frames happening to share the same index labels
# and row order. Dates missing from the forecast become NaN rather than being
# silently filled with another month's value.
yhat_by_date = forecast.set_index('ds')['yhat']
df['predicted_intermediate_feature'] = df['date'].map(yhat_by_date)

## Preparing data for Logistic Regression

In [24]:
# NOTE(review): `feature_columns` does not include 'predicted_intermediate_feature', so the
# Prophet forecast computed earlier is not fed to the classifier; the observed
# 'average_temperature_median' is used instead. Confirm whether that is intended.
X, y = df[feature_columns], df[target_column]


In [30]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [31]:
# Fit a logistic-regression classifier on the standardized training features,
# using default hyperparameters apart from the fixed random_state.
model_logistic = LogisticRegression(random_state=42)
model_logistic.fit(X_train_scaled, y_train)

In [32]:
# Predict class labels for the held-out test rows (scaled with the training-fitted scaler).
y_pred_logistic = model_logistic.predict(X_test_scaled)

In [34]:
# Compute every evaluation artifact first, then print them in a fixed order.
test_confusion = confusion_matrix(y_test, y_pred_logistic)
test_report = classification_report(y_test, y_pred_logistic)
test_accuracy = accuracy_score(y_test, y_pred_logistic)

print("Confusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)
print("Accuracy:", test_accuracy)
print("\nPredictions:")
print(y_pred_logistic)

Confusion Matrix:
 [[ 8  1]
 [ 2 13]]

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.89      0.84         9
           1       0.93      0.87      0.90        15

    accuracy                           0.88        24
   macro avg       0.86      0.88      0.87        24
weighted avg       0.88      0.88      0.88        24

Accuracy: 0.875

Predictions:
[1 0 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0]


## Exporting the scaler and models

In [35]:
# Persist the fitted scaler and both models for reuse outside this notebook.
# The pickle files keep their original names so existing loaders continue to work.
# Only unpickle these files from a trusted location: pickle.load can execute arbitrary code.
pickled_artifacts = {
    'scaler.pkl': scaler,
    'prophet_model.pkl': model_prophet,
    'logistic_model.pkl': model_logistic,
}
for artifact_path, artifact in pickled_artifacts.items():
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

# Prophet's documentation advises against pickling fitted models (the attached Stan
# backend does not pickle reliably across environments), so also write the portable
# JSON form. Reload it with prophet.serialize.model_from_json.
with open('prophet_model.json', 'w') as f:
    f.write(model_to_json(model_prophet))

In [36]:
# Preview the first rows, which now include the 'predicted_intermediate_feature' column.
df.head()

Unnamed: 0,date,year,month,average_temperature_median,maximum_temperature_median,minimum_temperature_median,precipitation_lag_median,snow_depth_lag_median,wind_speed_lag_median,maximum_sustained_wind_speed_lag_median,wind_gust_lag_median,dew_point_lag_median,fog_lag_mean,thunder_lag_mean,lat_lag_median,lon_lag_median,is_fire,predicted_intermediate_feature
0,2013-01-01,2013,1,55.614471,64.960743,44.885953,0.011162,999.9,7.991097,13.838489,956.09265,41.105964,0,0,32.669667,-6.103159,1,52.9404
1,2013-02-01,2013,2,54.778911,66.249974,42.401767,1.201047,999.899998,7.141677,20.923666,920.017515,52.711859,0,0,32.641867,-6.160246,0,54.751613
2,2013-03-01,2013,3,59.054068,68.776137,48.846444,1.082615,999.900009,9.067693,16.035876,822.054271,46.756065,0,0,32.383879,-6.408526,0,58.364758
3,2013-04-01,2013,4,65.017165,75.533435,53.436463,0.502933,999.900011,7.43501,15.745283,892.987485,49.527429,1,0,32.52487,-6.464255,0,63.144424
4,2013-05-01,2013,5,65.352257,76.215955,53.52306,0.252309,999.900019,8.49357,17.927379,865.032257,59.992219,0,0,32.973982,-5.748396,0,67.853842
