In [1]:
## importing libraries

import pandas as pd
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt

In [2]:
# Read the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='cleaned_forest_fires.csv')

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,0,0
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,0,0
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,0,0
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,0,0
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,0,0


In [4]:
# Remove the calendar columns, which are not used as model inputs.
# Rebinding `df` (instead of inplace=True) together with errors="ignore"
# makes this cell safe to re-run: a second execution no longer raises
# KeyError because the columns are already gone.
df = df.drop(columns=['day', 'month', 'year'], errors='ignore')

In [5]:
# Preview the first five rows again, now without day/month/year.
df.head(n=5)

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,0,0
1,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,0,0
2,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,0,0
3,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,0,0
4,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,0,0


In [6]:
# Separate the regression target (FWI) from the feature matrix.
# "Unnamed: 0" appears to be a row-index column carried over from the CSV
# (its values match the row positions in the preview), not a measurement,
# so it is excluded from the features as well. errors="ignore" keeps the
# cell working if the CSV has no such column; a missing FWI column still
# fails loudly on the line below.
y = df['FWI']
x = df.drop(columns=['FWI', 'Unnamed: 0'], errors='ignore')


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 35% of the rows for evaluation; the fixed seed keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.35,
    random_state=42,
)

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply
# the same fitted scaler to both splits so the test split never
# influences the statistics.
scaler = StandardScaler()
scaler.fit(x_train)

x_train_scaler = scaler.transform(x_train)
x_test_scaler = scaler.transform(x_test)

### model training 

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Fit plain linear regression on the scaled training features and
# predict the held-out split.
lr = LinearRegression().fit(x_train_scaler, y_train)
y_pred = lr.predict(x_test_scaler)

# Report test-set error and goodness of fit.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)


Mean Squared Error: 0.5462154677376376
R^2 Score: 0.9888025258717843


In [10]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Fit Ridge regression with its default settings on the scaled training
# features and predict the held-out split.
ridge = Ridge().fit(x_train_scaler, y_train)
y_pred = ridge.predict(x_test_scaler)

# Report test-set error and goodness of fit.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

Mean Squared Error: 0.604825394000634
R^2 Score: 0.9876010162629394


In [11]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Fit Lasso regression with its default settings on the scaled training
# features and predict the held-out split.
lasso = Lasso().fit(x_train_scaler, y_train)
y_pred = lasso.predict(x_test_scaler)

# Report test-set error and goodness of fit.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

Mean Squared Error: 2.2965165925308746
R^2 Score: 0.9529211700349831


## cross validation and hyperparameter tuning 

In [12]:
from sklearn.linear_model import LassoCV

# Lasso with the regularisation strength chosen by 5-fold cross-validation
# on the scaled training split.
lasso_cv = LassoCV(cv=5)
# Kept as the cell's last expression so the fitted estimator is displayed.
lasso_cv.fit(X=x_train_scaler, y=y_train)

In [13]:
# Evaluate the cross-validated Lasso on the held-out test split.
# Relies on mean_squared_error / r2_score imported by an earlier cell.
y1_pred = lasso_cv.predict(X=x_test_scaler)

mse = mean_squared_error(y_true=y_test, y_pred=y1_pred)
r2 = r2_score(y_true=y_test, y_pred=y1_pred)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

Mean Squared Error: 0.5736832430295679
R^2 Score: 0.9882394336099248


In [14]:
# Grid of alpha values that LassoCV tried during fitting.
# (The name is kept as-is for compatibility; "tired" is a typo for "tried".)
all_tired_alphas = lasso_cv.alphas_

# Alpha value selected by cross-validation, i.e. the best one found.
final_alpha = lasso_cv.alpha_

In [15]:
# Show the alpha value chosen by cross-validation.
print("final_alpha", final_alpha, sep=" ")

final_alpha 0.03493541667440542


In [16]:
import os

import joblib
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Bundle the already-fitted preprocessing and estimator objects so that a
# single saved artifact carries both steps.
fitted_steps = [
    ("scaler", scaler),    # the same scaler instance fitted on the training split
    ("model", lasso_cv),   # the fitted LassoCV; ridge or lr could be swapped in
]
pipeline = Pipeline(steps=fitted_steps)

# Make sure the output directory exists, then serialise the pipeline.
os.makedirs("./models", exist_ok=True)
joblib.dump(pipeline, "./models/model.pkl")

print("✅ Model pipeline saved")

✅ Model pipeline saved
