In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle

In [2]:
#Obtaining the dataset
data = pd.read_csv('C:/Users/harch/OneDrive/Desktop/Algerian_forest_fires_cleaned.csv')

In [8]:
#Handling outliers using IQR for the numeric columns
for column in ['Temperature', 'Rain', 'Ws', 'RH']:
    Q1 = data[column].quantile(0.25)
    Q3 = data[column].quantile(0.75)
    IQR = Q3 - Q1
    data = data[(data[column] >= (Q1 - 1.5 * IQR)) & (data[column] <= (Q3 + 1.5 * IQR))]

In [9]:
#Spliting the dataset
X = data[['Temperature', 'Rain', 'Ws']]
y = data['RH']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# 2. Model Training:

#Training the model
model = LinearRegression()
model.fit(X_train, y_train)

In [11]:
# 3. Model Evaluation:

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'MSE: {mse}, MAE: {mae}, R2: {r2}')

MSE: 150.30530704741477, MAE: 10.196041087456493, R2: 0.3860841161757601


In [14]:
# 4. Pickling the Model:

with open('linear_regression_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# 5. Prediction on Unseen Data:
unseen_data = pd.read_csv('C:/Users/harch/OneDrive/Desktop/Algerian_forest_fires_cleaned.csv')

X_unseen = unseen_data[['Temperature', 'Rain', 'Ws']]

In [16]:
#Load the pickled model
with open('linear_regression_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

#Predict on unseen data
y_unseen_pred = loaded_model.predict(X_unseen)

In [18]:
#Evaluate the model's performance on unseen data
mse_unseen = mean_squared_error(unseen_data['RH'], y_unseen_pred)
mae_unseen = mean_absolute_error(unseen_data['RH'], y_unseen_pred)
r2_unseen = r2_score(unseen_data['RH'], y_unseen_pred)
print(f'MSE (Unseen): {mse_unseen}, MAE (Unseen): {mae_unseen}, R2 (Unseen): {r2_unseen}')

MSE (Unseen): 160.79511817723431, MAE (Unseen): 9.860877612856338, R2 (Unseen): 0.265673446550174
