In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline


In [2]:
# Read the forest-fires CSV (path is relative to the notebook's working directory)
dataset_path = 'Algerian_forest_fires_cleaned_dataset.csv'
df = pd.read_csv(dataset_path)

In [21]:
# Preview the first five rows of the frame.
# NOTE(review): the saved output was produced out of order (execution count 21),
# after the drop/encode cells below had already run; a fresh top-to-bottom run
# will presumably show the frame as loaded instead.
df.head(n=5)

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,0,0
1,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,0,0
2,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,0,0
3,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,0,0
4,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,0,0


In [3]:
# Drop unnecessary columns.
# `df` is rebound to the result instead of using inplace=True, and
# errors='ignore' skips labels that are already absent, so re-running this
# cell after the columns are gone no longer raises KeyError.
df = df.drop(columns=['day', 'month', 'year'], errors='ignore')

In [4]:
# Encode 'Classes' as 0 (label contains "not fire") and 1 (anything else).
# The dtype guard makes the cell safe to re-run: once the column holds the
# integer codes, the `.str` accessor would raise AttributeError, so an
# already-numeric column is left untouched.
if not pd.api.types.is_numeric_dtype(df['Classes']):
    df['Classes'] = np.where(df['Classes'].str.contains("not fire"), 0, 1)

In [5]:
# Target is the FWI column; every remaining column of df is a predictor
target_column = 'FWI'
y = df[target_column]
X = df.drop(columns=[target_column])

In [22]:
# Display the feature frame X (df without the FWI column)
X

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,Classes,Region
0,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0,0
1,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0,0
2,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0,0
3,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0,0
4,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0,0
...,...,...,...,...,...,...,...,...,...,...,...
238,30,65,14,0.0,85.4,16.0,44.5,4.5,16.9,1,1
239,28,87,15,4.4,41.1,6.5,8.0,0.1,6.2,0,1
240,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0,1
241,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0,1


In [23]:
# Display the target series y (the FWI column of df)
y

0      0.5
1      0.4
2      0.1
3      0.0
4      0.5
      ... 
238    6.5
239    0.0
240    0.2
241    0.7
242    0.5
Name: FWI, Length: 243, dtype: float64

In [6]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the
# shuffle repeatable between runs
split_parts = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Custom Transformer for Correlation Selection (part of data transformation)
# NOTE(review): incomplete draft -- it defines no fit/transform of its own, and a
# later cell in this notebook redefines CorrelationSelector, replacing this one.
class CorrelationSelector(BaseEstimator, TransformerMixin):
    """Draft correlation-based selector that only stores its configuration."""
    def __init__(self, threshold=0.85):
        # Cutoff supplied by the caller (default 0.85)
        self.threshold = threshold
        # Starts as an empty set; nothing in this draft ever adds to it
        self.correlated_features = set()

In [12]:
# Custom Transformer for Correlation Selection (corrected)
class CorrelationSelector(BaseEstimator, TransformerMixin):
    """Drop columns that are highly correlated with an earlier column.

    ``fit`` computes the pairwise correlation matrix of ``X`` with
    ``DataFrame.corr()``. A column is marked for removal when its absolute
    correlation with any column positioned before it is strictly greater than
    ``threshold``. ``transform`` drops the marked columns.

    Parameters
    ----------
    threshold : float, default=0.85
        Absolute-correlation cutoff; the comparison is strictly greater-than.

    Attributes
    ----------
    correlated_features : set
        Column labels chosen for removal by the most recent ``fit`` call.
        Empty until ``fit`` has been called.
    """

    def __init__(self, threshold=0.85):
        self.threshold = threshold
        # Kept here (not only in fit) so the attribute exists before fitting,
        # as it did originally; fit() always replaces it.
        self.correlated_features = set()

    def fit(self, X, y=None):
        """Determine which columns of ``X`` to drop.

        Parameters
        ----------
        X : pandas.DataFrame
            Data whose column correlations are inspected (``X.corr()`` is called).
        y : ignored
            Accepted only so the transformer can be used inside a Pipeline.

        Returns
        -------
        self
        """
        abs_corr = X.corr().abs()
        n_cols = len(abs_corr.columns)
        # True at (i, j) only when j < i, i.e. column j comes before column i.
        earlier_column = np.tri(n_cols, k=-1, dtype=bool)
        # NaN correlations compare as False, so they never mark a column.
        above_threshold = abs_corr.to_numpy() > self.threshold
        marked = (above_threshold & earlier_column).any(axis=1)
        # Rebuild the set from scratch: previously the set created in __init__
        # was only ever added to, so fitting the same instance again kept
        # columns selected by earlier fits.
        self.correlated_features = set(abs_corr.columns[marked])
        return self

    def transform(self, X):
        """Return ``X`` without the columns recorded by ``fit``.

        Raises
        ------
        KeyError
            If ``X`` lacks one of the recorded columns (raised by ``DataFrame.drop``).
        """
        return X.drop(columns=list(self.correlated_features))

In [13]:
# Data Transformation Pipeline: remove correlated columns, then scale the rest
transform_steps = [
    ('correlation_selector', CorrelationSelector(threshold=0.85)),
    ('scaler', StandardScaler()),
]
data_transform_pipeline = Pipeline(steps=transform_steps)

In [14]:
# Fit the transformation pipeline on the training split, then apply the fitted
# pipeline to the test split (the tuple is evaluated left to right, so the fit
# happens before the test data is transformed)
X_train_transformed, X_test_transformed = (
    data_transform_pipeline.fit_transform(X_train),
    data_transform_pipeline.transform(X_test),
)


In [15]:
# Model Training Pipeline: a single step named 'model' holding a LinearRegression
regressor = LinearRegression()
model_pipeline = Pipeline(steps=[('model', regressor)])

In [16]:
# Train the model pipeline on the already-transformed training features
model_pipeline.fit(X=X_train_transformed, y=y_train)

In [18]:
# Predict the target for the transformed test features
y_pred = model_pipeline.predict(X=X_test_transformed)

In [19]:
# Score the predictions against the held-out targets
mae, r2 = (
    mean_absolute_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

In [20]:
# Report both metrics, one per line
print(f"Mean Absolute Error: {mae}", f"R2 Score: {r2}", sep="\n")

Mean Absolute Error: 0.5468236465249978
R2 Score: 0.9847657384266951
