<a href="https://colab.research.google.com/github/GOLISHYAMP/Colab_Notebooks/blob/main/Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [18]:
# Mount Google Drive at /content/drive; the dataset read in the next cell
# lives under this mount point. google.colab is only importable on Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
# Load the cleaned Algerian forest fires CSV from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/Datasets/Algerian_forest_fires_Cleaned_dataset.csv')

In [20]:
# Preview the first rows to see which columns were loaded.
df.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire,0
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire,0
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire,0
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire,0
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire,0


# Let's understand the data

In [21]:
# Distinct values in the 'month' column.
df['month'].unique()

array([6, 7, 8, 9])

In [22]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df[[
    'Temperature', 'RH', 'Ws', 'Rain', 'FFMC',
    'DMC', 'DC', 'ISI', 'BUI', 'FWI',
]].describe()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI
count,243.0,243.0,243.0,243.0,243.0,243.0,243.0,243.0,243.0,243.0
mean,32.152263,62.041152,15.493827,0.762963,77.842387,14.680658,49.430864,4.742387,16.690535,7.035391
std,3.628039,14.82816,2.811385,2.003207,14.349641,12.39304,47.665606,4.154234,14.228421,7.440568
min,22.0,21.0,6.0,0.0,28.6,0.7,6.9,0.0,1.1,0.0
25%,30.0,52.5,14.0,0.0,71.85,5.8,12.35,1.4,6.0,0.7
50%,32.0,63.0,15.0,0.0,83.3,11.3,33.1,3.5,12.4,4.2
75%,35.0,73.5,17.0,0.5,88.3,20.8,69.1,7.25,22.65,11.45
max,42.0,90.0,29.0,16.8,96.0,65.9,220.4,19.0,68.0,31.1


In [23]:
# Distinct labels in the 'Classes' column.
df['Classes'].unique()

array(['not fire', 'fire'], dtype=object)

In [24]:
# Distinct values in the 'Region' column.
df['Region'].unique()

array([0, 1])

# Let's drop the unwanted columns

In [25]:
# Remove the 'day' and 'year' columns. Because df is reassigned to its own
# result, a second run of this cell would raise KeyError (the columns are
# already gone); errors='ignore' keeps the cell re-runnable. `columns=` already
# selects the axis, so the redundant `axis=1` is dropped.
df = df.drop(columns=['day', 'year'], errors='ignore')

In [26]:
# Show the frame after the drop to confirm 'day' and 'year' are gone.
df

Unnamed: 0,month,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,6,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire,0
1,6,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire,0
2,6,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire,0
3,6,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire,0
4,6,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,9,30,65,14,0.0,85.4,16.0,44.5,4.5,16.9,6.5,fire,1
239,9,28,87,15,4.4,41.1,6.5,8.0,0.1,6.2,0.0,not fire,1
240,9,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not fire,1
241,9,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not fire,1


# Transformations needed
* One-hot encoding for `month`
* Label encoding for `Classes`
* Standard scaling for the rest

In [36]:
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder, StandardScaler

class CustomModelPipeline:
    """Column-wise preprocessing plus a classifier behind fit/predict/score.

    Three groups of input columns are transformed independently and the
    combined result is passed to a ``BernoulliNB`` classifier:

    * ``SS_features``  -> ``StandardScaler``
    * ``LE_features``  -> ``OrdinalEncoder`` (one integer code per category)
    * ``OHE_features`` -> ``OneHotEncoder(drop="first")``

    Every listed column must be an input feature present in ``X``. The target
    is supplied separately as ``y`` and must not appear in any feature list.
    Columns of ``X`` that are not named in any list are dropped, which is
    ``ColumnTransformer``'s default ``remainder`` behaviour.
    """

    def __init__(self, OHE_features, SS_features, LE_features):
        """Store the column groups and build the (unfitted) pipeline.

        Args:
            OHE_features: column names to one-hot encode.
            SS_features: column names to standard-scale.
            LE_features: column names to encode as integer category codes.
        """
        self.OHE_features = OHE_features
        self.SS_features = SS_features
        self.LE_features = LE_features
        self.preprocessor = self._create_preprocessor()
        self.pipeline = self._create_pipeline()

    def _create_preprocessor(self):
        """Create a ColumnTransformer to handle different types of preprocessing"""
        # LabelEncoder is designed for the target: its fit_transform accepts a
        # single 1-D array, while ColumnTransformer calls fit_transform(X, y)
        # with a 2-D column block, which raises TypeError. OrdinalEncoder is
        # the feature-side equivalent and works on 2-D input. The step name
        # 'le' is kept so existing lookups by name still resolve.
        preprocessor = ColumnTransformer([
            ('num', StandardScaler(), self.SS_features),
            ('le', OrdinalEncoder(), self.LE_features),
            ('cat', OneHotEncoder(drop="first"), self.OHE_features)
        ])
        return preprocessor

    def _create_pipeline(self):
        """Create a Pipeline to combine the preprocessor and the model"""
        pipeline = Pipeline([
            ('preprocessor', self.preprocessor),
            ('classifier', BernoulliNB())  # You can change this to any other model
        ])
        return pipeline

    def fit(self, X, y):
        """Fit the preprocessing steps and the classifier on ``X`` / ``y``.

        Returns:
            self, so calls can be chained (``model.fit(X, y).score(...)``).
        """
        self.pipeline.fit(X, y)
        return self

    def predict(self, X):
        """Make predictions using the fitted pipeline"""
        return self.pipeline.predict(X)

    def score(self, X, y):
        """Return the underlying pipeline's score for ``X`` against ``y``."""
        return self.pipeline.score(X, y)

In [29]:
# List the remaining column names; they are grouped into feature lists below.
df.columns

Index(['month', 'Temperature', 'RH', 'Ws', 'Rain', 'FFMC', 'DMC', 'DC', 'ISI',
       'BUI', 'FWI', 'Classes', 'Region'],
      dtype='object')

In [37]:
# Column roles. 'Classes' is the label to predict, so it is kept out of every
# feature list (encoding it as an input would leak the answer into X) and is
# split off as y instead.
TARGET_COLUMN = 'Classes'
numerical_SS_features = ['Temperature', 'RH', 'Ws', 'Rain', 'FFMC', 'DMC', 'DC', 'ISI', 'BUI', 'FWI']
categorical_OHE_features = ['month']
categorical_LE_features = ['Region']

# Feature matrix and target vector used by the train/test split below.
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Creating and using the custom model pipeline
model_pipeline = CustomModelPipeline(categorical_OHE_features, numerical_SS_features, categorical_LE_features)


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit on the training partition, then report the score on the held-out rows.
model_pipeline.fit(X_train, y_train)
test_accuracy = model_pipeline.score(X_test, y_test)
print("Test accuracy:", test_accuracy)