In [68]:
import pandas as pd
import numpy as np 
import seaborn as sns
%matplotlib inline

In [69]:
# Load the raw CSV. The file stacks two regional tables separated by a blank
# row, a title row and a repeated header row, so every column is read as
# `object` and must be cleaned and cast in the cells below.
dataset = pd.read_csv("Algerian_forest_fires_dataset.csv")
# Some header names carry stray whitespace (e.g. " RH", " Ws"). Normalise them
# right away so cells that index by the clean names do not depend on a later
# strip cell having been executed first.
dataset.columns = dataset.columns.str.strip()

In [70]:
# Preview the first five rows of the freshly loaded frame.
dataset.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire


In [71]:
# Summarise column dtypes and non-null counts before any cleaning.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 247 entries, 0 to 246
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   day          246 non-null    object
 1   month        245 non-null    object
 2   year         245 non-null    object
 3   Temperature  245 non-null    object
 4    RH          245 non-null    object
 5    Ws          245 non-null    object
 6   Rain         245 non-null    object
 7   FFMC         245 non-null    object
 8   DMC          245 non-null    object
 9   DC           245 non-null    object
 10  ISI          245 non-null    object
 11  BUI          245 non-null    object
 12  FWI          245 non-null    object
 13  Classes      244 non-null    object
dtypes: object(14)
memory usage: 27.1+ KB


## Data Cleaning

### Checking for null values

In [72]:
# Number of missing cells in each column of the raw frame.
dataset.isna().sum()

day            1
month          2
year           2
Temperature    2
 RH            2
 Ws            2
Rain           2
FFMC           2
DMC            2
DC             2
ISI            2
BUI            2
FWI            2
Classes        3
dtype: int64

In [73]:
# Show every row that has at least one missing cell.
dataset.loc[dataset.isna().any(axis="columns")]

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
122,,,,,,,,,,,,,,
123,Sidi-Bel Abbes Region Dataset,,,,,,,,,,,,,
168,14,7.0,2012.0,37.0,37.0,18.0,0.2,88.9,12.9,14.6 9,12.5,10.4,fire,


### Adding a `Region` column to record which region each row belongs to

In [74]:
# Start by labelling every row as Bejaia; the next cell relabels the rows that
# belong to the second region.
dataset = dataset.assign(Region="Bejaia")

In [75]:
# Rows from position 123 onwards (the "Sidi-Bel Abbes Region Dataset" title row
# and everything after it) belong to the second region.
dataset["Region"] = dataset["Region"].mask(dataset.index >= 123, "Sidi-Bel Abbes")

In [76]:
# Confirm that the new Region column is present on the first rows.
dataset.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire,Bejaia
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire,Bejaia
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire,Bejaia
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire,Bejaia
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire,Bejaia


In [77]:
# Inspect the rows from position 123 to the end (all columns).
dataset.iloc[123:]

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
123,Sidi-Bel Abbes Region Dataset,,,,,,,,,,,,,,Sidi-Bel Abbes
124,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Sidi-Bel Abbes
125,1,6,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not fire,Sidi-Bel Abbes
126,2,6,2012,30,73,13,4,55.7,2.7,7.8,0.6,2.9,0.2,not fire,Sidi-Bel Abbes
127,3,6,2012,29,80,14,2,48.7,2.2,7.6,0.3,2.6,0.1,not fire,Sidi-Bel Abbes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,26,9,2012,30,65,14,0,85.4,16,44.5,4.5,16.9,6.5,fire,Sidi-Bel Abbes
243,27,9,2012,28,87,15,4.4,41.1,6.5,8,0.1,6.2,0,not fire,Sidi-Bel Abbes
244,28,9,2012,27,87,29,0.5,45.9,3.5,7.9,0.4,3.4,0.2,not fire,Sidi-Bel Abbes
245,29,9,2012,24,54,18,0.1,79.7,4.3,15.2,1.7,5.1,0.7,not fire,Sidi-Bel Abbes


In [78]:
# Discard every row that contains at least one missing value.
dataset = dataset.dropna(axis="index", how="any")

In [79]:
# Re-check the per-column missing counts after the drop.
dataset.isna().sum()

day            0
month          0
year           0
Temperature    0
 RH            0
 Ws            0
Rain           0
FFMC           0
DMC            0
DC             0
ISI            0
BUI            0
FWI            0
Classes        0
Region         0
dtype: int64

## Converting columns into appropriate datatypes

In [80]:
# Renumber the rows 0..n-1 after the drop.
# NOTE(review): drop=False (the default) keeps the old labels as an "index"
# column; switch to drop=True if nothing later relies on that column.
dataset = dataset.reset_index(drop=False)


In [81]:
# Look at the row labelled 122 after renumbering.
dataset.loc[dataset.index.isin([122])]

Unnamed: 0,index,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
122,124,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Sidi-Bel Abbes


In [122]:
# List the current column labels.
dataset.columns

Index(['level_0', 'index', 'day', 'month', 'year', 'Temperature', 'RH', 'Ws',
       'Rain', 'FFMC', 'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes', 'Region'],
      dtype='object')

In [88]:
# Encode the region name as an integer code: Bejaia -> 0, Sidi-Bel Abbes -> 1.
mapping = {"Bejaia": 0, "Sidi-Bel Abbes": 1}
dataset = dataset.assign(Region=dataset["Region"].map(mapping))

In [133]:
# Convert the label column to a categorical and list its distinct categories,
# which exposes variants that differ only by whitespace.
dataset["Classes"] = pd.Categorical(dataset["Classes"])
dataset["Classes"].dtype.categories

Index(['fire', 'fire ', 'fire   ', 'not fire', 'not fire ', 'not fire   ',
       'not fire    ', 'not fire     '],
      dtype='object')

In [135]:
# Trim leading/trailing whitespace from the labels so the padded variants
# collapse into a single spelling each.
dataset = dataset.assign(Classes=dataset["Classes"].str.strip())

In [139]:
# Rebuild the categorical from the trimmed labels and list the categories again.
dataset["Classes"] = pd.Categorical(dataset["Classes"])
dataset["Classes"].dtype.categories

Index(['fire', 'not fire'], dtype='object')

In [140]:
# Encode the target: "not fire" -> 0, "fire" -> 1.
mapping = {"not fire": 0, "fire": 1}
# The repeated header row embedded in the data (its "Classes" cell literally
# reads "Classes") is only removed in a later cell. On a top-to-bottom run it is
# still present here, maps to NaN, and made `.astype(int)` raise. It is given a
# -1 placeholder instead; any other unknown label still fails loudly.
class_labels = dataset["Classes"].astype(object)
unexpected = set(class_labels.unique()) - set(mapping) - {"Classes"}
if unexpected:
    raise ValueError(f"Unexpected values in 'Classes': {sorted(map(repr, unexpected))}")
dataset["Classes"] = class_labels.map(mapping).fillna(-1).astype(int)

In [90]:
# Store the region code as a plain integer column.
dataset = dataset.astype({"Region": int})

In [102]:
# Locate the repeated header row: its "Temperature" cell holds the column name.
dataset.loc[dataset["Temperature"].eq("Temperature")]

Unnamed: 0,index,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,Region
122,124,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes,1


In [103]:
## Remove the repeated header row that starts the Sidi-Bel Abbes block (label 122
## at this point). It is located by content (its "Temperature" cell literally
## reads "Temperature") rather than by a hard-coded label, so re-running this
## cell is a harmless no-op instead of a KeyError or the loss of a real row.
header_rows = dataset.index[dataset["Temperature"] == "Temperature"]
dataset = dataset.drop(index=header_rows)

In [110]:
# Renumber the rows again now that the header row is gone.
# NOTE(review): because an "index" column already exists, drop=False (the
# default) stores the old labels in a new "level_0" column; consider drop=True
# if nothing later relies on it.
dataset = dataset.reset_index(drop=False)

In [111]:
# Trim stray whitespace from every column label (e.g. " RH" -> "RH").
dataset = dataset.rename(columns=str.strip)

In [104]:
# Temperature was read as text; cast it to integers.
dataset = dataset.astype({"Temperature": int})

In [105]:
# RH was read as text; cast it to integers.
dataset = dataset.astype({"RH": int})

In [106]:
# Ws was read as text; cast it to integers.
dataset = dataset.astype({"Ws": int})

In [123]:
# Rain and the index columns hold decimal values; cast them all to float at once.
float_columns = ["Rain", "FFMC", "DMC", "DC", "ISI", "BUI", "FWI"]
dataset = dataset.astype({column: float for column in float_columns})

In [141]:
# Verify the resulting dtypes and non-null counts after the conversions.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 17 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   level_0      243 non-null    int64  
 1   index        243 non-null    int64  
 2   day          243 non-null    object 
 3   month        243 non-null    object 
 4   year         243 non-null    object 
 5   Temperature  243 non-null    int32  
 6   RH           243 non-null    int32  
 7   Ws           243 non-null    int32  
 8   Rain         243 non-null    float64
 9   FFMC         243 non-null    float64
 10  DMC          243 non-null    float64
 11  DC           243 non-null    float64
 12  ISI          243 non-null    float64
 13  BUI          243 non-null    float64
 14  FWI          243 non-null    float64
 15  Classes      243 non-null    int32  
 16  Region       243 non-null    int32  
dtypes: float64(7), int32(5), int64(2), object(3)
memory usage: 27.7+ KB
