In [149]:
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import pandas as pd 

In [222]:
# Read Dataset 
dataset=pd.read_csv("Algerian_forest_fires_dataset.csv")
dataset.columns

Index(['day', 'month', 'year', 'Temperature', ' RH', ' Ws', 'Rain ', 'FFMC',
       'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes  '],
      dtype='object')

# Data Cleaning

In [224]:
# Check for Missing Values 
dataset[dataset.isnull().any(axis=(1))]

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
122,,,,,,,,,,,,,,
123,Sidi-Bel Abbes Region Dataset,,,,,,,,,,,,,
168,14,7.0,2012.0,37.0,37.0,18.0,0.2,88.9,12.9,14.6 9,12.5,10.4,fire,


In [225]:
# remove the null values
dataset=dataset.dropna().reset_index(drop=True)
dataset.head()
dataset.isnull().sum()

day            0
month          0
year           0
Temperature    0
 RH            0
 Ws            0
Rain           0
FFMC           0
DMC            0
DC             0
ISI            0
BUI            0
FWI            0
Classes        0
dtype: int64

In [226]:
dataset.iloc[[122]]

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
122,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes


In [190]:
dataset.isnull().sum()

day            0
month          0
year           0
Temperature    0
 RH            0
 Ws            0
Rain           0
FFMC           0
DMC            0
DC             0
ISI            0
BUI            0
FWI            0
Classes        0
dtype: int64

In [227]:
dataset=dataset.drop(dataset.index[122]).reset_index(drop=True)


In [228]:
dataset.iloc[[122]]

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
122,1,6,2012,32,71,12,0.7,57.1,2.5,8.2,0.6,2.8,0.2,not fire


In [229]:
# Dataset columns 
dataset.columns

Index(['day', 'month', 'year', 'Temperature', ' RH', ' Ws', 'Rain ', 'FFMC',
       'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes  '],
      dtype='object')

In [230]:
# lets fix the spaces in the columns 
dataset.columns=dataset.columns.str.strip()

In [231]:
dataset.columns


Index(['day', 'month', 'year', 'Temperature', 'RH', 'Ws', 'Rain', 'FFMC',
       'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes'],
      dtype='object')

In [232]:
dataset.columns

Index(['day', 'month', 'year', 'Temperature', 'RH', 'Ws', 'Rain', 'FFMC',
       'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes'],
      dtype='object')

# Change the Required Columns As the Integer

In [233]:
dataset[['month','day','year','Temperature','RH','Ws']].astype(int)

Unnamed: 0,month,day,year,Temperature,RH,Ws
0,6,1,2012,29,57,18
1,6,2,2012,29,61,13
2,6,3,2012,26,82,22
3,6,4,2012,25,89,13
4,6,5,2012,27,77,16
...,...,...,...,...,...,...
238,9,26,2012,30,65,14
239,9,27,2012,28,87,15
240,9,28,2012,27,87,29
241,9,29,2012,24,54,18


In [201]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   month        243 non-null    int64
 1   day          243 non-null    int64
 2   year         243 non-null    int64
 3   Temperature  243 non-null    int64
 4   RH           243 non-null    int64
 5   Ws           243 non-null    int64
dtypes: int64(6)
memory usage: 11.5 KB


# Changing The Other Columns to Floating

In [219]:
objects=[features for features in dataset.columns if dataset[features].dtype=='O']
dataset.columns

Index(['month', 'day', 'year', 'Temperature', 'RH', 'Ws'], dtype='object')

In [164]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   month        243 non-null    int64
 1   day          243 non-null    int64
 2   year         243 non-null    int64
 3   Temperature  243 non-null    int64
 4   RH           243 non-null    int64
 5   Ws           243 non-null    int64
dtypes: int64(6)
memory usage: 11.5 KB


In [165]:
for obj in objects:
    if obj!='Classes':
        dataset[obj]=dataset[obj].astype(float)

In [166]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   month        243 non-null    int64
 1   day          243 non-null    int64
 2   year         243 non-null    int64
 3   Temperature  243 non-null    int64
 4   RH           243 non-null    int64
 5   Ws           243 non-null    int64
dtypes: int64(6)
memory usage: 11.5 KB


# Advanced EDA

In [167]:
# lets save the cleaned dataset

dataset.to_csv("Algerian_Cleaned_Dataset.csv",index=False)

# Exploratory Data Analysis

In [168]:
dfcopy=dataset

In [169]:
dfcopy

Unnamed: 0,month,day,year,Temperature,RH,Ws
0,6,1,2012,29,57,18
1,6,2,2012,29,61,13
2,6,3,2012,26,82,22
3,6,4,2012,25,89,13
4,6,5,2012,27,77,16
...,...,...,...,...,...,...
238,9,26,2012,30,65,14
239,9,27,2012,28,87,15
240,9,28,2012,27,87,29
241,9,29,2012,24,54,18


In [170]:
dfcopy=dfcopy.drop(['day','month','year'],axis=1)

In [171]:
dfcopy.head()

Unnamed: 0,Temperature,RH,Ws
0,29,57,18
1,29,61,13
2,26,82,22
3,25,89,13
4,27,77,16
