In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the raw forest-fire records from disk and preview the first rows.
raw_csv_path = 'Algerian_forest_fires_dataset.csv'
df = pd.read_csv(raw_csv_path)
df.head()

In [None]:
df.info()

In [None]:
## Data Cleaning
df.isnull().sum()

In [None]:
df[df.isnull().any(axis=1)]

In [None]:
### The dataset contains two regional subsets that split at index 122, so we can add a new column that encodes the region:
### 1. Bejaia Region Dataset
### 2. Sidi-Bel Abbes Region Dataset

## Add new column with region

In [None]:
# .loc slicing is label-based and inclusive at both ends, so the two ranges
# below do not overlap: labels up to 121 become region 0 and labels from 122
# onward become region 1.
second_region_start = 122
df.loc[:second_region_start - 1, "Region"] = 0
df.loc[second_region_start:, "Region"] = 1

In [None]:
df.head()


In [None]:
df.tail()

In [None]:
# Store the region flag as an integer column.
df['Region'] = df['Region'].astype(int)

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
## Remove every row that contains a null value, then renumber the index from 0
rows_without_nulls = df.dropna()
df = rows_without_nulls.reset_index(drop=True)

In [None]:
df.head()

In [None]:
df.isnull().sum()

In [None]:
df.iloc[[122]]

In [None]:
# Remove the row labelled 122 and renumber the index from 0.
# NOTE(review): because the index is rebuilt afterwards, re-running this cell
# removes a further row -- run it exactly once per fresh kernel.
row_to_remove = 122
df = df.drop(index=row_to_remove).reset_index(drop=True)

In [None]:
df.iloc[[122]]

In [None]:
df.columns

In [None]:
## Strip leading/trailing whitespace from the column names
# assumes every column label is a string -- TODO confirm

df.columns = [column_name.strip() for column_name in df.columns]

In [None]:
df.columns

In [None]:
## Cast the date and weather-reading columns listed below to integers
int_columns = ['day', 'month', 'year', 'Temperature', 'RH', 'Ws']
df[int_columns] = df[int_columns].astype(int)

In [None]:
df.info()

In [None]:
df.head()


In [None]:
## Collect the names of the columns that still have object dtype
## (they are converted to float in the next cell)
objects = [name for name, dtype in df.dtypes.items() if dtype == 'O']

In [None]:
# Convert every remaining object column to float, leaving the 'Classes'
# label column untouched.
for column in objects:
    if column == 'Classes':
        continue
    df[column] = df[column].astype(float)

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
## Save the cleaned dataset (without the index column)
# NOTE(review): the output name has no '.csv' extension; it is kept as-is in
# case other code reads this exact path -- confirm before renaming.
cleaned_csv_path = 'Algerian_Forest_Fires_Cleaned_Dataset'
df.to_csv(cleaned_csv_path, index=False)

In [None]:
## Exploratory data analysis
# Work on an independent copy: a plain `dfcopy = df` only creates a second
# name for the same object, so any in-place edit made during the analysis
# would also change the cleaned frame `df`.
dfcopy = df.copy()

In [None]:
dfcopy.head()

In [None]:
# Drop the date components from the analysis frame.
date_columns = ['day', 'month', 'year']
dfcopy = dfcopy.drop(columns=date_columns)

In [None]:
dfcopy.head()

In [None]:
## Encode the class labels: 0 when the text contains 'not fire', otherwise 1
is_not_fire = dfcopy['Classes'].str.contains('not fire')
dfcopy['Classes'] = np.where(is_not_fire, 0, 1)

In [None]:
dfcopy.head()


In [None]:
dfcopy.tail()

In [None]:
dfcopy['Classes'].value_counts()

In [None]:
# Histograms of every column in dfcopy.
# matplotlib.pyplot is already imported as `plt` in the first cell, so it is
# not imported again here.

# Use updated style
plt.style.use('seaborn-v0_8')

dfcopy.hist(bins=50, figsize=(20,15))
plt.show()


In [None]:
## Share of each class, in percent, for the pie chart
class_shares = dfcopy['Classes'].value_counts(normalize=True)
percentage = class_shares.mul(100)

In [None]:
## Plotting pie chart
# value_counts() orders the classes by frequency, so the label of each slice
# is looked up from its encoded class value (1 = fire, 0 = not fire) instead
# of assuming that 'Fire' is always the larger slice.
class_names = {1: 'Fire', 0: 'Not Fire'}
classlabels = [class_names.get(code, str(code)) for code in percentage.index]
plt.figure(figsize=(12,7))
plt.pie(percentage,labels=classlabels,autopct='%1.1f%%')
plt.title('Pie Chart of Classes')
plt.show()

In [None]:
dfcopy.corr()

In [None]:
# Heatmap of the pairwise correlations between the columns of dfcopy.
correlation_matrix = dfcopy.corr()
sns.heatmap(correlation_matrix)

In [None]:
## Box plot of the FWI column to check for outliers
# NOTE(review): the Series is passed positionally; which parameter it binds
# to (x vs. data), and therefore the box orientation, presumably depends on
# the installed seaborn version -- confirm and pass it by keyword.
sns.boxplot(dfcopy['FWI'])

In [None]:
## Monthly Fire Analysis
## Count of each 'Classes' value per month, for the rows with Region == 1 only

# The plot reads the Region == 1 subset built here rather than the full frame.
dftemp = df.loc[df['Region']==1]
plt.subplots(figsize=(10,10))
sns.set_style('whitegrid')
sns.countplot(x='month',hue='Classes',data=dftemp)