In [1]:
import numpy as np 
import pandas as pd

In [2]:
df = pd.read_csv("Algerian_forest_fires_dataset_UPDATE.csv")

In [3]:
df.shape

(246, 14)

In [4]:
df.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire


In [5]:
df.drop(['day','month','year'], axis=1,inplace = True)

In [6]:
df.drop([123,124,125], axis=0,inplace = True)

In [7]:
df.isnull().sum()

Temperature    1
 RH            1
 Ws            1
Rain           1
FFMC           1
DMC            1
DC             1
ISI            1
BUI            1
FWI            1
Classes        2
dtype: int64

In [8]:
df.dropna(inplace = True)

In [9]:
df.isnull().sum()

Temperature    0
 RH            0
 Ws            0
Rain           0
FFMC           0
DMC            0
DC             0
ISI            0
BUI            0
FWI            0
Classes        0
dtype: int64

In [10]:
df.columns

Index(['Temperature', ' RH', ' Ws', 'Rain ', 'FFMC', 'DMC', 'DC', 'ISI', 'BUI',
       'FWI', 'Classes  '],
      dtype='object')

In [11]:
df['Classes  '].unique()

array(['not fire   ', 'fire   ', 'fire', 'fire ', 'not fire', 'not fire ',
       'not fire     ', 'not fire    '], dtype=object)

In [12]:
df['Classes  '] = df['Classes  '].apply(lambda x: x.rstrip())
df['Classes  '].unique()

array(['not fire', 'fire'], dtype=object)

In [13]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['Classes  '] = label_encoder.fit_transform(df['Classes  '])
  
df['Classes  '].unique()

array([1, 0])

In [14]:
df.head()

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
0,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,1
1,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,1
2,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,1
3,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,1
4,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,1


In [15]:
X = df.iloc[:,:-1]
Y = df.iloc[:,-1]

In [16]:
from sklearn.preprocessing import StandardScaler
x_scaled = StandardScaler().fit_transform(X)

In [17]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

In [18]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def to_print(y_test, y_pred):
    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix: \n", cm)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    print ("Accuracy:", accuracy)
    print('Precision:', precision)
    print('Recall:', recall)

In [19]:
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier()
clf.fit(X_train, y_train)
y_pred=clf.predict(X_test)
to_print(y_test,y_pred)

Confusion Matrix: 
 [[29  1]
 [ 0 19]]
Accuracy: 0.9795918367346939
Precision: 0.95
Recall: 1.0


In [20]:
n_estimator = [50, 100, 150]

for ne in n_estimator:
    print("---------------", "For n_estimator =", ne, "---------------")
    clf = AdaBoostClassifier(n_estimators = ne)
    clf.fit(X_train, y_train)
    y_pred=clf.predict(X_test)
    to_print(y_test,y_pred)

--------------- For n_estimator = 50 ---------------
Confusion Matrix: 
 [[29  1]
 [ 0 19]]
Accuracy: 0.9795918367346939
Precision: 0.95
Recall: 1.0
--------------- For n_estimator = 100 ---------------
Confusion Matrix: 
 [[30  0]
 [ 0 19]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
--------------- For n_estimator = 150 ---------------
Confusion Matrix: 
 [[29  1]
 [ 0 19]]
Accuracy: 0.9795918367346939
Precision: 0.95
Recall: 1.0


In [21]:
learning_rate = [1.0, 0.01, 0.001] 

for lr in learning_rate:
    print("---------------", "For learning rate =", lr, "---------------")
    clf = AdaBoostClassifier(learning_rate = lr)
    clf.fit(X_train, y_train)
    y_pred=clf.predict(X_test)
    to_print(y_test,y_pred)

--------------- For learning rate = 1.0 ---------------
Confusion Matrix: 
 [[29  1]
 [ 0 19]]
Accuracy: 0.9795918367346939
Precision: 0.95
Recall: 1.0
--------------- For learning rate = 0.01 ---------------
Confusion Matrix: 
 [[30  0]
 [ 0 19]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
--------------- For learning rate = 0.001 ---------------
Confusion Matrix: 
 [[30  0]
 [ 0 19]]
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
