In [1]:
import warnings
warnings.filterwarnings('ignore')

In [28]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

In [4]:
forest = pd.read_csv('forestfires.csv')
forest

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,small
3,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,0,1,0,0,0,0,small
4,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,1,0,0,0,0,small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,...,0,0,0,0,0,0,0,0,0,large
513,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,...,0,0,0,0,0,0,0,0,0,large
514,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,...,0,0,0,0,0,0,0,0,0,large
515,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,...,0,0,0,0,0,0,0,0,0,small


In [5]:
forest.shape

(517, 31)

In [6]:
forest.dtypes

month             object
day               object
FFMC             float64
DMC              float64
DC               float64
ISI              float64
temp             float64
RH                 int64
wind             float64
rain             float64
area             float64
dayfri             int64
daymon             int64
daysat             int64
daysun             int64
daythu             int64
daytue             int64
daywed             int64
monthapr           int64
monthaug           int64
monthdec           int64
monthfeb           int64
monthjan           int64
monthjul           int64
monthjun           int64
monthmar           int64
monthmay           int64
monthnov           int64
monthoct           int64
monthsep           int64
size_category     object
dtype: object

In [7]:
forest.isna().sum()

month            0
day              0
FFMC             0
DMC              0
DC               0
ISI              0
temp             0
RH               0
wind             0
rain             0
area             0
dayfri           0
daymon           0
daysat           0
daysun           0
daythu           0
daytue           0
daywed           0
monthapr         0
monthaug         0
monthdec         0
monthfeb         0
monthjan         0
monthjul         0
monthjun         0
monthmar         0
monthmay         0
monthnov         0
monthoct         0
monthsep         0
size_category    0
dtype: int64

### Data Preparation

In [8]:
forest2 = forest.copy()

In [10]:
le = LabelEncoder()
forest2['size_category'] = le.fit_transform(forest2['size_category'])
forest2['month'] = le.fit_transform(forest2['month'])
forest2['day'] = le.fit_transform(forest2['day'])
forest2.dtypes

month              int64
day                int64
FFMC             float64
DMC              float64
DC               float64
ISI              float64
temp             float64
RH                 int64
wind             float64
rain             float64
area             float64
dayfri             int64
daymon             int64
daysat             int64
daysun             int64
daythu             int64
daytue             int64
daywed             int64
monthapr           int64
monthaug           int64
monthdec           int64
monthfeb           int64
monthjan           int64
monthjul           int64
monthjun           int64
monthmar           int64
monthmay           int64
monthnov           int64
monthoct           int64
monthsep           int64
size_category      int64
dtype: object

### Model Building

In [12]:
x = forest2.drop(labels = 'size_category', axis = 1)
y = forest2[['size_category']]
x.shape, y.shape

((517, 30), (517, 1))

In [14]:
cols = x.columns
sc = StandardScaler()
x_scaled = sc.fit_transform(x)
x_scaled = pd.DataFrame(x_scaled, columns = cols)

In [15]:
x_train,x_test,y_train,y_test = train_test_split(x_scaled, y, test_size=0.2, stratify=y, random_state=0)

In [16]:
x_train.shape, y_train.shape

((413, 30), (413, 1))

In [18]:
x_test.shape, y_test.shape

((104, 30), (104, 1))

### Model Training | Testing | Evaluation

In [27]:
%%time
svm = SVC(kernel='linear')
svm.fit(x_train,y_train)
y_pred = svm.predict(x_test)

print('Accuracy           :', round(accuracy_score(y_test,y_pred),4))
print('Precision          :', round(precision_score(y_test,y_pred),4))
print('Recall             :', round(recall_score(y_test,y_pred),4))
print('Confusion matrix   :\n', confusion_matrix(y_test,y_pred))

Accuracy           : 0.9231
Precision          : 0.9048
Recall             : 1.0
Confusion matrix   :
 [[20  8]
 [ 0 76]]
Wall time: 16.8 ms


In [31]:
gridsearch = GridSearchCV(estimator  = svm,
                          param_grid = {'kernel' :['linear','rbf','poly'],
                                        'C'      :[0.1,0.5,1,2],
                                        'gamma'  :[0.1,0.01,0.5,1,2],
                                        'degree' :[1,3,5]},
                          cv         = 5)
gridsearch.fit(x_scaled,y)

GridSearchCV(cv=5, estimator=SVC(kernel='linear'),
             param_grid={'C': [0.1, 0.5, 1, 2], 'degree': [1, 3, 5],
                         'gamma': [0.1, 0.01, 0.5, 1, 2],
                         'kernel': ['linear', 'rbf', 'poly']})

In [32]:
gridsearch.best_params_

{'C': 2, 'degree': 1, 'gamma': 2, 'kernel': 'poly'}

#### Kernel : poly

In [33]:
%%time
svm = SVC(kernel='poly', C=2, degree=1, gamma=2)
svm.fit(x_train,y_train)
y_pred = svm.predict(x_test)

print('Accuracy           :', round(accuracy_score(y_test,y_pred),4))
print('Precision          :', round(precision_score(y_test,y_pred),4))
print('Recall             :', round(recall_score(y_test,y_pred),4))
print('Confusion matrix   :\n', confusion_matrix(y_test,y_pred))

Accuracy           : 0.9712
Precision          : 0.974
Recall             : 0.9868
Confusion matrix   :
 [[26  2]
 [ 1 75]]
Wall time: 23.2 ms


#### Kernel : rbf

In [34]:
%%time
svm = SVC(kernel='rbf', C=2, gamma=2)
svm.fit(x_train,y_train)
y_pred = svm.predict(x_test)

print('Accuracy           :', round(accuracy_score(y_test,y_pred),4))
print('Precision          :', round(precision_score(y_test,y_pred),4))
print('Recall             :', round(recall_score(y_test,y_pred),4))
print('Confusion matrix   :\n', confusion_matrix(y_test,y_pred))

Accuracy           : 0.7308
Precision          : 0.7553
Recall             : 0.9342
Confusion matrix   :
 [[ 5 23]
 [ 5 71]]
Wall time: 16.7 ms
