In [1]:
#Import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['figure.figsize'] = (10,5)
plt.rcParams['figure.dpi'] = 200
import warnings
warnings.filterwarnings('ignore')
from sklearn.svm import SVC
sns.set_theme(style='darkgrid', palette='rainbow')
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
%matplotlib inline

In [4]:
#Load data
df = pd.read_csv('forestfires.csv')
df.drop(columns=['month','day'], inplace=True)
df

Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,dayfri,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00,1,...,0,0,0,0,1,0,0,0,0,small
1,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00,0,...,0,0,0,0,0,0,0,1,0,small
2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00,0,...,0,0,0,0,0,0,0,1,0,small
3,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00,1,...,0,0,0,0,1,0,0,0,0,small
4,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00,0,...,0,0,0,0,1,0,0,0,0,small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44,0,...,0,0,0,0,0,0,0,0,0,large
513,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29,0,...,0,0,0,0,0,0,0,0,0,large
514,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16,0,...,0,0,0,0,0,0,0,0,0,large
515,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00,0,...,0,0,0,0,0,0,0,0,0,small


In [7]:
from sklearn.preprocessing import LabelEncoder

lb = LabelEncoder()
df['size_category'] = lb.fit_transform(df['size_category'])
df.head()


Unnamed: 0,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,dayfri,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,1,...,0,0,0,0,1,0,0,0,0,1
1,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,1
2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,0,...,0,0,0,0,0,0,0,1,0,1
3,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,1,...,0,0,0,0,1,0,0,0,0,1
4,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,0,...,0,0,0,0,1,0,0,0,0,1


In [11]:
x = df.iloc[:,:-1]
y = df['size_category']


In [12]:
sta_scaler = StandardScaler()
x = sta_scaler.fit_transform(x)
x

array([[-8.05959472e-01, -1.32332557e+00, -1.83047676e+00, ...,
        -4.40225453e-02, -1.72859706e-01, -7.06081245e-01],
       [-8.10203395e-03, -1.17954077e+00,  4.88890915e-01, ...,
        -4.40225453e-02,  5.78503817e+00, -7.06081245e-01],
       [-8.10203395e-03, -1.04982188e+00,  5.60715454e-01, ...,
        -4.40225453e-02,  5.78503817e+00, -7.06081245e-01],
       ...,
       [-1.64008316e+00, -8.46647711e-01,  4.74768113e-01, ...,
        -4.40225453e-02, -1.72859706e-01, -7.06081245e-01],
       [ 6.80956663e-01,  5.49002541e-01,  2.69382214e-01, ...,
        -4.40225453e-02, -1.72859706e-01, -7.06081245e-01],
       [-2.02087875e+00, -1.68591332e+00, -1.78044169e+00, ...,
         2.27156334e+01, -1.72859706e-01, -7.06081245e-01]])

In [13]:
#Splitting data into training and testing
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2, random_state=42)

In [14]:
#Model building using rbf. we have 4 kernels: poly,linear,rbf,sigmoid

svc = SVC(kernel='rbf')
svc.fit(xtrain,ytrain)
ypred = svc.predict(xtest)

In [16]:
#Accuracy of after training and testing
svc.score(xtrain,ytrain), svc.score(xtest,ytest)

(0.7893462469733656, 0.7596153846153846)

The above accuracy scores shows that the model created by us is a generalized model. But the accuracy is less so we are moving into hyperparameter tuning to get correct paramater.

# Hyperparamater tuning

In [17]:
params = { 'kernel': ['poly','linear','rbf'],
           'C': [0.1,0.001,0.00011,1,10],
           'gamma' : [0.1,0.001,1,10]
         }

In [18]:
grid = GridSearchCV(SVC(), param_grid=params, verbose=1)

In [19]:
grid.fit(xtrain,ytrain)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 0.001, 0.00011, 1, 10],
                         'gamma': [0.1, 0.001, 1, 10],
                         'kernel': ['poly', 'linear', 'rbf']},
             verbose=1)

In [20]:
#Getting best parameters

grid.best_params_

{'C': 10, 'gamma': 0.1, 'kernel': 'linear'}

In [21]:
svm = SVC(kernel='linear',C = 10, gamma=0.1)

In [22]:
svm.fit(xtrain,ytrain)
ypred = svm.predict(xtest)

In [23]:
svm.score(xtrain,ytrain), svm.score(xtest,ytest)

(0.9830508474576272, 0.9615384615384616)