In [1]:
#import libraries
import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
#import dataset
forest_data = pd.read_csv("forestfires.csv")
forest_data.head()

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,small
3,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,0,1,0,0,0,0,small
4,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,1,0,0,0,0,small


## EDA

In [3]:
#NA values
forest_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 31 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   month          517 non-null    object 
 1   day            517 non-null    object 
 2   FFMC           517 non-null    float64
 3   DMC            517 non-null    float64
 4   DC             517 non-null    float64
 5   ISI            517 non-null    float64
 6   temp           517 non-null    float64
 7   RH             517 non-null    int64  
 8   wind           517 non-null    float64
 9   rain           517 non-null    float64
 10  area           517 non-null    float64
 11  dayfri         517 non-null    int64  
 12  daymon         517 non-null    int64  
 13  daysat         517 non-null    int64  
 14  daysun         517 non-null    int64  
 15  daythu         517 non-null    int64  
 16  daytue         517 non-null    int64  
 17  daywed         517 non-null    int64  
 18  monthapr  

In [4]:
#Duplicate values
forest_data[forest_data.duplicated()]

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
53,aug,wed,92.1,111.2,654.1,9.6,20.4,42,4.9,0.0,...,0,0,0,0,0,0,0,0,0,small
100,aug,sun,91.4,142.4,601.4,10.6,19.8,39,5.4,0.0,...,0,0,0,0,0,0,0,0,0,small
215,mar,sat,91.7,35.8,80.8,7.8,17.0,27,4.9,0.0,...,0,0,0,0,1,0,0,0,0,large
303,jun,fri,91.1,94.1,232.1,7.1,19.2,38,4.5,0.0,...,0,0,0,1,0,0,0,0,0,small
426,aug,thu,91.6,248.4,753.8,6.3,20.4,56,2.2,0.0,...,0,0,0,0,0,0,0,0,0,small
461,aug,sat,93.7,231.1,715.1,8.4,18.9,64,4.9,0.0,...,0,0,0,0,0,0,0,0,0,small
501,aug,tue,96.1,181.1,671.2,14.3,21.6,65,4.9,0.8,...,0,0,0,0,0,0,0,0,0,small
508,aug,fri,91.0,166.9,752.6,7.1,25.9,41,3.6,0.0,...,0,0,0,0,0,0,0,0,0,small


In [5]:
#Remove duplicate rows
forest_data.drop_duplicates(inplace=True, ignore_index=True)

In [6]:
#Splitting of data
array = forest_data.values
x = array[:,2:-1]
y = LabelEncoder().fit_transform(array[:,-1])

In [7]:
#Splitting train and test data
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=0,test_size=0.1)

## Model

In [8]:
#Find the optimum parameters using Gridsearch
clf = SVC()
para_grid = {"kernel":["rbf","poly"], "gamma":[10,1,0.5, 0.1], "C":[10,0.1,0.001],"degree":[2,3,4,5]}
gsv = GridSearchCV(estimator=clf,param_grid=para_grid, cv=5)
gsv.fit(x_train,y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [10, 0.1, 0.001], 'degree': [2, 3, 4, 5],
                         'gamma': [10, 1, 0.5, 0.1],
                         'kernel': ['rbf', 'poly']})

In [9]:
#best parameters
gsv.best_params_, gsv.best_score_

({'C': 10, 'degree': 2, 'gamma': 10, 'kernel': 'poly'}, 0.9759675107501196)

In [10]:
#model
clf_final = SVC(C=10, kernel="poly",degree=2, gamma=10)
clf_final.fit(x_train,y_train)

SVC(C=10, degree=2, gamma=10, kernel='poly')

In [11]:
#Score on the test data
clf_final.score(x_test,y_test)

1.0

In [12]:
#Confusion matrix for test data
from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(y_test, clf_final.predict(x_test))
matrix

array([[12,  0],
       [ 0, 39]], dtype=int64)