In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("forestfires.csv")
data.head()

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,small
1,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,small
2,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,small
3,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,0,1,0,0,0,0,small
4,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,1,0,0,0,0,small


In [3]:
#Converting categorical data into numerical data
string_columns=["month","day","size_category"]
le = preprocessing.LabelEncoder()
for i in string_columns:
    data[i] = le.fit_transform(data[i])

In [4]:
data.head()

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep,size_category
0,7,0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,1,0,0,0,0,1
1,10,5,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,1,0,1
2,10,2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,0,0,0,0,0,0,1,0,1
3,7,0,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,0,1,0,0,0,0,1
4,7,3,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,1,0,0,0,0,1


In [5]:
#Input variables
x=data.iloc[:,0:30]
x.head(2)

Unnamed: 0,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,monthdec,monthfeb,monthjan,monthjul,monthjun,monthmar,monthmay,monthnov,monthoct,monthsep
0,7,0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,0,0,1,0,0,0,0
1,10,5,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,0,0,0,0,0,0,0,1,0


In [6]:
#Target variable
y=data.iloc[:,-1]
y.head()

0    1
1    1
2    1
3    1
4    1
Name: size_category, dtype: int32

In [7]:
y.unique() 

array([1, 0])

In [8]:
y.value_counts()

1    378
0    139
Name: size_category, dtype: int64

In [9]:
# Splitting data into training and testing data set
x_train, x_test,y_train,y_test = train_test_split(x,y, test_size=0.2,random_state=40) 

In [10]:
#Build model where kernel is linear
model_linear = SVC(kernel = "linear")
model_linear.fit(x_train,y_train)
pred_test_linear = model_linear.predict(x_test)

In [11]:
pd.crosstab(y_test,pred_test_linear)

col_0,0,1
size_category,Unnamed: 1_level_1,Unnamed: 2_level_1
0,25,0
1,2,77


In [12]:
# classification_report
from sklearn.metrics import classification_report
print(classification_report(y_test,pred_test_linear))

              precision    recall  f1-score   support

           0       0.93      1.00      0.96        25
           1       1.00      0.97      0.99        79

    accuracy                           0.98       104
   macro avg       0.96      0.99      0.97       104
weighted avg       0.98      0.98      0.98       104



In [13]:
#Build model where kernel is poly
model_poly = SVC(kernel = "poly")
model_poly.fit(x_train,y_train)
pred_test_poly = model_poly.predict(x_test)

In [14]:
pd.crosstab(y_test,pred_test_poly)

col_0,0,1
size_category,Unnamed: 1_level_1,Unnamed: 2_level_1
0,4,21
1,0,79


In [15]:
# classification_report
print(classification_report(y_test,pred_test_poly))

              precision    recall  f1-score   support

           0       1.00      0.16      0.28        25
           1       0.79      1.00      0.88        79

    accuracy                           0.80       104
   macro avg       0.90      0.58      0.58       104
weighted avg       0.84      0.80      0.74       104



In [16]:
#Build model where kernel is rbf
model_rbf = SVC(kernel = "rbf")
model_rbf.fit(x_train,y_train)
pred_test_rbf = model_rbf.predict(x_test)

In [17]:
pd.crosstab(y_test,pred_test_rbf)

col_0,0,1
size_category,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3,22
1,0,79


In [18]:
# classification_report
print(classification_report(y_test,pred_test_rbf))

              precision    recall  f1-score   support

           0       1.00      0.12      0.21        25
           1       0.78      1.00      0.88        79

    accuracy                           0.79       104
   macro avg       0.89      0.56      0.55       104
weighted avg       0.83      0.79      0.72       104



In [19]:
#Build model where kernel is sigmoid
model_sigmoid = SVC(kernel = "sigmoid")
model_sigmoid.fit(x_train,y_train)
pred_test_sigmoid = model_sigmoid.predict(x_test)

In [20]:
pd.crosstab(y_test,pred_test_sigmoid)

col_0,0,1
size_category,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3,22
1,9,70


In [21]:
# classification_report
print(classification_report(y_test,pred_test_sigmoid))

              precision    recall  f1-score   support

           0       0.25      0.12      0.16        25
           1       0.76      0.89      0.82        79

    accuracy                           0.70       104
   macro avg       0.51      0.50      0.49       104
weighted avg       0.64      0.70      0.66       104

