In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [4]:
df.species.value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

In [5]:
y = df['species']
x = df.drop('species', axis=1)

In [6]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, \
                                                    random_state=111)

In [7]:
from sklearn.svm import SVC

In [8]:
svc = SVC()

In [9]:
svc.fit(x_train, y_train)

SVC()

In [10]:
predictions = svc.predict(x_test)

In [11]:
from sklearn.metrics import classification_report

In [12]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.71      0.71      0.71         7
   virginica       0.85      0.85      0.85        13

    accuracy                           0.87        30
   macro avg       0.85      0.85      0.85        30
weighted avg       0.87      0.87      0.87        30



In [None]:
# parameter optimization
# C : 오차허용범위
# gamma : 하나의 데이터의 영향 범위

In [20]:
from sklearn.metrics import accuracy_score

for c in (0.1, 1, 10, 100):
    for g in (1, 0.1, 0.01, 0.001):
        svc = SVC(C=c, gamma=g)
        svc.fit(x_train, y_train)
        preds = svc.predict(x_test)
        score = accuracy_score(y_test, preds)
        print('score :', np.round(score, 2), '  ', 'C :', c, 'gamma :', g)

score : 0.87    C : 0.1 gamma : 1
score : 0.83    C : 0.1 gamma : 0.1
score : 0.23    C : 0.1 gamma : 0.01
score : 0.23    C : 0.1 gamma : 0.001
score : 0.9    C : 1 gamma : 1
score : 0.93    C : 1 gamma : 0.1
score : 0.83    C : 1 gamma : 0.01
score : 0.33    C : 1 gamma : 0.001
score : 0.9    C : 10 gamma : 1
score : 0.97    C : 10 gamma : 0.1
score : 0.93    C : 10 gamma : 0.01
score : 0.83    C : 10 gamma : 0.001
score : 0.9    C : 100 gamma : 1
score : 0.9    C : 100 gamma : 0.1
score : 0.97    C : 100 gamma : 0.01
score : 0.93    C : 100 gamma : 0.001


In [21]:
svc = SVC(C=10, gamma=0.1)
svc.fit(x_train, y_train)
preds = svc.predict(x_test)
score = accuracy_score(y_test, preds)
score

0.9666666666666667

In [22]:
# GridSearchCV

In [None]:
svc = SVC()

In [23]:
from sklearn.model_selection import GridSearchCV

In [24]:
params = {'C':[0.1,1,10,100], 'gamma':[1,0.1,0.01,0.001]}

In [26]:
grid = GridSearchCV(svc, param_grid=params, refit=True, verbose=True)

In [28]:
grid.fit(x_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    0.2s finished


GridSearchCV(estimator=SVC(C=10, gamma=0.1),
             param_grid={'C': [0.1, 1, 10, 100],
                         'gamma': [1, 0.1, 0.01, 0.001]},
             verbose=True)

In [29]:
grid.best_params_

{'C': 1, 'gamma': 1}

In [30]:
grid.best_score_

0.975

In [31]:
preds = grid.predict(x_test)

array(['setosa', 'setosa', 'virginica', 'virginica', 'virginica',
       'setosa', 'setosa', 'virginica', 'virginica', 'versicolor',
       'virginica', 'setosa', 'versicolor', 'virginica', 'virginica',
       'setosa', 'virginica', 'versicolor', 'setosa', 'virginica',
       'versicolor', 'virginica', 'versicolor', 'versicolor', 'virginica',
       'setosa', 'setosa', 'virginica', 'setosa', 'virginica'],
      dtype=object)

In [32]:
accuracy_score(y_test, preds)

0.9666666666666667