# Multiclass SVM 구현

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the iris dataset through seaborn.
iris = sns.load_dataset('iris')
# The first four columns are the training features; the last column is the target.
X, y = iris.iloc[:, :4], iris.iloc[:, -1]
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [7]:
# Print the feature frame selected from the iris data.
print(X)

     sepal_length  sepal_width  petal_length  petal_width
0             5.1          3.5           1.4          0.2
1             4.9          3.0           1.4          0.2
2             4.7          3.2           1.3          0.2
3             4.6          3.1           1.5          0.2
4             5.0          3.6           1.4          0.2
..            ...          ...           ...          ...
145           6.7          3.0           5.2          2.3
146           6.3          2.5           5.0          1.9
147           6.5          3.0           5.2          2.0
148           6.2          3.4           5.4          2.3
149           5.9          3.0           5.1          1.8

[150 rows x 4 columns]


In [2]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
)

In [3]:
def standardization(train, test):
    """Standardize both splits with statistics learned from ``train`` only.

    A ``StandardScaler`` is fitted on ``train`` and then applied to ``train``
    and ``test``, so no information from the test split reaches the scaler.

    Returns
    -------
    tuple
        ``(scaled_train, scaled_test)`` as returned by ``StandardScaler.transform``.
    """
    fitted_scaler = StandardScaler().fit(train)
    return fitted_scaler.transform(train), fitted_scaler.transform(test)

# Rebind X_train / X_test to the scaled arrays returned by standardization.
X_train, X_test = standardization(X_train, X_test)

In [4]:
# Display the training features after standardization.
X_train

array([[ 0.78522493,  0.32015325,  0.77221097,  1.04726529],
       [-0.26563371, -1.29989934,  0.0982814 , -0.11996537],
       [ 0.43493872,  0.78302542,  0.94069336,  1.43634218],
       [-0.84944407,  0.78302542, -1.24957775, -1.28719604],
       [-0.38239578, -1.7627715 ,  0.15444219,  0.13941922],
       [ 0.55170079, -0.374155  ,  1.05301496,  0.7878807 ],
       [ 0.31817664, -0.14271892,  0.65988937,  0.7878807 ],
       [ 0.20141457, -0.374155  ,  0.43524618,  0.39880381],
       [-1.66677857, -0.14271892, -1.36189934, -1.28719604],
       [-0.14887164, -0.60559109,  0.21060299,  0.13941922],
       [-0.14887164, -1.06846325, -0.12636179, -0.24965767],
       [ 0.31817664, -0.60559109,  0.15444219,  0.13941922],
       [ 0.66846286, -0.83702717,  0.88453256,  0.91757299],
       [ 0.0846525 , -0.14271892,  0.77221097,  0.7878807 ],
       [-0.49915786, -0.14271892,  0.43524618,  0.39880381],
       [-0.26563371, -0.60559109,  0.65988937,  1.04726529],
       [ 2.18636979,  1.

In [5]:
# Display the test features after standardization.
X_test

array([[-0.14887164, -0.374155  ,  0.26676379,  0.13941922],
       [ 0.31817664, -0.60559109,  0.54756778,  0.00972692],
       [ 0.31817664, -1.06846325,  1.05301496,  0.26911151],
       [-1.5500165 , -1.7627715 , -1.36189934, -1.15750374],
       [ 0.0846525 ,  0.32015325,  0.60372857,  0.7878807 ],
       [ 0.78522493, -0.14271892,  0.99685416,  0.7878807 ],
       [-0.84944407,  1.70876975, -1.24957775, -1.15750374],
       [ 0.20141457, -0.14271892,  0.60372857,  0.7878807 ],
       [-0.38239578,  2.63451409, -1.30573855, -1.28719604],
       [-0.38239578, -1.29989934,  0.15444219,  0.13941922],
       [ 0.66846286,  0.08871717,  0.99685416,  0.7878807 ],
       [-0.38239578,  1.0144615 , -1.36189934, -1.28719604],
       [-0.49915786,  0.78302542, -1.13725615, -1.28719604],
       [ 0.43493872, -0.60559109,  0.60372857,  0.7878807 ],
       [ 0.55170079, -1.7627715 ,  0.37908538,  0.13941922],
       [ 0.55170079,  0.55158933,  0.54756778,  0.52849611],
       [-1.19973028,  0.

### One vs Rest SVM

클래스 수 확인

In [13]:
# List the distinct class labels present in the target.
y.unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

one vs rest SVM을 하기 위해 클래스를 구현한다.

In [22]:
class OneVsRestSVM:
    """One-vs-rest multiclass classifier assembled from binary SVMs.

    One binary ``SVC`` is trained per class label (that label against all the
    others). A sample is assigned to the class whose classifier returns the
    largest ``decision_function`` value.

    Parameters
    ----------
    classes : int, default 3
        Expected number of classes. Kept for backward compatibility; ``fit``
        overwrites it with the number of distinct labels found in ``y_train``.
    kernel : str, default 'rbf'
    C : float, default 1
    gamma : float, default 5
        Passed unchanged to every ``SVC``. The defaults reproduce the settings
        that used to be hard-coded inside ``fit``.
    """

    def __init__(self, classes=3, kernel='rbf', C=1, gamma=5):
        self.classes = classes
        self.kernel = kernel
        self.C = C
        self.gamma = gamma
        self.clfs = []          # one fitted binary SVC per class, in label order
        self.labels = []        # class labels, aligned with self.clfs
        self.pred_result = []   # result of the most recent predict() call

    def onehot_encoding(self, y_train):
        """Return the one-hot encoding of ``y_train`` (one indicator column per label)."""
        return pd.get_dummies(y_train)

    def fit(self, X_train, y_train):
        """Train one binary SVC per class label.

        Parameters
        ----------
        X_train : array-like
            Training features, passed to ``SVC.fit``.
        y_train : array-like
            Training labels; encoded with ``onehot_encoding``.
        """
        y_encoded = self.onehot_encoding(y_train)

        # The one-hot column order defines which classifier belongs to which label.
        self.labels = list(y_encoded.columns)
        self.classes = len(self.labels)

        # Start clean so that calling fit() again does not stack classifiers.
        self.clfs = []
        self.pred_result = []

        for i in range(self.classes):
            clf_machine = SVC(kernel=self.kernel, C=self.C, gamma=self.gamma)
            # Binary target: "is this sample of class i?"
            clf_machine.fit(X_train, y_encoded.iloc[:, i])
            self.clfs.append(clf_machine)

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Each classifier's ``decision_function`` is evaluated once on the whole
        batch; the label of the classifier with the largest score wins (ties go
        to the first such classifier, as with ``np.argmax``).

        Returns
        -------
        pandas.DataFrame
            A single column (named ``0``) holding the predicted labels.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if not self.clfs:
            raise RuntimeError("OneVsRestSVM.predict called before fit")

        # Shape (n_samples, n_classes): column j holds classifier j's scores.
        scores = np.column_stack([clf.decision_function(X_test) for clf in self.clfs])
        winners = np.argmax(scores, axis=1)

        # Map the winning classifier index back to the original label.
        self.pred_result = pd.DataFrame([self.labels[j] for j in winners])
        return self.pred_result

    def evaluate(self, y_test):
        """Print the accuracy of the most recent ``predict`` result against ``y_test``."""
        print(f'Accuracy : {(accuracy_score(y_test, self.pred_result))}')


In [23]:
# Train the one-vs-rest model, predict on the held-out split, and print the accuracy.
onerest = OneVsRestSVM()
onerest.fit(X_train, y_train)
pred_onerest = onerest.predict(X_test)
onerest.evaluate(y_test)

Accuracy : 0.8666666666666667
