# Multiclass SVM 구현

In [147]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the iris dataset through seaborn
iris = sns.load_dataset('iris')
X = iris.iloc[:, 0:4]  # first four columns: the data to train on
y = iris.iloc[:, -1]   # last column: the target
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


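multiclass SVM을 one-vs-rest(OvR) 방식으로 구현해보자. 즉, class마다 "해당 class vs 나머지"를 구분하는 이진 SVM을 하나씩 학습한 뒤, 가장 높은 확률을 출력한 분류기의 class를 최종 예측으로 선택한다.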

In [148]:
# Hold out 20% of the rows as the test split; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
)

# dataset 생성

In [149]:
def standardization(train, test):
    """Standardize two splits with a scaler fitted on the first one only.

    Parameters
    ----------
    train : data the ``StandardScaler`` is fitted on and then transformed.
    test : data transformed with the scaler fitted on ``train``.

    Returns
    -------
    tuple
        ``(train_scaled, test_scaled)`` as returned by the scaler's ``transform``.
    """
    feature_scaler = StandardScaler().fit(train)
    return feature_scaler.transform(train), feature_scaler.transform(test)


X_train, X_test = standardization(X_train, X_test)

iris 데이터의 class는 총 3개이므로, pd.get_dummies로 타겟을 one-hot 인코딩하여 각 class에 해당하는 이진 라벨 열(해당 class이면 1, 아니면 0) 3개를 만들자.

In [150]:
# Display the training labels
y_train

110     virginica
69     versicolor
148     virginica
39         setosa
53     versicolor
          ...    
64     versicolor
91     versicolor
81     versicolor
51     versicolor
0          setosa
Name: species, Length: 120, dtype: object

In [151]:
# Report the shape of every split on a single line
print(*(part.shape for part in (X_train, y_train, X_test, y_test)))

# One-hot encode the labels: one indicator column per class
Y_train = pd.get_dummies(y_train)
Y_test = pd.get_dummies(y_test)

(120, 4) (120,) (30, 4) (30,)


# n_class개의 분류기 생성

In [166]:
# Derive the index -> class-name mapping from the one-hot columns themselves so
# it can never disagree with the column order produced by pd.get_dummies.
name = dict(enumerate(Y_train.columns))
n_class = len(name)
vote = []  # one array of positive-class probabilities per class

for i in range(n_class):
    print(f"class : {name[i]}")
    # Fit a binary "this class vs. the rest" SVM on the i-th indicator column
    model = SVC(kernel='rbf', C=5, gamma=5, probability=True)
    model.fit(X_train, Y_train.iloc[:, i])

    # Keep the second probability column (the indicator's positive value,
    # assuming the classifier orders its classes negative-then-positive)
    prob = model.predict_proba(X_test)[:, 1]
    vote.append(prob)


class : setosa
class : versicolor
class : virginica


In [167]:
# Display the collected probability arrays (one array per trained classifier)
vote

[array([0.00621536, 0.02324869, 0.06510919, 0.13373026, 0.03928502,
        0.0216232 , 0.9999897 , 0.01228896, 0.96510752, 0.01186135,
        0.02609762, 0.83305952, 0.92629475, 0.01295558, 0.04755133,
        0.01661003, 0.99999875, 0.09875486, 0.04681178, 0.01196778,
        0.95151677, 0.01375807, 0.02690462, 0.00992023, 0.04128403,
        0.99999455, 0.95009692, 0.00943923, 0.01636012, 0.00859153]),
 array([0.99604449, 0.93443283, 0.61982344, 0.21619549, 0.3075326 ,
        0.04848952, 0.02095464, 0.03989565, 0.05297601, 0.98197486,
        0.06069198, 0.08432556, 0.06439   , 0.04726504, 0.88896472,
        0.94982691, 0.01503908, 0.38141828, 0.32910961, 0.02419364,
        0.05886186, 0.02553611, 0.6408428 , 0.98713962, 0.11518269,
        0.01894581, 0.05939306, 0.01919834, 0.03593654, 0.99117652]),
 array([0.00831668, 0.05904351, 0.2021984 , 0.45788807, 0.60981837,
        0.94306038, 0.02101353, 0.97986669, 0.07466733, 0.02679696,
        0.92764829, 0.14920347, 0.10352309, 

# voting

In [169]:
# For every test sample pick the class whose classifier reported the highest
# probability. zip(*vote) walks the samples and works for any number of
# classifiers; max(..., key=...) returns the first index on ties, matching
# list.index(max(...)).
answer = [
    max(range(len(sample_probs)), key=sample_probs.__getitem__)
    for sample_probs in zip(*vote)
]

print(answer)


[1, 1, 1, 2, 2, 2, 0, 2, 0, 1, 2, 0, 0, 2, 1, 1, 0, 1, 2, 2, 0, 2, 1, 1, 2, 0, 0, 2, 2, 1]


# Accuracy

In [170]:
# Count the samples whose predicted class name equals the true label
score = sum(
    1
    for predicted_index, true_label in zip(answer, y_test)
    if name[predicted_index] == true_label
)

print(f"Accuracy : {np.round(score / len(y_test), 2)}")

Accuracy : 0.87


# Class 생성

In [171]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the iris dataset through seaborn
iris = sns.load_dataset('iris')
X = iris.iloc[:, 0:4]  # first four columns: the data to train on
y = iris.iloc[:, -1]   # last column: the target
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [172]:
# Hold out 20% of the rows as the test split; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
)

In [173]:
def standardization(train, test):
    """Standardize two splits with a scaler fitted on the first one only.

    Parameters
    ----------
    train : data the ``StandardScaler`` is fitted on and then transformed.
    test : data transformed with the scaler fitted on ``train``.

    Returns
    -------
    tuple
        ``(train_scaled, test_scaled)`` as returned by the scaler's ``transform``.
    """
    feature_scaler = StandardScaler().fit(train)
    return feature_scaler.transform(train), feature_scaler.transform(test)


X_train, X_test = standardization(X_train, X_test)

In [174]:
class multi_SVM:
    """One-vs-rest multiclass SVM assembled from binary ``SVC`` classifiers.

    ``fit`` trains one binary classifier per class and stores each
    classifier's positive-class probabilities for the test samples.
    ``predict`` picks, per sample, the class with the highest stored
    probability, and ``accuracy`` prints the share of correct predictions.

    Parameters
    ----------
    n_class : int, default 3
        Number of binary classifiers to train (the first ``n_class`` columns
        of the one-hot encoded training labels).
    kernel, C, gamma :
        Forwarded unchanged to every ``SVC``; the defaults reproduce the
        previously hard-coded values.
    random_state : default None
        Forwarded to every ``SVC``. ``None`` is also ``SVC``'s own default,
        so omitting it leaves behaviour unchanged.
    """

    def __init__(self, n_class=3, kernel='rbf', C=5, gamma=5, random_state=None):
        self.n_class = n_class
        self.kernel = kernel
        self.C = C
        self.gamma = gamma
        self.random_state = random_state
        self.vote = []    # per-class arrays of positive-class probabilities
        self.answer = []  # predicted class index for every test sample

    def fit(self, X_train, y_train, X_test, y_test):
        """Train the per-class classifiers and score ``X_test`` with each.

        Parameters
        ----------
        X_train, y_train : training features and class labels.
        X_test, y_test : test features and their true labels; the labels are
            kept for ``accuracy``.

        Raises
        ------
        ValueError
            If ``n_class`` exceeds the number of distinct training labels.
        """
        # encoding
        self.y_test = y_test
        self.Y_train = pd.get_dummies(y_train)
        self.Y_test = pd.get_dummies(y_test)

        if self.n_class > self.Y_train.shape[1]:
            raise ValueError(
                f"n_class={self.n_class} but y_train only has "
                f"{self.Y_train.shape[1]} distinct classes"
            )

        # Build the index -> class-name mapping from this instance's own
        # encoded labels (not from a notebook-level global).
        self.name = dict(enumerate(self.Y_train.columns))

        # Start from a clean slate so calling fit() again does not stack new
        # probabilities on top of the ones from a previous call.
        self.vote = []
        self.answer = []

        for i in range(self.n_class):
            model = SVC(
                kernel=self.kernel,
                C=self.C,
                gamma=self.gamma,
                probability=True,
                random_state=self.random_state,
            )
            model.fit(X_train, self.Y_train.iloc[:, i])

            # Keep the second probability column (the indicator's positive
            # value, assuming classes are ordered negative-then-positive).
            self.vote.append(model.predict_proba(X_test)[:, 1])

    def predict(self):
        """Choose the most probable class index for every test sample.

        Overwrites ``self.answer`` (so repeated calls do not accumulate) and
        also returns it. Ties go to the lowest class index.

        Raises
        ------
        RuntimeError
            If no probabilities are stored yet, i.e. ``fit`` was not called.
        """
        if not self.vote:
            raise RuntimeError("fit() must be called before predict()")

        # Rows are classes, columns are samples; argmax down the rows works
        # for any number of classes.
        scores = np.vstack(self.vote)
        self.answer = np.argmax(scores, axis=0).tolist()
        return self.answer

    def accuracy(self):
        """Print the fraction of test samples predicted correctly (2 decimals)."""
        score = sum(
            1
            for predicted_index, true_label in zip(self.answer, list(self.y_test))
            if self.name[predicted_index] == true_label
        )

        print(f"Accuracy : {np.round(score / len(self.y_test), 2)}")

In [175]:
# Run the whole workflow: train the per-class classifiers and score the test
# split, pick the most probable class per sample, then print the accuracy.
sm = multi_SVM()
sm.fit(X_train, y_train, X_test, y_test)
sm.predict()
sm.accuracy()

Accuracy : 0.87
