<a href="https://colab.research.google.com/github/Newbim/HYAI/blob/main/HYAI3_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 선형적 관계를 가진 데이터 생성

In [None]:
import numpy as np

# Toy 2-D dataset: two groups that differ clearly in their first coordinate
# (at most 11 for class -1, at least 21 for class +1).
negative_points = [[-2, 2], [4, 1], [7, 6], [2, 4], [11, 2]]
positive_points = [[34, 4], [25, 10], [21, 10], [24, 4], [43, 2]]

# Features: class -1 rows first, then class +1 rows.
x = np.array(negative_points + positive_points)
# Labels, in the same row order as `x`.
y = np.array([-1] * len(negative_points) + [1] * len(positive_points))

In [None]:
import matplotlib.pyplot as plt

# Draw every sample as a large dot: red for label -1, blue for any other label.
for point, label in zip(x, y):
    colour = 'r' if label == -1 else 'b'
    plt.scatter(point[0], point[1], s=100, c=colour)

# 선형 서포트 벡터 머신 구현

In [None]:
from sklearn.svm import SVC
import numpy as np

# Support vector classifier with a linear kernel, fitted on the arrays x / y.
model = SVC(kernel = 'linear', C = 10) # hyperparameters: C and kernel (original note: e.g. y = x^2)
model.fit(x, y)

In [None]:
import matplotlib.pyplot as plt

# The linear model's boundary is w[0]*x0 + w[1]*x1 + b = 0. Solving for x1
# gives x1 = a*x0 - b/w[1] with slope a = -w[0]/w[1].
w = model.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(10, 20)  # the line is only drawn for x0 in [10, 20]
yy = a * xx - (model.intercept_[0] / w[1])

# Training samples: red for label -1, blue otherwise.
for val, inp in enumerate(x):
    colour = 'r' if y[val] == -1 else 'b'
    plt.scatter(inp[0], inp[1], s=100, c=colour)

ax = plt.gca()
# Matplotlib keyword names are case-sensitive: the colour shorthand is the
# lowercase `c`. An uppercase `C` is not a line property and is rejected.
plt.plot(xx, yy, c='g')

In [None]:
# Print the support vectors stored on the fitted model.
print(model.support_vectors_)

In [None]:
# Same array as a bare expression, so the notebook displays its repr.
model.support_vectors_

In [None]:
# Circle is defined in matplotlib.patches; matplotlib.widgets is not its home.
from matplotlib.patches import Circle
import matplotlib.pyplot as plt

# The linear model's boundary is w[0]*x0 + w[1]*x1 + b = 0. Solving for x1
# gives x1 = a*x0 - b/w[1] with slope a = -w[0]/w[1].
w = model.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(10, 20)  # the line is only drawn for x0 in [10, 20]
yy = a * xx - (model.intercept_[0] / w[1])

# Training samples: red for label -1, blue otherwise.
for val, inp in enumerate(x):
    colour = 'r' if y[val] == -1 else 'b'
    plt.scatter(inp[0], inp[1], s=100, c=colour)

ax = plt.gca()
# The colour shorthand is lowercase `c`; an uppercase `C` is rejected.
plt.plot(xx, yy, c='g')

# Ring every support vector with an unfilled black circle of radius 1.5
# (data units). One circle is added per row of model.support_vectors_.
# NOTE(review): the original author asked why three circles did not appear.
# Most likely the plot call above failed on the `C` keyword before this loop
# ran - confirm by re-running the cell.
for support_vector in model.support_vectors_:
    ax.add_patch(Circle(support_vector, 1.5, color='black', fill=False))

# 선형 서포트 벡터 머신 예측

In [None]:
# Ask the current `model` for the class of the single point (-10, 2).
model.predict([[-10, 2]])

# 비선형 관계를 가진 데이터 생성 

In [None]:
from sklearn.datasets import make_circles

# Generate the non-linear toy dataset with make_circles (fixed seed, so the
# samples are reproducible) and plot it: label 1 in red, the rest in blue.
x_nl, y_nl = make_circles(noise=0.1, factor=0.2, random_state=1)
for point, label in zip(x_nl, y_nl):
    colour = 'r' if label == 1 else 'b'
    plt.scatter(point[0], point[1], s=100, c=colour)

# 커널 트릭 시각화

In [None]:
import math
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(111, projection='3d')

# Lift each 2-D sample (u, v) into 3-D with the explicit feature map of the
# homogeneous degree-2 polynomial kernel: (u^2, sqrt(2)*u*v, v^2).
root_two = math.sqrt(2)
for label, (u, v) in zip(y_nl, x_nl):
    # Use dedicated names for the lifted coordinates so the module-level
    # dataset variables `x` and `y` are not overwritten by this loop.
    fx, fy, fz = u * u, root_two * u * v, v * v
    # Label 1 is drawn in red, every other label in blue.
    ax.scatter(fx, fy, fz, color='r' if label == 1 else 'b')

# 비선형 서포트 벡터 머신 구현

In [None]:
# Rebind `model` to a classifier fitted on the non-linear dataset x_nl / y_nl.
model = SVC(kernel = 'rbf', C = 10) # 'rbf' selects the RBF kernel
model.fit(x_nl, y_nl)

In [None]:
# Plot the non-linear samples again: label 1 in red, the rest in blue.
for point, label in zip(x_nl, y_nl):
    colour = 'r' if label == 1 else 'b'
    plt.scatter(point[0], point[1], s=100, c=colour)

In [None]:
# Ask the current `model` for the class of the origin (0, 0).
model.predict([[0, 0]])

# Grid Search

In [None]:
import numpy as np

# Toy 2-D dataset used for the grid-search experiments: five samples of
# class -1 stacked on top of five samples of class +1.
x = np.vstack([
    np.array([[-2, 2], [4, 1], [7, 6], [2, 4], [11, 2]]),        # class -1
    np.array([[34, 4], [25, 10], [21, 10], [24, 4], [43, 2]]),   # class +1
])
# Five -1 labels followed by five +1 labels, matching the row order of `x`.
y = np.repeat([-1, 1], 5)

In [None]:
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Candidate values for C: 13 log-spaced points from 1e-2 to 1e10.
C_range = np.logspace(-2, 10, 13)

param_grid = {"C": C_range}
# Five stratified random splits, each holding out 20% of the samples.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
grid.fit(x, y)

# Report the best-scoring setting and its mean cross-validated score.
summary = "The best parameters are %s with a score of %0.2f" % (
    grid.best_params_,
    grid.best_score_,
)
print(summary)

In [None]:
# Fit one linear-kernel classifier per C value, from very small to very large.
C_2d_range = [0.000000000001, 1, 1000000000]
classifiers = []
for C in C_2d_range:
    clf = SVC(kernel='linear', C=C)
    clf.fit(x, y)
    classifiers.append((C, clf))

# Only one parameter varies, so the panels form a single row. The previous
# len x len grid left all but the top row of the figure empty; the figure
# height is reduced to match the one-row layout.
plt.figure(figsize=(25, 8))
xx, yy = np.meshgrid(np.linspace(-10, 45, 100), np.linspace(-10, 15, 200))

for k, (C, clf) in enumerate(classifiers):
    # evaluate decision function in a grid
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # one panel per C value, left to right in the order of C_2d_range
    plt.subplot(1, len(C_2d_range), k + 1)
    plt.title("C=10^%d" % (np.log10(C)), size="medium")

    # visualize parameter's effect on decision function
    plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.RdBu_r, edgecolors="k")
    plt.xticks(())
    plt.yticks(())
    plt.axis("tight")

# Mean cross-validated score per C value, taken from the earlier grid search.
scores = grid.cv_results_["mean_test_score"].reshape(len(C_range))

적절한 C 값 찾기. 왼쪽 사진의 경우 가장 가까운 두 점의 영향을 많이 받음. 
C를 키우면 오분류에 대한 벌점이 커져 hard margin에 가까워지고, C를 줄이면 soft margin 서포트 벡터 머신이 됨.

In [None]:
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Exponents run from -2 to 10 for C and from -9 to 3 for gamma (13 values
# each); the grid search evaluates every (C, gamma) combination on this grid.
C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)

param_grid = {"gamma": gamma_range, "C": C_range}
# Five stratified random splits, each holding out 20% of the samples.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
grid.fit(x_nl, y_nl)

# Report the best-scoring setting and its mean cross-validated score.
summary = "The best parameters are %s with a score of %0.2f" % (
    grid.best_params_,
    grid.best_score_,
)
print(summary)

In [None]:
C_2d_range = [0.01, 1, 100]
gamma_2d_range = [0.01, 1, 100]

# One fitted RBF classifier per (C, gamma) pair; C varies slowest, so each
# run of len(gamma_2d_range) consecutive entries shares the same C.
classifiers = [
    (C, gamma, SVC(kernel='rbf', C=C, gamma=gamma).fit(x_nl, y_nl))
    for C in C_2d_range
    for gamma in gamma_2d_range
]

plt.figure(figsize=(25, 25))  # figure size
grid_axis = np.linspace(-2, 2, 200)
xx, yy = np.meshgrid(grid_axis, grid_axis)
# Flattened grid coordinates, shared by every classifier below.
grid_points = np.c_[xx.ravel(), yy.ravel()]

n_rows, n_cols = len(C_2d_range), len(gamma_2d_range)
for k, (C, gamma, clf) in enumerate(classifiers):
    # Decision-function value at every grid point, shaped like the mesh.
    Z = clf.decision_function(grid_points).reshape(xx.shape)

    # Rows follow C_2d_range, columns follow gamma_2d_range.
    plt.subplot(n_rows, n_cols, k + 1)
    plt.title("gamma=10^%d, C=10^%d" % (np.log10(gamma), np.log10(C)), size="medium")

    # Colour map of the decision function with the samples drawn on top.
    plt.pcolormesh(xx, yy, -Z, cmap=plt.cm.RdBu)
    plt.scatter(x_nl[:, 0], x_nl[:, 1], c=y_nl, cmap=plt.cm.RdBu_r, edgecolors="k")
    plt.xticks(())
    plt.yticks(())
    plt.axis("tight")

# Mean cross-validated scores arranged as a (C, gamma) table.
scores = grid.cv_results_["mean_test_score"].reshape(len(C_range), len(gamma_range))

아래쪽으로 갈수록 C가 커짐. C -> 이상치에 대한 것. 

오른쪽으로 갈수록 gamma 값이 올라감. gamma가 커질수록 RBF 커널의 폭(표준편차 σ)이 작아짐. 

gamma 값이 올라갈수록 분류 경계가 세밀하게 잡힘. 

이 원형 그래프는 gamma에 영향을 많이 받음. 