<a href="https://colab.research.google.com/github/LeeSeungYun1020/Machine_Learning/blob/main/colab/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SVM을 사용한 분류 (SVC)

In [32]:
from sklearn.datasets import load_breast_cancer, load_boston
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, SVR

In [33]:
# 데이터 불러오기
x, y = load_breast_cancer(return_X_y=True)

In [34]:
print(x.shape, y.shape)

(569, 30) (569,)


In [35]:
# 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)

In [36]:
# 모델 선택
model = SVC()

In [37]:
model.fit(x_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [38]:
# 학습 및 평가 데이터 점수
print(f"학습 데이터 점수: {model.score(x_train, y_train)}")
print(f"평가 데이터 점수: {model.score(x_test, y_test)}")

학습 데이터 점수: 0.8943661971830986
평가 데이터 점수: 0.9440559440559441


# SVM을 사용한 회귀 (SVR)

In [39]:
# 데이터 가져오기
x, y = load_boston(return_X_y=True)
print(x.shape, y.shape)

(506, 13) (506,)


In [40]:
# 데이터 분할
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)

In [41]:
# 모델 선택
model = SVR()

In [42]:
# 모델 학습
model.fit(x_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [43]:
# 모델 평가
print(f"train score: {model.score(x_train, y_train)}")
print(f"test score: {model.score(x_test, y_test)}")

train score: 0.2149950338383958
test score: 0.2060097280934967


# 커널 기법을 사용한 분류 (SVC)

In [54]:
x, y = load_breast_cancer(True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)

In [57]:
model = SVC(kernel="linear")
model.fit(x_train, y_train)
print(f"kernel(linear) train score: {model.score(x_train, y_train)}")
print(f"kernel(linear) test score: {model.score(x_test, y_test)}")

kernel(linear) train score: 0.9624413145539906
kernel(linear) test score: 0.958041958041958


In [58]:
model = SVC(kernel="poly")
model.fit(x_train, y_train)
print(f"kernel(poly) train score: {model.score(x_train, y_train)}")
print(f"kernel(poly) test score: {model.score(x_test, y_test)}")

kernel(poly) train score: 0.8943661971830986
kernel(poly) test score: 0.9440559440559441


In [59]:
model = SVC(kernel="rbf") # default
model.fit(x_train, y_train)
print(f"kernel(rbf) train score: {model.score(x_train, y_train)}")
print(f"kernel(rbf) test score: {model.score(x_test, y_test)}")

kernel(rbf) train score: 0.8943661971830986
kernel(rbf) test score: 0.9440559440559441


# 커널 기법을 사용한 회귀 (SVR)

In [61]:
x, y = load_boston(True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)
for kernel in ["linear", "poly", "rbf"]:
  model = SVR(kernel=kernel)
  model.fit(x_train, y_train)
  print(f"kernel({kernel}) train score: {model.score(x_train, y_train)}")
  print(f"kernel({kernel}) test score: {model.score(x_test, y_test)}")

kernel(linear) train score: 0.7174340100454817
kernel(linear) test score: 0.6801104100555064
kernel(poly) train score: 0.1991283626139405
kernel(poly) test score: 0.20278278095499502
kernel(rbf) train score: 0.2149950338383958
kernel(rbf) test score: 0.2060097280934967


# SVM 매개변수
 - SVM 커널에 따라 다양한 매개변수 설정
 - 주로 사용하는 매개변수
  * kernel: 사용할 커널 기법
  * C: 하드 마진, 소프트 마진 결정
    - 데이터 샘플이 다른 클래스에 놓이는 것을 허용하는지를 결정
    - C 낮게 -> 이상치 허용 -> 과속적합
    - C 높게 -> 이상치 없음 -> 과대적합
  * gamma: 유사도 특성

In [62]:
x, y = load_breast_cancer(True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)
model = SVC(kernel="poly", degree=2, gamma="auto", C=0.1) # 다항식, 2차
model.fit(x_train, y_train)
print(f"kernel(poly) train score: {model.score(x_train, y_train)}")
print(f"kernel(poly) test score: {model.score(x_test, y_test)}")

kernel(poly) train score: 0.9741784037558685
kernel(poly) test score: 0.958041958041958


In [67]:
x, y = load_breast_cancer(True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)
model = SVC(kernel="rbf", gamma="auto", C=1.0) # 하드 마진
model.fit(x_train, y_train)
print(f"kernel(rbf) train score: {model.score(x_train, y_train)}")
print(f"kernel(rbf) test score: {model.score(x_test, y_test)}")

kernel(rbf) train score: 1.0
kernel(rbf) test score: 0.6083916083916084


# 매우 간단한 데이터 전처리

In [68]:
x, y = load_breast_cancer(True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)
model = SVC()
model.fit(x_train, y_train)
print(f"kernel(poly) train score: {model.score(x_train, y_train)}")
print(f"kernel(poly) test score: {model.score(x_test, y_test)}")

kernel(poly) train score: 0.8943661971830986
kernel(poly) test score: 0.9440559440559441


In [72]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

model = SVC()
model.fit(x_train, y_train)
print(f"kernel(poly) train score: {model.score(x_train, y_train)}")
print(f"kernel(poly) test score: {model.score(x_test, y_test)}")

kernel(poly) train score: 0.9882629107981221
kernel(poly) test score: 0.951048951048951


In [73]:
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

model = SVC()
model.fit(x_train, y_train)
print(f"kernel(poly) train score: {model.score(x_train, y_train)}")
print(f"kernel(poly) test score: {model.score(x_test, y_test)}")

kernel(poly) train score: 0.9859154929577465
kernel(poly) test score: 0.9370629370629371
