## Soft Margin Support Vector Classification (SVC) 를 이용한 분류

### 스케일링 미적용 (원본 특성 그대로 사용)

In [22]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Load the breast-cancer dataset and pull out the feature matrix / label vector.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Kernels to compare ("rbf" = Radial Basis Function, i.e. the Gaussian kernel).
kernel_list = ["linear", "poly", "rbf"]

for kernel in kernel_list:
    # Train an SVC with otherwise-default hyperparameters on the raw (unscaled) features.
    svm = SVC(kernel=kernel).fit(X_train, y_train)

    # Score the held-out samples.
    y_pred = svm.predict(X_test)

    # Emit the kernel name, the per-class report and a separator, one per line.
    print(
        f"Kernel : {kernel}",
        classification_report(y_test, y_pred),
        "=" * 50,
        sep="\n",
    )

Kernel : linear
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Kernel : poly
              precision    recall  f1-score   support

           0       1.00      0.86      0.93        43
           1       0.92      1.00      0.96        71

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114

Kernel : rbf
              precision    recall  f1-score   support

           0       1.00      0.86      0.93        43
           1       0.92      1.00      0.96        71

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.

In [23]:
import pandas as pd

# Put the features into a labelled DataFrame and append the class label as a
# trailing "target" column.
df = pd.DataFrame(X, columns=cancer.feature_names).assign(target=y)

# Show the first rows as a quick look at the dataset.
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


## 데이터 스케일링(Min-Max 정규화) 이후 작업

In [24]:
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Same 80/20 split (and seed) as the unscaled experiment so results are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Min-max scaling (normalization, not standardization). The scaler learns the
# per-feature min/max from the training split only and is then applied to both
# splits, so no test-set statistics leak into preprocessing.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Kernels to compare ("rbf" = Radial Basis Function, i.e. the Gaussian kernel).
kernel_list = ["linear", "poly", "rbf"]

for kernel in kernel_list:
    # Train an SVC with otherwise-default hyperparameters on the scaled features.
    svm = SVC(kernel=kernel).fit(X_train_scaled, y_train)

    # Score the held-out (scaled) samples.
    y_pred = svm.predict(X_test_scaled)

    # Emit the kernel name, the per-class report and a separator, one per line.
    print(
        f"Kernel : {kernel}",
        classification_report(y_test, y_pred),
        "=" * 50,
        sep="\n",
    )

Kernel : linear
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Kernel : poly
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Kernel : rbf
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.

## SVC 의 하이퍼 파라미터 변경

In [25]:
from sklearn.model_selection import GridSearchCV

# Imported inside the cell so the cell runs without relying on another cell for it.
from sklearn.pipeline import Pipeline

# Same 80/20 split (and seed) as the earlier SVC experiments.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Per-kernel hyperparameter grids, keyed by plain SVC parameter names.
param_grid_dict = {
    "linear": {"C": [0.1, 1, 10]},
    "poly": {
        "C": [0.1, 1, 10],
        "degree": [2, 3, 4],
        "gamma": ["scale", "auto"],
        "coef0": [0.0, 1.0],
    },
    "rbf": {"C": [0.1, 1, 10], "gamma": ["scale", "auto", 0.1, 1]},
}

# Run a grid search for every kernel.
for kernel in ["linear", "poly", "rbf"]:
    print(f"\n=== 커널: {kernel} ===")

    # The scaler lives inside the pipeline so that, during cross-validation, it
    # is re-fitted on each training fold only. Scaling the whole training set
    # up front would let the validation fold's min/max leak into preprocessing
    # and bias the model selection.
    svc = Pipeline([("scaler", MinMaxScaler()), ("svc", SVC(kernel=kernel))])

    # Pipeline steps are addressed as "<step>__<param>" in the search grid.
    param_grid = {
        f"svc__{name}": values for name, values in param_grid_dict[kernel].items()
    }

    grid = GridSearchCV(svc, param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train, y_train)

    # best_estimator_ is the whole pipeline refitted on the full training split,
    # so it takes the raw (unscaled) test features.
    best_model = grid.best_estimator_
    y_pred = best_model.predict(X_test)

    # Strip the pipeline step prefix so the report shows plain SVC parameter names.
    best_params = {
        name.split("__", 1)[1]: value for name, value in grid.best_params_.items()
    }

    print(f"최적 파라미터: {best_params}")
    print(classification_report(y_test, y_pred))
    print("=" * 50)


=== 커널: linear ===
최적 파라미터: {'C': 1}
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114


=== 커널: poly ===
최적 파라미터: {'C': 1, 'coef0': 0.0, 'degree': 2, 'gamma': 'scale'}
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114


=== 커널: rbf ===
최적 파라미터: {'C': 1, 'gamma': 1}
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy    

## SVR

In [26]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Load the California housing dataset and pull out features / regression target.
california_housing = fetch_california_housing()
X, y = california_housing.data, california_housing.target

# Hold out 10% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Standardization: statistics are learned from the training split only and then
# applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Kernels to compare.
kernel_list = ["linear", "poly", "rbf"]

# Train, predict and evaluate once per kernel.
for kernel in kernel_list:
    # Fit an SVR with otherwise-default hyperparameters on the scaled features.
    svm = SVR(kernel=kernel).fit(X_train_scaled, y_train)

    # Predict the held-out samples and measure the mean squared error.
    y_pred = svm.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)

    # Emit the kernel name, its test MSE and a separator, one per line.
    print(f"Kernel : {kernel}", f"MSE : {mse}", "=" * 50, sep="\n")

Kernel : linear
MSE : 0.5724122911726106
Kernel : poly
MSE : 0.8680757594823234
Kernel : rbf
MSE : 0.36221601537209996


## SVR 의 하이퍼 파라미터 변경하여 적용

In [32]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error

# Imported inside the cell so the cell runs without relying on another cell for it.
from sklearn.pipeline import Pipeline

# Load the California housing data (features and regression target).
california = fetch_california_housing()
X, y = california.data, california.target

# Hold out 10% of the samples for the final evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Per-kernel hyperparameter grids, keyed by plain SVR parameter names.
param_grid_dict = {
    "linear": {"C": [0.1, 1, 10], "epsilon": [0.1, 0.2]},
    "rbf": {"C": [1, 10], "epsilon": [0.1, 0.2], "gamma": ["scale", "auto"]},
}

# Tune and evaluate each kernel.
for kernel in ["linear", "rbf"]:
    print(f"\n=== 커널: {kernel} ===")

    # The scaler lives inside the pipeline so that, during cross-validation, it
    # is re-fitted on each training fold only. Standardizing the whole training
    # set up front would let the validation fold's mean/variance leak into
    # preprocessing and bias the model selection.
    svr = Pipeline([("scaler", StandardScaler()), ("svr", SVR(kernel=kernel))])

    # Pipeline steps are addressed as "<step>__<param>" in the search grid.
    param_grid = {
        f"svr__{name}": values for name, values in param_grid_dict[kernel].items()
    }

    grid = GridSearchCV(
        svr, param_grid, cv=3, scoring="neg_mean_squared_error", n_jobs=-1
    )
    grid.fit(X_train, y_train)

    # best_estimator_ is the whole pipeline refitted on the full training split,
    # so it takes the raw (unscaled) test features.
    best_model = grid.best_estimator_
    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Strip the pipeline step prefix so the report shows plain SVR parameter names.
    best_params = {
        name.split("__", 1)[1]: value for name, value in grid.best_params_.items()
    }

    print(f"최적 파라미터: {best_params}")
    print(f"MSE: {mse:.4f}")
    print("=" * 50)


=== 커널: linear ===
최적 파라미터: {'C': 0.1, 'epsilon': 0.2}
MSE: 0.5697

=== 커널: rbf ===
최적 파라미터: {'C': 10, 'epsilon': 0.2, 'gamma': 'auto'}
MSE: 0.3250
