In [4]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# 데이터 불러오기
housing = fetch_california_housing()
x, y = housing.data, housing.target

# 5,000개만 샘플링(test셋은 안할거임)
x_train, _, y_train, _ = train_test_split(x, y, train_size=5000, random_state=42)

# 3. 파이프라인 (데이터 표준화 + SVR로 시행)
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("svr", SVR())
])

# 4. 하이퍼파라미터 탐색 설정
param_grid = [
    # A. kernel = linear, 다양한 C 값
    {"svr__kernel": ["linear"],
     "svr__C": [0.1, 1, 10, 100]},

    # B. kernel = rbf, 다양한 C, gamma 조합
    {"svr__kernel": ["rbf"],
     "svr__C": [0.1, 1, 10],
     "svr__gamma": ["scale", 0.01, 0.1, 1]}
]

# 3-겹 교차검증
grid_search = GridSearchCV(pipe, param_grid, cv=3, scoring="neg_mean_squared_error", n_jobs=-1)

# 6. 학습
grid_search.fit(x_train, y_train)

# 7. 결과 확인
print("최적 하이퍼파라미터:", grid_search.best_params_)
print("최적 성능 (MSE):", -grid_search.best_score_)

최적 하이퍼파라미터: {'svr__C': 10, 'svr__gamma': 'scale', 'svr__kernel': 'rbf'}
최적 성능 (MSE): 0.3132591325724254


In [5]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from scipy.stats import uniform

# 1. 데이터셋 불러오기
housing = fetch_california_housing()
X, y = housing.data, housing.target

# 2. 훈련셋 5,000개만 샘플링
X_train, _, y_train, _ = train_test_split(X, y, train_size=5000, random_state=42)

# 3. 파이프라인 (표준화 + SVR)
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("svr", SVR())
])

# 4. 하이퍼파라미터 탐색 공간
param_distributions = {
    # kernel 종류를 두 가지로 제한
    "svr__kernel": ["linear", "rbf"],
    # C 값: 0.1 ~ 100 사이 연속 분포에서 샘플링
    "svr__C": uniform(0.1, 100),
    # gamma 값: rbf에서만 의미 있음 (0.001 ~ 1 사이)
    "svr__gamma": uniform(0.001, 1)
}

# 5. RandomizedSearchCV (3-겹 교차검증, 20회 탐색)
# 그리드는 전수조사고 랜덤은 지정한 분포에서 무작위 일부 조합만 시)
random_search = RandomizedSearchCV(
    pipe,
    param_distributions,
    n_iter=20,  # 탐색 횟수
    cv=3,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    random_state=42
)

# 6. 학습
random_search.fit(X_train, y_train)

# 7. 결과 확인
print("최적 하이퍼파라미터:", random_search.best_params_)
print("최적 성능 (MSE):", -random_search.best_score_)


최적 하이퍼파라미터: {'svr__C': np.float64(23.189382562214902), 'svr__gamma': np.float64(0.24202546602601172), 'svr__kernel': 'rbf'}
최적 성능 (MSE): 0.3456692017905895


In [6]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVR, LinearSVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
from scipy.stats import uniform

# 1. 데이터셋 불러오기
housing = fetch_california_housing()
X, y = housing.data, housing.target

# 2. 훈련셋 5,000개만 샘플링
X_train, _, y_train, _ = train_test_split(X, y, train_size=5000, random_state=42)

# 3. 파이프라인 구성
pipe = Pipeline([
    ("scaler", StandardScaler()),  # 표준화
    ("feature_selection", SelectFromModel(LinearSVR(random_state=42))),  # 중요 특성만 남김

    ("svr", SVR())  # 최종 예측 모델
])

# 4. 하이퍼파라미터 탐색 공간
param_distributions = {
    "feature_selection__max_features": [4, 6, 8],  # 선택할 특성 수 (예시)
    "svr__kernel": ["linear", "rbf"],
    "svr__C": uniform(0.1, 100),
    "svr__gamma": uniform(0.001, 1)  # rbf일 때만 의미 있음
}

# 5. RandomizedSearchCV (3-겹 교차검증, 20회 탐색)
random_search = RandomizedSearchCV(
    pipe,
    param_distributions,
    n_iter=20,
    cv=3,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    random_state=42
)

# 6. 학습
random_search.fit(X_train, y_train)

# 7. 결과 확인
print("최적 하이퍼파라미터:", random_search.best_params_)
print("최적 성능 (MSE):", -random_search.best_score_)





최적 하이퍼파라미터: {'feature_selection__max_features': 8, 'svr__C': np.float64(2.1584494295802448), 'svr__gamma': np.float64(0.9709098521619943), 'svr__kernel': 'rbf'}
최적 성능 (MSE): 0.3966934323468048
