In [18]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# 캘리포니아 집값 데이터셋 로드
california_housing = fetch_california_housing()

X = california_housing.data
y = california_housing.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

knn = KNeighborsRegressor(n_neighbors=5, weights="uniform", algorithm="auto")

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f"MSE :{mse}")

MSE :1.1186823858768293


## GridSearch() 로 최적 파라미터 선택

In [17]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

knn = KNeighborsRegressor()

param_grid = {
    "n_neighbors": [3, 5, 7],
    "weights": ["uniform", "distance"],
    "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
}

grid_search = GridSearchCV(knn, param_grid, cv=5)

# 데이터 표준화를 수행했더니 훨씬좋아짐
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

best_params = grid_search.best_params_
print(f"best params {best_params}")

y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"MSE : {mse}")

best params {'algorithm': 'brute', 'n_neighbors': 7, 'weights': 'distance'}
MSE : 1.0690185813569677


In [5]:
from itertools import product
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# 하이퍼파라미터 그리드 정의
param_grid = {
    "n_neighbors": [3, 5, 7],
    "weights": ["uniform", "distance"],
    "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
}

# 키 목록
keys = param_grid.keys()

# 모든 파라미터 조합 생성
for values in product(*param_grid.values()):
    params = dict(zip(keys, values))  # 딕셔너리로 변환

    # 모델 생성 및 학습
    knn = KNeighborsRegressor(**params)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    # MSE 계산
    mse = mean_squared_error(y_test, y_pred)

    # 결과 출력
    print(f"Params: {params} → MSE: {mse:.4f}")

Params: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'auto'} → MSE: 1.1694
Params: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'ball_tree'} → MSE: 1.1694
Params: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'kd_tree'} → MSE: 1.1694
Params: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'brute'} → MSE: 1.1694
Params: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'auto'} → MSE: 1.1443
Params: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'ball_tree'} → MSE: 1.1443
Params: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'kd_tree'} → MSE: 1.1443
Params: {'n_neighbors': 3, 'weights': 'distance', 'algorithm': 'brute'} → MSE: 1.1443
Params: {'n_neighbors': 5, 'weights': 'uniform', 'algorithm': 'auto'} → MSE: 1.1187
Params: {'n_neighbors': 5, 'weights': 'uniform', 'algorithm': 'ball_tree'} → MSE: 1.1187
Params: {'n_neighbors': 5, 'weights': 'uniform', 'algorithm': 'kd_tree'} → MSE: 1.1187
Params: {'n_neighbors': 5, 'weights': 'uniform

In [19]:
from sklearn.linear_model import Ridge

# 3. Ridge 회귀 모델 정의
ridge = Ridge()

# 4. 하이퍼파라미터 설정 (그리드 서치용)
param_grid = {
    "alpha": [0.01, 0.1, 1, 10, 100],
    "solver": ["auto", "svd", "cholesky", "lsqr"],
}

# 5. 그리드 서치
grid = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    cv=5,
)

# 6. 학습
grid.fit(X_train, y_train)

# 7. 평가
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# 8. 결과 출력
print("Best Parameters:", grid.best_params_)
print("Test MSE:", mse)

Best Parameters: {'alpha': 10, 'solver': 'auto'}
Test MSE: 0.5550405537342997
