## 그리드 서치 (Grid Search)
- 머신러닝 모델의 하이퍼파라미터 튜닝을 위한 방법
- 모델 학습시 사전에 정의된 하이퍼파라미터 조합을 하나식 대입해보면서 최적의 조합을 찾아내는 과정
- 조합이 다양해질수록 최적의 조합을 찾을 확률이 높아지지만 그만큼 리소스도 많이 사용된다.
- 그리드 서치, 랜덤 서치 방법이 있다.

![](https://velog.velcdn.com/images/newnew_daddy/post/b1ed6d60-a137-43fc-a244-8aa8ea8e53de/image.png)


#### 회귀모델에 Grid Search 적용

In [93]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.datasets import load_diabetes

dataset = load_diabetes()

X = dataset.data
y = dataset.target

X.shape, y.shape

((442, 10), (442,))

In [94]:
# 학습/테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((309, 10), (133, 10), (309,), (133,))

In [95]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [96]:
# Lasso Regression 모델 학습
lasso = Lasso(alpha=0.01, max_iter=10000)

lasso.get_params()

{'alpha': 0.01,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 10000,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [97]:
from sklearn.tree import DecisionTreeRegressor
    
lasso = DecisionTreeRegressor(random_state = 0)

lasso.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'random_state': 0,
 'splitter': 'best'}

In [98]:
param_grid = {
    'max_depth': [1,2,3,4,5,6,7,8]
}
# param_grid = {
#     'alpha': [0.1, 1.0, 10.0, 100.0],
#     'max_iter': [100, 500, 1000, 2000, 5000]
# }

In [99]:
from sklearn.model_selection import GridSearchCV

# Grid Search 설정
grid_search = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    cv=5,
    scoring='r2'
    )

# Grid Search 수행
grid_search.fit(X_train, y_train)

In [100]:
print(grid_search.best_params_)
print(grid_search.best_score_)

{'max_depth': 3}
0.29129699477060483


In [101]:
df = pd.DataFrame(grid_search.cv_results_)

df.sort_values(by='mean_test_score', ascending=False)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.000518,2e-05,0.000197,6.9e-05,3,{'max_depth': 3},0.305774,-0.077063,0.410094,0.56598,0.251701,0.291297,0.213087,1
1,0.000431,1.3e-05,0.000174,1.6e-05,2,{'max_depth': 2},0.315713,-0.073699,0.368563,0.534687,0.27799,0.284651,0.199491,2
3,0.000569,3e-06,0.000158,7e-06,4,{'max_depth': 4},0.29172,-0.145196,0.346223,0.458934,0.238425,0.238021,0.205061,3
0,0.000539,0.000212,0.000291,0.000124,1,{'max_depth': 1},0.184374,0.077678,0.317054,0.378056,0.124913,0.216415,0.113933,4
4,0.000737,0.0001,0.000198,6.3e-05,5,{'max_depth': 5},0.154405,-0.059409,0.364824,0.35138,0.264033,0.215046,0.156476,5
5,0.000996,0.000267,0.000232,6.8e-05,6,{'max_depth': 6},0.068395,-0.256657,0.390791,0.382843,0.139248,0.144924,0.238417,6
6,0.000866,9.9e-05,0.000178,1.9e-05,7,{'max_depth': 7},0.05705,-0.286423,0.251366,0.334061,0.171366,0.105484,0.216244,7
7,0.000862,6.6e-05,0.000193,4.4e-05,8,{'max_depth': 8},0.122537,-0.347513,0.157009,0.11925,0.069663,0.024189,0.187926,8


In [102]:
## 최고 성능 모델 선택

best_model = grid_search.best_estimator_

In [103]:
y_pred = best_model.predict(X_test)

In [104]:
print(best_model.score(X_train, y_train))
print(best_model.score(X_test, y_test))

0.5253186588715137
0.3300178400000161


#### 다항회귀에 Grid Search 적용

In [10]:
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
m = 100
X = 6 * np.random.rand(m, 1) - 3                   # -3에서 3 사이의 임의의 값 100개
y = 0.5 * X ** 2 + X + 2 + np.random.randn(m, 1)   # 2차 함수 활용. 잡음 추가됨.


In [11]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False) # include_bias -> 0차항 출력 여부
X_poly = poly.fit_transform(X)

## 차수 확인
print(poly.get_feature_names_out())

print(X[0])

print(X_poly[0])

['x0' 'x0^2']
[-0.75275929]
[-0.75275929  0.56664654]


In [12]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_poly, y)

In [13]:
y_pred = lr.predict(X_poly)

In [17]:
param_grid = {
    'degree': [1, 2, 3, 4, 5]
}

In [18]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, make_scorer

scorer = make_scorer(mean_squared_error, greater_is_better=False)
# GridSearchCV 수행
grid_search = GridSearchCV(
    estimator=PolynomialFeatures(),
    param_grid=param_grid,
    scoring=scorer,
    cv=5
    )

grid_search.fit(X, y)

Traceback (most recent call last):
  File "/Users/hyunsoo/Desktop/Lecture/lecture/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 971, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hyunsoo/Desktop/Lecture/lecture/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 279, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hyunsoo/Desktop/Lecture/lecture/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 370, in _score
    response_method = _check_response_method(estimator, self._response_method)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hyunsoo/Desktop/Lecture/lecture/lib/python3.12/site-packages/sklearn/utils/validation.py", line 2145, in _c

In [19]:
print(f"Best Parameters (GridSearchCV): {grid_search.best_params_}")
print(f"Best Score (GridSearchCV): {grid_search.best_score_}")

Best Parameters (GridSearchCV): {'degree': 1}
Best Score (GridSearchCV): nan
