In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [3]:
# IRIS 데이터 셋 로드
iris = load_iris()

# 특성과 타겟 데이터 분할
X = iris.data
y = iris.target

# 학습 데이터와 테스트 데이터로 분할
X_train,X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

In [4]:
# KNN 모델 초기화 및 파라미터 설정
knn = KNeighborsClassifier(n_neighbors=5,weights='uniform',algorithm='auto',n_jobs=1)

# 모델 학습
knn.fit(X_train,y_train)

# 분류 결과 예측
y_pred = knn.predict(X_test)

# 분류 결과 평가
report = classification_report(y_test,y_pred)
print(report)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.80      1.00      0.89         8
           2       1.00      0.86      0.92        14

    accuracy                           0.93        30
   macro avg       0.93      0.95      0.94        30
weighted avg       0.95      0.93      0.93        30



In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# 탐색할 파라미터 그리드 정의
param_grid = {
    'n_neighbors' : [1,3,5], # 이웃의 수
    'weights':['unform','distance'],
    'algorithm':['ball_tree','kd_tree','brute'] # 가중치 함수
}
# GridSearchCV를 사용하여 최적 파라미터 탐색
grid_search = GridSearchCV(knn,param_grid,cv=5)
grid_search.fit(X,y)


45 fits failed out of a total of 90.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\main\miniconda3\envs\openai_api_part1\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\main\miniconda3\envs\openai_api_part1\Lib\site-packages\sklearn\base.py", line 1382, in wrapper
    estimator._validate_params()
  File "c:\Users\main\miniconda3\envs\openai_api_part1\Lib\site-packages\sklearn\base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\main\miniconda3\envs\openai_api_part1\Lib\site-packages\sklearn\utils\_par

In [6]:
# 최적 파라미터 확인
best_params = grid_search.best_params_
print('Best Paramethers:',best_params)

# 최적 파라미터로 훈련된 모델 사용하여 예측
y_pred = grid_search.predict(X_test)

# 분류 결과 평가
report = classification_report(y_test,y_pred)
print("classification_report")
print(report)

Best Paramethers: {'algorithm': 'ball_tree', 'n_neighbors': 3, 'weights': 'distance'}
classification_report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00        14

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_breast_cancer

In [8]:
# 데이터를 로드하고 전처리(학습데이터의 표준화)
X_train,y_train = load_breast_cancer(return_X_y=True)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

In [None]:
# 모델과 파라미터 그리드를 정의
model =  LogisticRegression()
# 탐색할 파라미터 그리드 정의
param_grid = {
    'n_neighbors' : [1,3,5], # 이웃의 수
    'weights':['unform','distance'],
    'algorithm':['ball_tree','kd_tree','brute'] # 가중치 함수
}
# GridSearchCV를 사용하여 최적 파라미터 탐색
grid_search = GridSearchCV(knn,param_grid,cv=5)
grid_search.fit(X,y)

In [None]:
# 그리드 서치를 수행.
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(X_train_scaled,y_train)

# 최적 파라미터와 최고 정확도를 출력.
print("Best Hyperparameters",grid_search.best_params_)
print("Best Accuracy",grid_search.best_score_)