In [1]:
from sklearn import datasets
from sklearn.decomposition import PCA

In [2]:
# Load the iris dataset bundled with scikit-learn. Only the first two
# feature columns are kept as predictors; `iris.target` is the value the
# models below are asked to predict.
iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target


In [3]:
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsRegressor
import numpy as np

In [4]:
# Baseline: a default KNeighborsRegressor scored by cross-validation on the
# raw (unscaled) features.
#
# The built-in 'neg_root_mean_squared_error' scorer is used instead of
# make_scorer(mean_squared_error, squared=False): the `squared` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, so the old scorer no
# longer works on current releases.
modelo = KNeighborsRegressor()
scores = cross_validate(modelo, X, y, scoring='neg_root_mean_squared_error')
# The built-in scorer reports -RMSE (so that "greater is better"); flip the
# sign so 'test_score' keeps holding the plain per-fold RMSE.
scores['test_score'] = -scores['test_score']
print(scores['test_score'])
print(scores)
# Mean RMSE across folds, kept for comparison with the scaled variants.
sempad = np.mean(scores['test_score'])
print(f"Sem padronização: {sempad}")

[0.06324555 0.45607017 0.67330033 0.51510517 0.69761498]
{'fit_time': array([0.0015254 , 0.00048161, 0.0004406 , 0.00043941, 0.00042582]), 'score_time': array([0.00178027, 0.00077367, 0.00073314, 0.00073361, 0.000705  ]), 'test_score': array([0.06324555, 0.45607017, 0.67330033, 0.51510517, 0.69761498])}
Sem padronização: 0.48106724085424457


In [7]:
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes explored by the grid search.
parametros = {'n_neighbors': [1, 3, 5]}

# Pick n_neighbors by cross-validated RMSE. The built-in
# 'neg_root_mean_squared_error' scorer yields the same negated RMSE as
# make_scorer(mean_squared_error, greater_is_better=False, squared=False),
# without the `squared` keyword that scikit-learn deprecated in 1.4 and
# removed in 1.6.
modelo = GridSearchCV(KNeighborsRegressor(), parametros, scoring='neg_root_mean_squared_error')
modelo.fit(X, y)

GridSearchCV(estimator=KNeighborsRegressor(),
             param_grid={'n_neighbors': [1, 3, 5]},
             scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False))

In [8]:
# Display the grid search's cross-validation report: timings plus per-split,
# mean and ranked test scores for every candidate. Scores are negated RMSE,
# so values closer to zero are better.
modelo.cv_results_


{'mean_fit_time': array([0.00079308, 0.00070238, 0.00081921]),
 'mean_score_time': array([0.00124578, 0.00119901, 0.00126863]),
 'mean_test_score': array([-0.58169526, -0.50369031, -0.48106724]),
 'param_n_neighbors': masked_array(data=[1, 3, 5],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 1}, {'n_neighbors': 3}, {'n_neighbors': 5}],
 'rank_test_score': array([3, 2, 1], dtype=int32),
 'split0_test_score': array([-0.        , -0.        , -0.06324555]),
 'split1_test_score': array([-0.57735027, -0.50184844, -0.45607017]),
 'split2_test_score': array([-0.85634884, -0.75767676, -0.67330033]),
 'split3_test_score': array([-0.65828059, -0.55444333, -0.51510517]),
 'split4_test_score': array([-0.81649658, -0.704483  , -0.69761498]),
 'std_fit_time': array([1.10077070e-04, 3.39017471e-05, 1.31151111e-04]),
 'std_score_time': array([1.73104788e-04, 1.65885815e-04, 9.37227043e-05]),
 'std_test_score': array([0.3081622

In [9]:
# Display the estimator selected by the grid search.
modelo.best_estimator_


KNeighborsRegressor()

In [10]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# RMSE of the grid-searched model on the very data it was fitted on: an
# in-sample figure, not a cross-validated estimate.
ypred = modelo.predict(X)
mse = mean_squared_error(y_true=y, y_pred=ypred)
sqrt(mse)

0.3278210894171799

In [11]:
# Cross-validate the grid search itself (nested cross-validation): each outer
# fold re-runs the search on its training part, and the fitted searches are
# kept via return_estimator=True for inspection.
#
# The built-in 'neg_root_mean_squared_error' scorer replaces
# make_scorer(mean_squared_error, squared=False), whose `squared` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6.
scores = cross_validate(modelo, X, y, scoring='neg_root_mean_squared_error', return_estimator=True)
# The built-in scorer reports -RMSE; restore the sign so 'test_score' holds
# the plain per-fold RMSE.
scores['test_score'] = -scores['test_score']
print(scores['test_score'])
sempad = np.mean(scores['test_score'])
print(f"Sem padronização: {sempad}")

[0.06324555 0.45607017 0.67330033 0.51510517 0.69761498]
Sem padronização: 0.48106724085424457


In [12]:
# Inspect the full cross_validate result, including the fitted estimator
# that was kept for each fold.
scores

{'estimator': [GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [1, 3, 5]},
               scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [1, 3, 5]},
               scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [1, 3, 5]},
               scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [1, 3, 5]},
               scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [1, 3, 5]},
               scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=F

In [13]:
# Print the model chosen by the grid search within each outer fold.
for estimator in scores['estimator']:
    escolhido = estimator.best_estimator_
    print(escolhido)

KNeighborsRegressor()
KNeighborsRegressor()
KNeighborsRegressor()
KNeighborsRegressor()
KNeighborsRegressor()


In [14]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Candidate neighbourhood sizes for the scaled model.
parametros = {'n_neighbors': [7, 9, 11, 13]}

GridSearchKNN = GridSearchCV(KNeighborsRegressor(), parametros, scoring='neg_root_mean_squared_error')

# Standardise the features, then run the grid search as the final pipeline
# step. In this arrangement the scaler sits outside the search, so it is
# fitted on the whole outer training fold before the inner search splits it.
modelo = Pipeline([
    ("padronização", StandardScaler()),
    ("gsknn", GridSearchKNN)
])
# The built-in 'neg_root_mean_squared_error' scorer replaces
# make_scorer(mean_squared_error, squared=False), whose `squared` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6.
scores = cross_validate(modelo, X, y, scoring='neg_root_mean_squared_error')
# The built-in scorer reports -RMSE; restore the sign so 'test_score' holds
# the plain per-fold RMSE.
scores['test_score'] = -scores['test_score']
print(scores['test_score'])
compad = np.mean(scores['test_score'])
print(f"Com padronização: {compad}")

[0.0451754  0.44262667 0.62324584 0.50776287 0.69891156]
Com padronização: 0.46354446526594284


In [15]:
# Scaler + KNN pipeline that is tuned as a single unit, so the scaler is
# refitted inside every split evaluated by the grid search.
pipeline = Pipeline([
    ("padronização", StandardScaler()),
    ("knn", KNeighborsRegressor())
])

# The `knn__` prefix routes the parameter to the "knn" step of the pipeline.
parametros = {'knn__n_neighbors': [7, 9, 11, 13]}

modelo = GridSearchCV(pipeline, parametros, scoring='neg_root_mean_squared_error')

# The built-in 'neg_root_mean_squared_error' scorer replaces
# make_scorer(mean_squared_error, squared=False), whose `squared` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6.
scores = cross_validate(modelo, X, y, scoring='neg_root_mean_squared_error')
# The built-in scorer reports -RMSE; restore the sign so 'test_score' holds
# the plain per-fold RMSE.
scores['test_score'] = -scores['test_score']
print(scores['test_score'])
compad = np.mean(scores['test_score'])
print(f"Com padronização: {compad}")

[0.0451754  0.44262667 0.63440455 0.50776287 0.68263196]
Com padronização: 0.4625202887734445
