In [42]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

import pandas as pd

In [43]:
# Load iris as pandas objects and join the measurements with the class
# label (`target`) into a single frame.
iris_data = load_iris(as_frame=True)
df = pd.concat((iris_data.data, iris_data.target), axis="columns")

# Regression setup: sepal length is the response; every other column,
# including the class label, is a predictor.
response_column = "sepal length (cm)"
y = df[response_column]
X = df.drop(columns=[response_column])

# 50/50 split, stratified on the class label so both halves keep the same
# class proportions; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.5,
    stratify=X["target"],
    random_state=8000,
)

In [58]:
# Every predictor except the class label is numeric and gets standardised.
columns = [name for name in X_train.columns if name != "target"]

# A single ColumnTransformer handles both branches. Chaining two transformers
# does not work here: the first one returns a NumPy array by default, so the
# second can no longer select "target" by name. Keeping one transformer also
# means make_pipeline names the step "columntransformer", so nested parameters
# are addressed as e.g. "columntransformer__onehot__drop".
preprocessor = ColumnTransformer(
    [
        ("scaler", StandardScaler(), columns),
        ("onehot", OneHotEncoder(), ["target"]),
    ]
)

pipeline = make_pipeline(preprocessor, KNeighborsRegressor())

# Show the tunable parameter names/values (useful for building a param grid).
pipeline.get_params()



['sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [45]:
# Fit on the training split, then report the in-sample mean absolute error
# alongside the min/max of the response so the error can be read in context.
pipeline.fit(X_train, y_train)
train_predictions = pipeline.predict(X_train)
# scikit-learn metrics take (y_true, y_pred), in that order.
mae = mean_absolute_error(y_train, train_predictions)
print(mae, y_train.min(), y_train.max())

0.2450666666666666 4.4 7.7


In [46]:
# Score of the fitted pipeline on the held-out test split.
pipeline.score(X=X_test, y=y_test)

0.7918818420831126

In [56]:
# Search space. Keys follow the "<step>__<param>" convention and must match
# the names reported by pipeline.get_params().
param_grid = dict(
    kneighborsregressor__n_neighbors=range(2, 50),
    kneighborsregressor__weights=["uniform", "distance"],
    columntransformer__onehot__drop=[None, "first"],
)

# Exhaustive search with 5-fold cross-validation on the training split,
# using all available cores.
cv = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, n_jobs=-1)
cv.fit(X_train, y_train)

# In-sample score of the fitted search on the training split.
cv.score(X=X_train, y=y_train)


0.9985495795161949

In [53]:
# Score of the fitted grid search on the held-out test split.
cv.score(X=X_test, y=y_test)

0.8057849232351046