In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

In [21]:
df = pd.read_csv("UCI_Credit_Card.csv")
x = df.drop("default.payment.next.month", axis=1)
y = df["default.payment.next.month"]
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.8, random_state = 42)


# Params => GridSearchCV and Steps for Pipeline

In [30]:
# params = dict(n_neighbors=range(1,30), weights=['uniform', 'distance'], algorithm = ['ball_tree', 'kd_tree', 'brute'], p = [1,2], metric = ['minkowski', 'euclidean'])
params = dict(n_neighbors=range(1,30), algorithm = ['ball_tree', 'kd_tree', 'brute'])
steps = [('scaler',StandardScaler()), ('Gridsearch', GridSearchCV(KNeighborsClassifier(), params, verbose = True))]
pipe = Pipeline(steps)
pipe

In [31]:
pipe.fit(x_train, y_train)
print(pipe.score(x_test, y_test))

Fitting 5 folds for each of 87 candidates, totalling 435 fits
0.805875


In [32]:
best_params = pipe.named_steps['Gridsearch'].best_params_
print(best_params)

{'algorithm': 'ball_tree', 'n_neighbors': 21}


In [33]:
steps

[('scaler', StandardScaler()),
 ('Gridsearch',
  GridSearchCV(estimator=KNeighborsClassifier(),
               param_grid={'algorithm': ['ball_tree', 'kd_tree', 'brute'],
                           'n_neighbors': range(1, 30)},
               verbose=True))]

# Lets append the KNN classifier to the pipeline

In [34]:
steps.append(('knn', KNeighborsClassifier(**best_params)))
# pipe = Pipeline(steps)
steps

[('scaler', StandardScaler()),
 ('Gridsearch',
  GridSearchCV(estimator=KNeighborsClassifier(),
               param_grid={'algorithm': ['ball_tree', 'kd_tree', 'brute'],
                           'n_neighbors': range(1, 30)},
               verbose=True)),
 ('knn', KNeighborsClassifier(algorithm='ball_tree', n_neighbors=21))]

In [35]:
Pipeline(steps)

In [36]:
pipe.fit(x_train, y_train)

Fitting 5 folds for each of 87 candidates, totalling 435 fits


In [37]:
pipe.score(x_test,y_test)

0.805875

In [38]:
pipe['Gridsearch'].best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 21}

In [48]:
pipe.predict([list(x_test.iloc[0])])



array([0])