In [2]:
# Knn algorithm
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score
from data_prep import data_prep
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, classification_report, confusion_matrix
from imblearn.over_sampling import SVMSMOTE
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

# read data and select important features
x_train, y_train, x_test, y_test = data_prep()
x_train = x_train[["duration","poutcome_success"]]
x_test = x_test[["duration","poutcome_success"]]

In [3]:
# calculate f1 score
def Model_metric(model, x, y):
    y_pred = model.predict(x)
    print("f1_score: ", f1_score(y, y_pred))
    print("accuracy: ",accuracy_score(y, y_pred))
    print("Best parameters: ", model.best_params_)    

In [4]:
# Randomised Search CV with SVMSMOTE resampling
sm = SVMSMOTE(random_state=123)
x_train_res, y_train_res = sm.fit_resample(x_train, y_train)
p = [1, 2]
n_neighbors = list(range(1, 300, 2))
weights = ['distance', 'uniform']
hyperparameters = dict(p = p, n_neighbors = n_neighbors, weights = weights)

# random search cross validation
clf = RandomizedSearchCV(KNeighborsClassifier(),
                         hyperparameters,
                         n_iter = 100,
                         verbose = 10,
                         cv = 5)
# fit model with resampling data
randomised_KNN = clf.fit(x_train_res, y_train_res)
Model_metric(randomised_KNN, x_test, y_test)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV 1/5; 1/100] START n_neighbors=3, p=2, weights=uniform.......................
[CV 1/5; 1/100] END n_neighbors=3, p=2, weights=uniform;, score=0.621 total time=   0.2s
[CV 2/5; 1/100] START n_neighbors=3, p=2, weights=uniform.......................
[CV 2/5; 1/100] END n_neighbors=3, p=2, weights=uniform;, score=0.583 total time=   0.2s
[CV 3/5; 1/100] START n_neighbors=3, p=2, weights=uniform.......................
[CV 3/5; 1/100] END n_neighbors=3, p=2, weights=uniform;, score=0.679 total time=   0.2s
[CV 4/5; 1/100] START n_neighbors=3, p=2, weights=uniform.......................
[CV 4/5; 1/100] END n_neighbors=3, p=2, weights=uniform;, score=0.640 total time=   0.2s
[CV 5/5; 1/100] START n_neighbors=3, p=2, weights=uniform.......................
[CV 5/5; 1/100] END n_neighbors=3, p=2, weights=uniform;, score=0.669 total time=   0.2s
[CV 1/5; 2/100] START n_neighbors=53, p=1, weights=distance.....................
[CV 1/

[CV 3/5; 10/100] END n_neighbors=115, p=1, weights=distance;, score=0.817 total time=   0.1s
[CV 4/5; 10/100] START n_neighbors=115, p=1, weights=distance...................
[CV 4/5; 10/100] END n_neighbors=115, p=1, weights=distance;, score=0.799 total time=   0.2s
[CV 5/5; 10/100] START n_neighbors=115, p=1, weights=distance...................
[CV 5/5; 10/100] END n_neighbors=115, p=1, weights=distance;, score=0.789 total time=   0.2s
[CV 1/5; 11/100] START n_neighbors=171, p=2, weights=uniform....................
[CV 1/5; 11/100] END n_neighbors=171, p=2, weights=uniform;, score=0.689 total time=   0.4s
[CV 2/5; 11/100] START n_neighbors=171, p=2, weights=uniform....................
[CV 2/5; 11/100] END n_neighbors=171, p=2, weights=uniform;, score=0.603 total time=   0.4s
[CV 3/5; 11/100] START n_neighbors=171, p=2, weights=uniform....................
[CV 3/5; 11/100] END n_neighbors=171, p=2, weights=uniform;, score=0.781 total time=   0.4s
[CV 4/5; 11/100] START n_neighbors=171, 

[CV 1/5; 20/100] END n_neighbors=143, p=1, weights=uniform;, score=0.694 total time=   0.3s
[CV 2/5; 20/100] START n_neighbors=143, p=1, weights=uniform....................
[CV 2/5; 20/100] END n_neighbors=143, p=1, weights=uniform;, score=0.608 total time=   0.3s
[CV 3/5; 20/100] START n_neighbors=143, p=1, weights=uniform....................
[CV 3/5; 20/100] END n_neighbors=143, p=1, weights=uniform;, score=0.796 total time=   0.3s
[CV 4/5; 20/100] START n_neighbors=143, p=1, weights=uniform....................
[CV 4/5; 20/100] END n_neighbors=143, p=1, weights=uniform;, score=0.753 total time=   0.3s
[CV 5/5; 20/100] START n_neighbors=143, p=1, weights=uniform....................
[CV 5/5; 20/100] END n_neighbors=143, p=1, weights=uniform;, score=0.760 total time=   0.3s
[CV 1/5; 21/100] START n_neighbors=65, p=2, weights=distance....................
[CV 1/5; 21/100] END n_neighbors=65, p=2, weights=distance;, score=0.659 total time=   0.1s
[CV 2/5; 21/100] START n_neighbors=65, p=2,

[CV 4/5; 29/100] END n_neighbors=89, p=1, weights=uniform;, score=0.736 total time=   0.3s
[CV 5/5; 29/100] START n_neighbors=89, p=1, weights=uniform.....................
[CV 5/5; 29/100] END n_neighbors=89, p=1, weights=uniform;, score=0.734 total time=   0.3s
[CV 1/5; 30/100] START n_neighbors=201, p=1, weights=uniform....................
[CV 1/5; 30/100] END n_neighbors=201, p=1, weights=uniform;, score=0.690 total time=   0.4s
[CV 2/5; 30/100] START n_neighbors=201, p=1, weights=uniform....................
[CV 2/5; 30/100] END n_neighbors=201, p=1, weights=uniform;, score=0.610 total time=   0.4s
[CV 3/5; 30/100] START n_neighbors=201, p=1, weights=uniform....................
[CV 3/5; 30/100] END n_neighbors=201, p=1, weights=uniform;, score=0.781 total time=   0.4s
[CV 4/5; 30/100] START n_neighbors=201, p=1, weights=uniform....................
[CV 4/5; 30/100] END n_neighbors=201, p=1, weights=uniform;, score=0.732 total time=   0.4s
[CV 5/5; 30/100] START n_neighbors=201, p=1, 

[CV 2/5; 39/100] END n_neighbors=259, p=2, weights=distance;, score=0.583 total time=   0.4s
[CV 3/5; 39/100] START n_neighbors=259, p=2, weights=distance...................
[CV 3/5; 39/100] END n_neighbors=259, p=2, weights=distance;, score=0.838 total time=   0.4s
[CV 4/5; 39/100] START n_neighbors=259, p=2, weights=distance...................
[CV 4/5; 39/100] END n_neighbors=259, p=2, weights=distance;, score=0.817 total time=   0.4s
[CV 5/5; 39/100] START n_neighbors=259, p=2, weights=distance...................
[CV 5/5; 39/100] END n_neighbors=259, p=2, weights=distance;, score=0.816 total time=   0.4s
[CV 1/5; 40/100] START n_neighbors=35, p=2, weights=distance....................
[CV 1/5; 40/100] END n_neighbors=35, p=2, weights=distance;, score=0.652 total time=   0.0s
[CV 2/5; 40/100] START n_neighbors=35, p=2, weights=distance....................
[CV 2/5; 40/100] END n_neighbors=35, p=2, weights=distance;, score=0.665 total time=   0.0s
[CV 3/5; 40/100] START n_neighbors=35, 

[CV 5/5; 48/100] END n_neighbors=177, p=1, weights=distance;, score=0.812 total time=   0.3s
[CV 1/5; 49/100] START n_neighbors=135, p=1, weights=uniform....................
[CV 1/5; 49/100] END n_neighbors=135, p=1, weights=uniform;, score=0.694 total time=   0.3s
[CV 2/5; 49/100] START n_neighbors=135, p=1, weights=uniform....................
[CV 2/5; 49/100] END n_neighbors=135, p=1, weights=uniform;, score=0.612 total time=   0.3s
[CV 3/5; 49/100] START n_neighbors=135, p=1, weights=uniform....................
[CV 3/5; 49/100] END n_neighbors=135, p=1, weights=uniform;, score=0.795 total time=   0.3s
[CV 4/5; 49/100] START n_neighbors=135, p=1, weights=uniform....................
[CV 4/5; 49/100] END n_neighbors=135, p=1, weights=uniform;, score=0.756 total time=   0.3s
[CV 5/5; 49/100] START n_neighbors=135, p=1, weights=uniform....................
[CV 5/5; 49/100] END n_neighbors=135, p=1, weights=uniform;, score=0.753 total time=   0.3s
[CV 1/5; 50/100] START n_neighbors=75, p=2

[CV 3/5; 58/100] END n_neighbors=195, p=2, weights=distance;, score=0.835 total time=   0.3s
[CV 4/5; 58/100] START n_neighbors=195, p=2, weights=distance...................
[CV 4/5; 58/100] END n_neighbors=195, p=2, weights=distance;, score=0.817 total time=   0.3s
[CV 5/5; 58/100] START n_neighbors=195, p=2, weights=distance...................
[CV 5/5; 58/100] END n_neighbors=195, p=2, weights=distance;, score=0.814 total time=   0.3s
[CV 1/5; 59/100] START n_neighbors=285, p=1, weights=distance...................
[CV 1/5; 59/100] END n_neighbors=285, p=1, weights=distance;, score=0.634 total time=   0.4s
[CV 2/5; 59/100] START n_neighbors=285, p=1, weights=distance...................
[CV 2/5; 59/100] END n_neighbors=285, p=1, weights=distance;, score=0.584 total time=   0.4s
[CV 3/5; 59/100] START n_neighbors=285, p=1, weights=distance...................
[CV 3/5; 59/100] END n_neighbors=285, p=1, weights=distance;, score=0.838 total time=   0.4s
[CV 4/5; 59/100] START n_neighbors=28

[CV 1/5; 68/100] END n_neighbors=71, p=2, weights=uniform;, score=0.693 total time=   0.2s
[CV 2/5; 68/100] START n_neighbors=71, p=2, weights=uniform.....................
[CV 2/5; 68/100] END n_neighbors=71, p=2, weights=uniform;, score=0.633 total time=   0.2s
[CV 3/5; 68/100] START n_neighbors=71, p=2, weights=uniform.....................
[CV 3/5; 68/100] END n_neighbors=71, p=2, weights=uniform;, score=0.738 total time=   0.2s
[CV 4/5; 68/100] START n_neighbors=71, p=2, weights=uniform.....................
[CV 4/5; 68/100] END n_neighbors=71, p=2, weights=uniform;, score=0.709 total time=   0.2s
[CV 5/5; 68/100] START n_neighbors=71, p=2, weights=uniform.....................
[CV 5/5; 68/100] END n_neighbors=71, p=2, weights=uniform;, score=0.702 total time=   0.2s
[CV 1/5; 69/100] START n_neighbors=129, p=2, weights=distance...................
[CV 1/5; 69/100] END n_neighbors=129, p=2, weights=distance;, score=0.652 total time=   0.2s
[CV 2/5; 69/100] START n_neighbors=129, p=2, we

[CV 4/5; 77/100] END n_neighbors=131, p=1, weights=distance;, score=0.807 total time=   0.2s
[CV 5/5; 77/100] START n_neighbors=131, p=1, weights=distance...................
[CV 5/5; 77/100] END n_neighbors=131, p=1, weights=distance;, score=0.803 total time=   0.2s
[CV 1/5; 78/100] START n_neighbors=1, p=2, weights=uniform......................
[CV 1/5; 78/100] END n_neighbors=1, p=2, weights=uniform;, score=0.613 total time=   0.2s
[CV 2/5; 78/100] START n_neighbors=1, p=2, weights=uniform......................
[CV 2/5; 78/100] END n_neighbors=1, p=2, weights=uniform;, score=0.573 total time=   0.2s
[CV 3/5; 78/100] START n_neighbors=1, p=2, weights=uniform......................
[CV 3/5; 78/100] END n_neighbors=1, p=2, weights=uniform;, score=0.649 total time=   0.2s
[CV 4/5; 78/100] START n_neighbors=1, p=2, weights=uniform......................
[CV 4/5; 78/100] END n_neighbors=1, p=2, weights=uniform;, score=0.612 total time=   0.2s
[CV 5/5; 78/100] START n_neighbors=1, p=2, weight

[CV 2/5; 87/100] END n_neighbors=47, p=2, weights=uniform;, score=0.642 total time=   0.2s
[CV 3/5; 87/100] START n_neighbors=47, p=2, weights=uniform.....................
[CV 3/5; 87/100] END n_neighbors=47, p=2, weights=uniform;, score=0.717 total time=   0.2s
[CV 4/5; 87/100] START n_neighbors=47, p=2, weights=uniform.....................
[CV 4/5; 87/100] END n_neighbors=47, p=2, weights=uniform;, score=0.680 total time=   0.2s
[CV 5/5; 87/100] START n_neighbors=47, p=2, weights=uniform.....................
[CV 5/5; 87/100] END n_neighbors=47, p=2, weights=uniform;, score=0.683 total time=   0.2s
[CV 1/5; 88/100] START n_neighbors=115, p=1, weights=uniform....................
[CV 1/5; 88/100] END n_neighbors=115, p=1, weights=uniform;, score=0.693 total time=   0.3s
[CV 2/5; 88/100] START n_neighbors=115, p=1, weights=uniform....................
[CV 2/5; 88/100] END n_neighbors=115, p=1, weights=uniform;, score=0.621 total time=   0.3s
[CV 3/5; 88/100] START n_neighbors=115, p=1, we

[CV 5/5; 96/100] END n_neighbors=33, p=2, weights=uniform;, score=0.671 total time=   0.2s
[CV 1/5; 97/100] START n_neighbors=153, p=1, weights=uniform....................
[CV 1/5; 97/100] END n_neighbors=153, p=1, weights=uniform;, score=0.696 total time=   0.3s
[CV 2/5; 97/100] START n_neighbors=153, p=1, weights=uniform....................
[CV 2/5; 97/100] END n_neighbors=153, p=1, weights=uniform;, score=0.608 total time=   0.3s
[CV 3/5; 97/100] START n_neighbors=153, p=1, weights=uniform....................
[CV 3/5; 97/100] END n_neighbors=153, p=1, weights=uniform;, score=0.794 total time=   0.3s
[CV 4/5; 97/100] START n_neighbors=153, p=1, weights=uniform....................
[CV 4/5; 97/100] END n_neighbors=153, p=1, weights=uniform;, score=0.752 total time=   0.4s
[CV 5/5; 97/100] START n_neighbors=153, p=1, weights=uniform....................
[CV 5/5; 97/100] END n_neighbors=153, p=1, weights=uniform;, score=0.753 total time=   0.4s
[CV 1/5; 98/100] START n_neighbors=147, p=1,

In [None]:
# Grid Search Fit without resampling
p = [1, 2]
n_neighbors = list(range(1, 200, 2))
weights = ['distance', 'uniform']
hyperparameters = dict(p = p, n_neighbors = n_neighbors, weights = weights)

KNN = GridSearchCV(KNeighborsClassifier(),
                   param_grid = hyperparameters,
                   verbose = 10,
                   cv = 5)

GridSearch_KNN = KNN.fit(x_train, y_train)
Model_metric(GridSearch_KNN, x_test, y_test)

Fitting 5 folds for each of 400 candidates, totalling 2000 fits
[CV 1/5; 1/400] START n_neighbors=1, p=1, weights=distance......................
[CV 1/5; 1/400] END n_neighbors=1, p=1, weights=distance;, score=0.486 total time=   0.0s
[CV 2/5; 1/400] START n_neighbors=1, p=1, weights=distance......................
[CV 2/5; 1/400] END n_neighbors=1, p=1, weights=distance;, score=0.413 total time=   0.0s
[CV 3/5; 1/400] START n_neighbors=1, p=1, weights=distance......................
[CV 3/5; 1/400] END n_neighbors=1, p=1, weights=distance;, score=0.476 total time=   0.0s
[CV 4/5; 1/400] START n_neighbors=1, p=1, weights=distance......................
[CV 4/5; 1/400] END n_neighbors=1, p=1, weights=distance;, score=0.438 total time=   0.0s
[CV 5/5; 1/400] START n_neighbors=1, p=1, weights=distance......................
[CV 5/5; 1/400] END n_neighbors=1, p=1, weights=distance;, score=0.437 total time=   0.0s
[CV 1/5; 2/400] START n_neighbors=1, p=1, weights=uniform.......................


[CV 4/5; 10/400] END n_neighbors=5, p=1, weights=uniform;, score=0.563 total time=   0.1s
[CV 5/5; 10/400] START n_neighbors=5, p=1, weights=uniform......................
[CV 5/5; 10/400] END n_neighbors=5, p=1, weights=uniform;, score=0.530 total time=   0.1s
[CV 1/5; 11/400] START n_neighbors=5, p=2, weights=distance.....................
[CV 1/5; 11/400] END n_neighbors=5, p=2, weights=distance;, score=0.585 total time=   0.0s
[CV 2/5; 11/400] START n_neighbors=5, p=2, weights=distance.....................
[CV 2/5; 11/400] END n_neighbors=5, p=2, weights=distance;, score=0.560 total time=   0.0s
[CV 3/5; 11/400] START n_neighbors=5, p=2, weights=distance.....................
[CV 3/5; 11/400] END n_neighbors=5, p=2, weights=distance;, score=0.639 total time=   0.0s
[CV 4/5; 11/400] START n_neighbors=5, p=2, weights=distance.....................
[CV 4/5; 11/400] END n_neighbors=5, p=2, weights=distance;, score=0.561 total time=   0.0s
[CV 5/5; 11/400] START n_neighbors=5, p=2, weights=

[CV 2/5; 20/400] END n_neighbors=9, p=2, weights=uniform;, score=0.697 total time=   0.1s
[CV 3/5; 20/400] START n_neighbors=9, p=2, weights=uniform......................
[CV 3/5; 20/400] END n_neighbors=9, p=2, weights=uniform;, score=0.760 total time=   0.1s
[CV 4/5; 20/400] START n_neighbors=9, p=2, weights=uniform......................
[CV 4/5; 20/400] END n_neighbors=9, p=2, weights=uniform;, score=0.722 total time=   0.1s
[CV 5/5; 20/400] START n_neighbors=9, p=2, weights=uniform......................
[CV 5/5; 20/400] END n_neighbors=9, p=2, weights=uniform;, score=0.667 total time=   0.1s
[CV 1/5; 21/400] START n_neighbors=11, p=1, weights=distance....................
[CV 1/5; 21/400] END n_neighbors=11, p=1, weights=distance;, score=0.771 total time=   0.0s
[CV 2/5; 21/400] START n_neighbors=11, p=1, weights=distance....................
[CV 2/5; 21/400] END n_neighbors=11, p=1, weights=distance;, score=0.770 total time=   0.0s
[CV 3/5; 21/400] START n_neighbors=11, p=1, weights

[CV 1/5; 30/400] END n_neighbors=15, p=1, weights=uniform;, score=0.864 total time=   0.1s
[CV 2/5; 30/400] START n_neighbors=15, p=1, weights=uniform.....................
[CV 2/5; 30/400] END n_neighbors=15, p=1, weights=uniform;, score=0.840 total time=   0.1s
[CV 3/5; 30/400] START n_neighbors=15, p=1, weights=uniform.....................
[CV 3/5; 30/400] END n_neighbors=15, p=1, weights=uniform;, score=0.877 total time=   0.1s
[CV 4/5; 30/400] START n_neighbors=15, p=1, weights=uniform.....................
[CV 4/5; 30/400] END n_neighbors=15, p=1, weights=uniform;, score=0.858 total time=   0.1s
[CV 5/5; 30/400] START n_neighbors=15, p=1, weights=uniform.....................
[CV 5/5; 30/400] END n_neighbors=15, p=1, weights=uniform;, score=0.840 total time=   0.1s
[CV 1/5; 31/400] START n_neighbors=15, p=2, weights=distance....................
[CV 1/5; 31/400] END n_neighbors=15, p=2, weights=distance;, score=0.866 total time=   0.0s
[CV 2/5; 31/400] START n_neighbors=15, p=2, weig

[CV 4/5; 39/400] END n_neighbors=19, p=2, weights=distance;, score=0.878 total time=   0.0s
[CV 5/5; 39/400] START n_neighbors=19, p=2, weights=distance....................
[CV 5/5; 39/400] END n_neighbors=19, p=2, weights=distance;, score=0.863 total time=   0.0s
[CV 1/5; 40/400] START n_neighbors=19, p=2, weights=uniform.....................
[CV 1/5; 40/400] END n_neighbors=19, p=2, weights=uniform;, score=0.878 total time=   0.1s
[CV 2/5; 40/400] START n_neighbors=19, p=2, weights=uniform.....................
[CV 2/5; 40/400] END n_neighbors=19, p=2, weights=uniform;, score=0.879 total time=   0.1s
[CV 3/5; 40/400] START n_neighbors=19, p=2, weights=uniform.....................
[CV 3/5; 40/400] END n_neighbors=19, p=2, weights=uniform;, score=0.891 total time=   0.1s
[CV 4/5; 40/400] START n_neighbors=19, p=2, weights=uniform.....................
[CV 4/5; 40/400] END n_neighbors=19, p=2, weights=uniform;, score=0.887 total time=   0.1s
[CV 5/5; 40/400] START n_neighbors=19, p=2, wei

[CV 4/5; 49/400] END n_neighbors=25, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 49/400] START n_neighbors=25, p=1, weights=distance....................
[CV 5/5; 49/400] END n_neighbors=25, p=1, weights=distance;, score=0.878 total time=   0.0s
[CV 1/5; 50/400] START n_neighbors=25, p=1, weights=uniform.....................
[CV 1/5; 50/400] END n_neighbors=25, p=1, weights=uniform;, score=0.884 total time=   0.1s
[CV 2/5; 50/400] START n_neighbors=25, p=1, weights=uniform.....................
[CV 2/5; 50/400] END n_neighbors=25, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 50/400] START n_neighbors=25, p=1, weights=uniform.....................
[CV 3/5; 50/400] END n_neighbors=25, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 50/400] START n_neighbors=25, p=1, weights=uniform.....................
[CV 4/5; 50/400] END n_neighbors=25, p=1, weights=uniform;, score=0.905 total time=   0.1s
[CV 5/5; 50/400] START n_neighbors=25, p=1, wei

[CV 4/5; 59/400] END n_neighbors=29, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 59/400] START n_neighbors=29, p=2, weights=distance....................
[CV 5/5; 59/400] END n_neighbors=29, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 60/400] START n_neighbors=29, p=2, weights=uniform.....................
[CV 1/5; 60/400] END n_neighbors=29, p=2, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 60/400] START n_neighbors=29, p=2, weights=uniform.....................
[CV 2/5; 60/400] END n_neighbors=29, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 60/400] START n_neighbors=29, p=2, weights=uniform.....................
[CV 3/5; 60/400] END n_neighbors=29, p=2, weights=uniform;, score=0.893 total time=   0.1s
[CV 4/5; 60/400] START n_neighbors=29, p=2, weights=uniform.....................
[CV 4/5; 60/400] END n_neighbors=29, p=2, weights=uniform;, score=0.904 total time=   0.1s
[CV 5/5; 60/400] START n_neighbors=29, p=2, wei

[CV 4/5; 69/400] END n_neighbors=35, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 69/400] START n_neighbors=35, p=1, weights=distance....................
[CV 5/5; 69/400] END n_neighbors=35, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 70/400] START n_neighbors=35, p=1, weights=uniform.....................
[CV 1/5; 70/400] END n_neighbors=35, p=1, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 70/400] START n_neighbors=35, p=1, weights=uniform.....................
[CV 2/5; 70/400] END n_neighbors=35, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 70/400] START n_neighbors=35, p=1, weights=uniform.....................
[CV 3/5; 70/400] END n_neighbors=35, p=1, weights=uniform;, score=0.892 total time=   0.1s
[CV 4/5; 70/400] START n_neighbors=35, p=1, weights=uniform.....................
[CV 4/5; 70/400] END n_neighbors=35, p=1, weights=uniform;, score=0.904 total time=   0.1s
[CV 5/5; 70/400] START n_neighbors=35, p=1, wei

[CV 3/5; 79/400] END n_neighbors=39, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 79/400] START n_neighbors=39, p=2, weights=distance....................
[CV 4/5; 79/400] END n_neighbors=39, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 79/400] START n_neighbors=39, p=2, weights=distance....................
[CV 5/5; 79/400] END n_neighbors=39, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 80/400] START n_neighbors=39, p=2, weights=uniform.....................
[CV 1/5; 80/400] END n_neighbors=39, p=2, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 80/400] START n_neighbors=39, p=2, weights=uniform.....................
[CV 2/5; 80/400] END n_neighbors=39, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 80/400] START n_neighbors=39, p=2, weights=uniform.....................
[CV 3/5; 80/400] END n_neighbors=39, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 80/400] START n_neighbors=39, p=2, we

[CV 3/5; 89/400] END n_neighbors=45, p=1, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 89/400] START n_neighbors=45, p=1, weights=distance....................
[CV 4/5; 89/400] END n_neighbors=45, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 89/400] START n_neighbors=45, p=1, weights=distance....................
[CV 5/5; 89/400] END n_neighbors=45, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 90/400] START n_neighbors=45, p=1, weights=uniform.....................
[CV 1/5; 90/400] END n_neighbors=45, p=1, weights=uniform;, score=0.884 total time=   0.1s
[CV 2/5; 90/400] START n_neighbors=45, p=1, weights=uniform.....................
[CV 2/5; 90/400] END n_neighbors=45, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 90/400] START n_neighbors=45, p=1, weights=uniform.....................
[CV 3/5; 90/400] END n_neighbors=45, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 90/400] START n_neighbors=45, p=1, we

[CV 3/5; 99/400] END n_neighbors=49, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 99/400] START n_neighbors=49, p=2, weights=distance....................
[CV 4/5; 99/400] END n_neighbors=49, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 99/400] START n_neighbors=49, p=2, weights=distance....................
[CV 5/5; 99/400] END n_neighbors=49, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 100/400] START n_neighbors=49, p=2, weights=uniform....................
[CV 1/5; 100/400] END n_neighbors=49, p=2, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 100/400] START n_neighbors=49, p=2, weights=uniform....................
[CV 2/5; 100/400] END n_neighbors=49, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 100/400] START n_neighbors=49, p=2, weights=uniform....................
[CV 3/5; 100/400] END n_neighbors=49, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 100/400] START n_neighbors=49, p=2

[CV 3/5; 109/400] END n_neighbors=55, p=1, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 109/400] START n_neighbors=55, p=1, weights=distance...................
[CV 4/5; 109/400] END n_neighbors=55, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 109/400] START n_neighbors=55, p=1, weights=distance...................
[CV 5/5; 109/400] END n_neighbors=55, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 110/400] START n_neighbors=55, p=1, weights=uniform....................
[CV 1/5; 110/400] END n_neighbors=55, p=1, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 110/400] START n_neighbors=55, p=1, weights=uniform....................
[CV 2/5; 110/400] END n_neighbors=55, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 110/400] START n_neighbors=55, p=1, weights=uniform....................
[CV 3/5; 110/400] END n_neighbors=55, p=1, weights=uniform;, score=0.893 total time=   0.1s
[CV 4/5; 110/400] START n_neighbors=55, 

[CV 3/5; 119/400] END n_neighbors=59, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 119/400] START n_neighbors=59, p=2, weights=distance...................
[CV 4/5; 119/400] END n_neighbors=59, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 119/400] START n_neighbors=59, p=2, weights=distance...................
[CV 5/5; 119/400] END n_neighbors=59, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 120/400] START n_neighbors=59, p=2, weights=uniform....................
[CV 1/5; 120/400] END n_neighbors=59, p=2, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 120/400] START n_neighbors=59, p=2, weights=uniform....................
[CV 2/5; 120/400] END n_neighbors=59, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 120/400] START n_neighbors=59, p=2, weights=uniform....................
[CV 3/5; 120/400] END n_neighbors=59, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 120/400] START n_neighbors=59, 

[CV 3/5; 129/400] END n_neighbors=65, p=1, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 129/400] START n_neighbors=65, p=1, weights=distance...................
[CV 4/5; 129/400] END n_neighbors=65, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 129/400] START n_neighbors=65, p=1, weights=distance...................
[CV 5/5; 129/400] END n_neighbors=65, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 130/400] START n_neighbors=65, p=1, weights=uniform....................
[CV 1/5; 130/400] END n_neighbors=65, p=1, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 130/400] START n_neighbors=65, p=1, weights=uniform....................
[CV 2/5; 130/400] END n_neighbors=65, p=1, weights=uniform;, score=0.895 total time=   0.1s
[CV 3/5; 130/400] START n_neighbors=65, p=1, weights=uniform....................
[CV 3/5; 130/400] END n_neighbors=65, p=1, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 130/400] START n_neighbors=65, 

[CV 2/5; 139/400] END n_neighbors=69, p=2, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5; 139/400] START n_neighbors=69, p=2, weights=distance...................
[CV 3/5; 139/400] END n_neighbors=69, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 139/400] START n_neighbors=69, p=2, weights=distance...................
[CV 4/5; 139/400] END n_neighbors=69, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 139/400] START n_neighbors=69, p=2, weights=distance...................
[CV 5/5; 139/400] END n_neighbors=69, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 140/400] START n_neighbors=69, p=2, weights=uniform....................
[CV 1/5; 140/400] END n_neighbors=69, p=2, weights=uniform;, score=0.884 total time=   0.1s
[CV 2/5; 140/400] START n_neighbors=69, p=2, weights=uniform....................
[CV 2/5; 140/400] END n_neighbors=69, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 140/400] START n_neighbors=69,

[CV 1/5; 149/400] END n_neighbors=75, p=1, weights=distance;, score=0.885 total time=   0.0s
[CV 2/5; 149/400] START n_neighbors=75, p=1, weights=distance...................
[CV 2/5; 149/400] END n_neighbors=75, p=1, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5; 149/400] START n_neighbors=75, p=1, weights=distance...................
[CV 3/5; 149/400] END n_neighbors=75, p=1, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 149/400] START n_neighbors=75, p=1, weights=distance...................
[CV 4/5; 149/400] END n_neighbors=75, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 149/400] START n_neighbors=75, p=1, weights=distance...................
[CV 5/5; 149/400] END n_neighbors=75, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 150/400] START n_neighbors=75, p=1, weights=uniform....................
[CV 1/5; 150/400] END n_neighbors=75, p=1, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 150/400] START n_neighbors=75

[CV 4/5; 158/400] END n_neighbors=79, p=1, weights=uniform;, score=0.903 total time=   0.1s
[CV 5/5; 158/400] START n_neighbors=79, p=1, weights=uniform....................
[CV 5/5; 158/400] END n_neighbors=79, p=1, weights=uniform;, score=0.874 total time=   0.1s
[CV 1/5; 159/400] START n_neighbors=79, p=2, weights=distance...................
[CV 1/5; 159/400] END n_neighbors=79, p=2, weights=distance;, score=0.885 total time=   0.0s
[CV 2/5; 159/400] START n_neighbors=79, p=2, weights=distance...................
[CV 2/5; 159/400] END n_neighbors=79, p=2, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5; 159/400] START n_neighbors=79, p=2, weights=distance...................
[CV 3/5; 159/400] END n_neighbors=79, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 159/400] START n_neighbors=79, p=2, weights=distance...................
[CV 4/5; 159/400] END n_neighbors=79, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 159/400] START n_neighbors=79,

[CV 2/5; 168/400] END n_neighbors=83, p=2, weights=uniform;, score=0.895 total time=   0.1s
[CV 3/5; 168/400] START n_neighbors=83, p=2, weights=uniform....................
[CV 3/5; 168/400] END n_neighbors=83, p=2, weights=uniform;, score=0.895 total time=   0.1s
[CV 4/5; 168/400] START n_neighbors=83, p=2, weights=uniform....................
[CV 4/5; 168/400] END n_neighbors=83, p=2, weights=uniform;, score=0.903 total time=   0.1s
[CV 5/5; 168/400] START n_neighbors=83, p=2, weights=uniform....................
[CV 5/5; 168/400] END n_neighbors=83, p=2, weights=uniform;, score=0.875 total time=   0.1s
[CV 1/5; 169/400] START n_neighbors=85, p=1, weights=distance...................
[CV 1/5; 169/400] END n_neighbors=85, p=1, weights=distance;, score=0.885 total time=   0.0s
[CV 2/5; 169/400] START n_neighbors=85, p=1, weights=distance...................
[CV 2/5; 169/400] END n_neighbors=85, p=1, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5; 169/400] START n_neighbors=85, p

[CV 5/5; 177/400] END n_neighbors=89, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 178/400] START n_neighbors=89, p=1, weights=uniform....................
[CV 1/5; 178/400] END n_neighbors=89, p=1, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 178/400] START n_neighbors=89, p=1, weights=uniform....................
[CV 2/5; 178/400] END n_neighbors=89, p=1, weights=uniform;, score=0.895 total time=   0.1s
[CV 3/5; 178/400] START n_neighbors=89, p=1, weights=uniform....................
[CV 3/5; 178/400] END n_neighbors=89, p=1, weights=uniform;, score=0.895 total time=   0.1s
[CV 4/5; 178/400] START n_neighbors=89, p=1, weights=uniform....................
[CV 4/5; 178/400] END n_neighbors=89, p=1, weights=uniform;, score=0.902 total time=   0.1s
[CV 5/5; 178/400] START n_neighbors=89, p=1, weights=uniform....................
[CV 5/5; 178/400] END n_neighbors=89, p=1, weights=uniform;, score=0.876 total time=   0.1s
[CV 1/5; 179/400] START n_neighbors=89, p=

[CV 3/5; 187/400] END n_neighbors=93, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 187/400] START n_neighbors=93, p=2, weights=distance...................
[CV 4/5; 187/400] END n_neighbors=93, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 187/400] START n_neighbors=93, p=2, weights=distance...................
[CV 5/5; 187/400] END n_neighbors=93, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 188/400] START n_neighbors=93, p=2, weights=uniform....................
[CV 1/5; 188/400] END n_neighbors=93, p=2, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 188/400] START n_neighbors=93, p=2, weights=uniform....................
[CV 2/5; 188/400] END n_neighbors=93, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 3/5; 188/400] START n_neighbors=93, p=2, weights=uniform....................
[CV 3/5; 188/400] END n_neighbors=93, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 188/400] START n_neighbors=93, 

[CV 2/5; 197/400] END n_neighbors=99, p=1, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5; 197/400] START n_neighbors=99, p=1, weights=distance...................
[CV 3/5; 197/400] END n_neighbors=99, p=1, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 197/400] START n_neighbors=99, p=1, weights=distance...................
[CV 4/5; 197/400] END n_neighbors=99, p=1, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 197/400] START n_neighbors=99, p=1, weights=distance...................
[CV 5/5; 197/400] END n_neighbors=99, p=1, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 198/400] START n_neighbors=99, p=1, weights=uniform....................
[CV 1/5; 198/400] END n_neighbors=99, p=1, weights=uniform;, score=0.885 total time=   0.1s
[CV 2/5; 198/400] START n_neighbors=99, p=1, weights=uniform....................
[CV 2/5; 198/400] END n_neighbors=99, p=1, weights=uniform;, score=0.895 total time=   0.1s
[CV 3/5; 198/400] START n_neighbors=99,

[CV 5/5; 206/400] END n_neighbors=103, p=1, weights=uniform;, score=0.875 total time=   0.1s
[CV 1/5; 207/400] START n_neighbors=103, p=2, weights=distance..................
[CV 1/5; 207/400] END n_neighbors=103, p=2, weights=distance;, score=0.885 total time=   0.0s
[CV 2/5; 207/400] START n_neighbors=103, p=2, weights=distance..................
[CV 2/5; 207/400] END n_neighbors=103, p=2, weights=distance;, score=0.892 total time=   0.0s
[CV 3/5; 207/400] START n_neighbors=103, p=2, weights=distance..................
[CV 3/5; 207/400] END n_neighbors=103, p=2, weights=distance;, score=0.888 total time=   0.0s
[CV 4/5; 207/400] START n_neighbors=103, p=2, weights=distance..................
[CV 4/5; 207/400] END n_neighbors=103, p=2, weights=distance;, score=0.890 total time=   0.0s
[CV 5/5; 207/400] START n_neighbors=103, p=2, weights=distance..................
[CV 5/5; 207/400] END n_neighbors=103, p=2, weights=distance;, score=0.880 total time=   0.0s
[CV 1/5; 208/400] START n_neighb

[CV 2/5; 216/400] END n_neighbors=107, p=2, weights=uniform;, score=0.895 total time=   0.1s
[CV 3/5; 216/400] START n_neighbors=107, p=2, weights=uniform...................
[CV 3/5; 216/400] END n_neighbors=107, p=2, weights=uniform;, score=0.894 total time=   0.1s
[CV 4/5; 216/400] START n_neighbors=107, p=2, weights=uniform...................
[CV 4/5; 216/400] END n_neighbors=107, p=2, weights=uniform;, score=0.903 total time=   0.1s
[CV 5/5; 216/400] START n_neighbors=107, p=2, weights=uniform...................


In [None]:
# store training result
results = []
# iterate K with p, weights from above randomizedsearchcv result
for n_neighbors in tqdm(range(1, 300, 2)):
    clf = KNeighborsClassifier(p=2, n_neighbors=n_neighbors, weights='distance')
    clf.fit(x_train_res, y_train_res)
    y_test_pred = clf.predict(x_test)
    y_train_pred = clf.predict(x_train_res)
    # get f1 score of train and test
    f1_test = f1_score(y_test_pred, y_test)
    f1_train = f1_score(y_train_pred, y_train_res)
    # get mean cv score
    cv_f1 = np.mean(cross_val_score(clf, x_train_res, y_train_res, cv=5, scoring = "f1"))
    # melt result and store them in results
    results.append([n_neighbors, f1_train, f1_test, cv_f1])

results = pd.DataFrame(
    data=results,
    columns=['K', 'train f1 score', 'test f1 score', 'cv f1 score'],
)

results = pd.melt(
    results,
    id_vars=['K'],
    var_name='type',
    value_name='f1 score'
)  # Melt dataframe for easier plotting

# plot f1 score of train, test, cv in one plot
sns.lineplot(
    x='K',
    y='f1 score',
    hue='type',
    data=results,
)

plt.title("F1 Score of KNN")
plt.grid()

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score
from data_prep import data_prep
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, classification_report, confusion_matrix
from imblearn.over_sampling import SVMSMOTE
sm = SVMSMOTE(random_state=123)
x_train_res, y_train_res = sm.fit_resample(x_train, y_train)

# fit the model with the best estimators
clf = KNeighborsClassifier(p=2, n_neighbors=120, weights='distance')
clf.fit(x_train_res, y_train_res)
y_test_pred = clf.predict(x_test)
# output classification reports
print(classification_report(y_test,y_test_pred))

In [None]:
# Randomised Search CV without resampling
p = [1, 2]
n_neighbors = list(range(1, 200, 2))
weights = ['uniform', 'distance']
hyperparameters = dict(p = p, n_neighbors = n_neighbors, weights = weights)
clf1 = RandomizedSearchCV(KNeighborsClassifier(),
                         hyperparameters,
                         n_iter = 100,
                         verbose = 10,
                         random_state = 123,
                         cv = 5)
randomised_KNN1 = clf1.fit(x_train, y_train)
Model_metric(randomised_KNN1, x_test, y_test)