In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

from sklearn import datasets
#from torchvision import datasets
from sklearn.model_selection import ParameterSampler, RandomizedSearchCV, cross_val_score
from sklearn import metrics
import GPyOpt


In [46]:
mnist = datasets.load_wine()
X = mnist['data']
y = mnist['target']
print(X.shape, y.shape)

(178, 13) (178,)


In [47]:
#Create KNN Object.
knn = KNeighborsClassifier()

#Split data into training and testing.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

print(y_train)

#Training the model.
knn.fit(x_train, y_train)

#Predict test data set.
y_pred = knn.predict(x_test)

#Checking performance our model with classification report.
print(classification_report(y_test, y_pred))

[0 0 0 2 2 0 0 1 1 0 1 2 0 2 1 1 1 0 0 1 1 1 2 2 0 2 1 1 1 0 2 1 1 0 1 1 1
 0 1 2 0 0 1 1 2 2 2 1 0 0 2 0 0 1 0 1 0 1 2 2 1 1 1 1 1 0 2 2 1 1 1 0 0 1
 0 0 0 1 1 2 2 0 2 1 2 0 2 2 1 0 1 0 0 1 1 0 1 1 2 2 0 1 0 2 1 2 1 1 1 1 0
 2 0 2 1 0 2 0 1 0 2 0 0 2 0 1 0 0 2 0 0 1 1 1 0 2 0 1 1 1 2 1]
              precision    recall  f1-score   support

           0       0.71      1.00      0.83        10
           1       0.59      0.77      0.67        13
           2       0.60      0.23      0.33        13

    accuracy                           0.64        36
   macro avg       0.63      0.67      0.61        36
weighted avg       0.63      0.64      0.59        36



In [31]:

def load_MNIST():

    # we want to flat the examples

    training_set = datasets.MNIST(root='./data', train=True, download=True, transform= None)
    test_set = datasets.MNIST(root='./data', train=False, download=True, transform= None)

    x_train = training_set.data.numpy().reshape(-1,28*28)
    x_test = test_set.data.numpy().reshape(-1,28*28)

    y_train = training_set.targets.numpy()
    y_test = test_set.targets.numpy()

    return x_train, y_train, x_test, y_test

## we can load the training set and test set
x_train, y_train, x_test, y_test = load_MNIST()

## we use a mask to selects those subsets
train_filter = np.isin(y_train, [3, 5, 8, 9])
test_filter = np.isin(y_test, [3, 5, 8, 9])

# apply the mask to the entire dataset
x_train, y_train = x_train[train_filter], y_train[train_filter]
x_test, y_test = x_test[test_filter], y_test[test_filter]


In [52]:
np.random.seed(42)
algo_list = ["ball_tree","kd_tree","brute"]
weight_list = ["uniform", "distance"]

algorithm = (0,1,2)
weights = (0,1)
n_neighbors = tuple(np.arange(1,100,1, dtype= np.int))
p = (1, 2)

domain = [{'name': 'algorithm', 'type': 'categorical', 'domain':algorithm},
          {'name': 'weights', 'type': 'categorical', 'domain':weights},
          {'name': 'n_neighbors', 'type': 'discrete', 'domain': n_neighbors},
          {'name': 'p', 'type': 'discrete', 'domain': p}]

def objective_function(x):
    scores_list = []
    param=x[0]
    
    temp1 = algo_list[int(param[0])]
    temp2 = weight_list[int(param[1])]
    model = KNeighborsClassifier(
        algorithm= temp1, 
        weights= temp2, 
        n_neighbors = int(param[2]), 
        p = int(param[3]),
        n_jobs=-1)
    
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    return score

acquisitions = ['MPI','EI','LCB']
for aq in acquisitions:
    opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                                  domain = domain,         # box-constrains of the problem
                                                  acquisition_type = aq ,      # Select acquisition function MPI, EI, LCB
                                                 )
    opt.acquisition.exploration_weight=0.5
    opt.run_optimization(max_iter=10) 

    x_best = opt.X[np.argmax(opt.Y)] # argmin -> argmax
    print()
    print("The best parameters obtained: algorithm=" + str(x_best[0]) + 
          ", weights=" + str(x_best[1]) +  
          ", n_neighbors=" + str(x_best[2]) + 
          ", p=" + str(x_best[3]))
    print(opt.Y)
    print(opt.X)


The best parameters obtained: algorithm=2.0, weights=0.0, n_neighbors=24.0, p=2.0
[[0.69444444]
 [0.69444444]
 [0.69444444]
 [0.75      ]
 [0.69444444]
 [0.61111111]
 [0.69444444]
 [0.69444444]
 [0.66666667]
 [0.69444444]
 [0.75      ]
 [0.69444444]
 [0.69444444]
 [0.72222222]
 [0.61111111]]
[[ 2.  0. 75.  2.]
 [ 0.  0. 75.  1.]
 [ 2.  1. 88.  2.]
 [ 2.  0. 24.  2.]
 [ 0.  0.  3.  2.]
 [ 0.  0.  2.  2.]
 [ 2.  0. 58.  2.]
 [ 1.  1. 59.  2.]
 [ 2.  0. 71.  2.]
 [ 1.  1. 63.  2.]
 [ 2.  1. 84.  1.]
 [ 0.  1. 76.  2.]
 [ 0.  0. 73.  1.]
 [ 1.  1. 78.  1.]
 [ 0.  0.  2.  2.]]

The best parameters obtained: algorithm=0.0, weights=0.0, n_neighbors=17.0, p=1.0
[[0.75      ]
 [0.69444444]
 [0.69444444]
 [0.75      ]
 [0.61111111]
 [0.61111111]
 [0.61111111]
 [0.61111111]
 [0.69444444]
 [0.61111111]
 [0.61111111]]
[[ 0.  0. 17.  1.]
 [ 0.  0. 76.  1.]
 [ 2.  1. 17.  2.]
 [ 2.  0. 16.  1.]
 [ 0.  0. 78.  2.]
 [ 0.  0. 79.  2.]
 [ 1.  0. 79.  2.]
 [ 2.  0. 79.  2.]
 [ 0.  1. 79.  2.]
 [ 0.  0. 7

In [50]:
model = KNeighborsClassifier(
        algorithm= algo_list[2], 
        weights= weight_list[0],
        n_neighbors = 24, 
        p = 2,
        n_jobs=-1)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))

0.75
0.7222222222222222
0.8333333333333334
