In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

#from sklearn import datasets
from torchvision import datasets
from sklearn.model_selection import ParameterSampler, RandomizedSearchCV, cross_val_score
from sklearn import metrics
import GPyOpt


In [2]:
mnist = datasets.load_wine()
X = mnist['data']
y = mnist['target']
print(X.shape, y.shape)

(178, 13) (178,)


In [12]:
#Create KNN Object.
knn = KNeighborsClassifier(n_neighbors=1)

#Split data into training and testing.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

#Training the model.
knn.fit(x_train, y_train)

#Predict test data set.
y_pred = knn.predict(x_test)

#Checking performance our model with classification report.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.83      1.00      0.91        10
           1       0.83      0.77      0.80        13
           2       0.83      0.77      0.80        13

    accuracy                           0.83        36
   macro avg       0.83      0.85      0.84        36
weighted avg       0.83      0.83      0.83        36



In [15]:
def load_MNIST():

    # we want to flat the examples

    training_set = datasets.MNIST(root='./data', train=True, download=True, transform= None)
    test_set = datasets.MNIST(root='./data', train=False, download=True, transform= None)

    x_train = training_set.data.numpy().reshape(-1,28*28)
    x_test = test_set.data.numpy().reshape(-1,28*28)

    y_train = training_set.targets.numpy()
    y_test = test_set.targets.numpy()

    return x_train, y_train, x_test, y_test

## we can load the training set and test set
x_train, y_train, x_test, y_test = load_MNIST()

## we use a mask to selects those subsets
train_filter = np.isin(y_train, [3, 5, 8, 9])
test_filter = np.isin(y_test, [3, 5, 8, 9])

# apply the mask to the entire dataset
x_train, y_train = x_train[train_filter], y_train[train_filter]
x_test, y_test = x_test[test_filter], y_test[test_filter]


In [17]:
np.random.seed(42)

n_neighbors = tuple(np.arange(1,50,1, dtype=np.int))

domain = [
          {'name': 'n_neighbors', 'type': 'discrete', 'domain': n_neighbors},
         ]

def objective_function(x):
    scores_list = []
    param=x[0][0]
    
    model = KNeighborsClassifier(
        n_neighbors = int(param), 
        n_jobs=-1)
    
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    return score

acquisitions = ['MPI','EI','LCB']
for aq in acquisitions:
    opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                                  domain = domain,         # box-constrains of the problem
                                                  acquisition_type = aq ,      # Select acquisition function MPI, EI, LCB
                                                 )
    opt.acquisition.exploration_weight=0.5
    opt.run_optimization(max_iter=10) 

    x_best = opt.X[np.argmax(opt.Y)] # argmin -> argmax
    print()
    print("n_neighbors=" + str(x_best[0]))
    print(opt.Y)
    print(opt.X)


n_neighbors=15.0
[[0.96885457]
 [0.97091377]
 [0.97425997]
 [0.96885457]
 [0.97374517]
 [0.96962677]
 [0.96705277]
 [0.96602317]
 [0.96602317]]
[[39.]
 [29.]
 [15.]
 [43.]
 [ 8.]
 [41.]
 [44.]
 [49.]
 [49.]]

n_neighbors=17.0
[[0.97425997]
 [0.96628057]
 [0.97503218]
 [0.96833977]
 [0.97168597]
 [0.96602317]
 [0.96602317]]
[[16.]
 [47.]
 [17.]
 [40.]
 [28.]
 [49.]
 [49.]]

n_neighbors=13.0
[[0.97477477]
 [0.96962677]
 [0.96885457]
 [0.96628057]
 [0.96962677]
 [0.96602317]
 [0.96602317]]
[[13.]
 [41.]
 [42.]
 [47.]
 [37.]
 [49.]
 [49.]]


In [10]:
model = KNeighborsClassifier(
        n_neighbors = 1, 
        n_jobs=-1)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))

0.8333333333333334
