In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

from sklearn import datasets
#from torchvision import datasets
from sklearn.model_selection import ParameterSampler, RandomizedSearchCV, cross_val_score
from sklearn import metrics
import GPyOpt


In [6]:
mnist = datasets.load_wine()
X = mnist['data']
y = mnist['target']
print(X.shape, y.shape)

(178, 13) (178,)


In [7]:
#Create KNN Object.
knn = KNeighborsClassifier()

#Split data into training and testing.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

print(y_train)

#Training the model.
knn.fit(x_train, y_train)

#Predict test data set.
y_pred = knn.predict(x_test)

#Checking performance our model with classification report.
print(classification_report(y_test, y_pred))

[0 0 0 2 2 0 0 1 1 0 1 2 0 2 1 1 1 0 0 1 1 1 2 2 0 2 1 1 1 0 2 1 1 0 1 1 1
 0 1 2 0 0 1 1 2 2 2 1 0 0 2 0 0 1 0 1 0 1 2 2 1 1 1 1 1 0 2 2 1 1 1 0 0 1
 0 0 0 1 1 2 2 0 2 1 2 0 2 2 1 0 1 0 0 1 1 0 1 1 2 2 0 1 0 2 1 2 1 1 1 1 0
 2 0 2 1 0 2 0 1 0 2 0 0 2 0 1 0 0 2 0 0 1 1 1 0 2 0 1 1 1 2 1]
              precision    recall  f1-score   support

           0       0.71      1.00      0.83        10
           1       0.59      0.77      0.67        13
           2       0.60      0.23      0.33        13

    accuracy                           0.64        36
   macro avg       0.63      0.67      0.61        36
weighted avg       0.63      0.64      0.59        36



In [13]:
def load_MNIST():
    '''
    Function to load the MNIST training and test set with corresponding labels.

    :return: training_examples, training_labels, test_examples, test_labels
    '''

    # we want to flat the examples

    training_set = datasets.MNIST(root='./data', train=True, download=True, transform= None)
    test_set = datasets.MNIST(root='./data', train=False, download=True, transform= None)

    x_train = training_set.data.numpy().reshape(-1,28*28)
    x_test = test_set.data.numpy().reshape(-1,28*28)

    y_train = training_set.targets.numpy()
    y_test = test_set.targets.numpy()

    return x_train, y_train, x_test, y_test

## we can load the training set and test set
x_train, y_train, x_test, y_test = load_MNIST()

## we use a mask to selects those subsets
train_filter = np.isin(y_train, [3, 5, 8, 9])
test_filter = np.isin(y_test, [3, 5, 8, 9])

# apply the mask to the entire dataset
x_train, y_train = x_train[train_filter], y_train[train_filter]
x_test, y_test = x_test[test_filter], y_test[test_filter]

In [9]:
algo_list = ["ball_tree","kd_tree","brute"]
weight_list = ["uniform", "distance"]

algorithm = (0,1,2)
weights = (0,1)
n_neighbors = tuple(np.arange(1,100,1, dtype= np.int))
p = (1, 2)

domain = [{'name': 'algorithm', 'type': 'categorical', 'domain':algorithm},
          {'name': 'weights', 'type': 'categorical', 'domain':weights},
          {'name': 'n_neighbors', 'type': 'discrete', 'domain': n_neighbors},
          {'name': 'p', 'type': 'discrete', 'domain': p}]


def objective_function(x):
    scores_list = []
    param=x[0]
    
    temp1 = algo_list[int(param[0])]
    temp2 = weight_list[int(param[1])]
    model = KNeighborsClassifier(
        algorithm= temp1, 
        weights= temp2, 
        n_neighbors = int(param[2]), 
        p = int(param[3]),
        n_jobs=-1)
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    return score
    
opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                              domain = domain,         # box-constrains of the problem
                                              acquisition_type = 'MPI' ,      # Select acquisition function MPI, EI, LCB
                                             )
opt.acquisition.exploration_weight=0.5
opt.run_optimization(max_iter=1) 

x_best = opt.X[np.argmin(opt.Y)]
print()
print("The best parameters obtained: algorithm=" + str(x_best[0]) + ", weights=" + str(x_best[1]) +  ", n_neighbors=" + str(x_best[2]) + ", p=" + str(
    x_best[3]))


The best parameters obtained: algorithm=0.0, weights=0.0, n_neighbors=63.0, p=1.0


In [10]:
model = KNeighborsClassifier(
        algorithm= algo_list[int(x_best[0])], 
        weights= weight_list[int(x_best[1])],
        n_neighbors = int(x_best[2]), 
        p = int(x_best[3]),
        n_jobs=-1)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))

0.6666666666666666


In [79]:
import random, os
def seed_everything(TORCH_SEED):
    random.seed(TORCH_SEED)
    os.environ['PYTHONHASHSEED'] = str(TORCH_SEED)
    np.random.seed(TORCH_SEED)
seed_everything(42069)