In [27]:
import numpy as np
import pandas as pd
import copy
import numpy as np
from collections import Counter
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import datetime 

In [28]:
SIGMA = 0.1

def pairwise_euclidean_distance(X, Y, squared=False):
    XX = np.dot(X,X.T).diagonal()[:, np.newaxis]
    YY = np.dot(Y,Y.T).diagonal()[np.newaxis, :]
    distances = (-2 * np.dot(X,Y.T)) + XX + YY
    np.maximum(distances, 0, out=distances)
    if X is Y:
        np.fill_diagonal(distances, 0)
    return distances if squared else np.sqrt(distances, out=distances)

def pairwise_kernelized_euclidean_distance(X, Y, kernel, squared=False):
    XX = kernel(X,X).diagonal()[:, np.newaxis]
    YY = kernel(Y,Y).diagonal()[np.newaxis, :]
    distances = (-2 * kernel(X,Y)) + XX + YY
    np.maximum(distances, 0, out=distances)
    if X is Y:
        np.fill_diagonal(distances, 0)
    return distances if squared else np.sqrt(distances, out=distances)

def rbf_kernel(X, Y):
    K = pairwise_euclidean_distance(X, Y, squared=True)
    gamma = -1/(SIGMA**2)
    K *= gamma
    np.exp(K, K)
    return K

In [37]:
from sklearn.model_selection import StratifiedKFold

dataset = np.loadtxt('Datasets\BreastTissue.txt', delimiter = '\t', dtype='str') 
n = 9
x = dataset[:,:n].astype(np.float)
y = dataset[:,n]
mean_acc = {}    

for sigma in np.arange(0.1, 1.05, 0.05):
    SIGMA = sigma
    skf = StratifiedKFold(n_splits=10)
    pred_list = []
    for train, test in skf.split(x, y):
        X_train, y_train, X_test, y_test = x[train], y[train], x[test], y[test]
        distsances = pairwise_kernelized_euclidean_distance(X_train, X_test, rbf_kernel, squared=False)
        nearest_neibours = np.argmin(distsances, axis=0)
        predicted = y_train[nearest_neibours]
        accuracy = accuracy_score(y_test, predicted)
        pred_list.append(accuracy)
    mean_acc[sigma] = np.mean(pred_list)
mean_acc

{0.1: 0.21636363636363637,
 0.15000000000000002: 0.21636363636363637,
 0.20000000000000004: 0.21636363636363637,
 0.25000000000000006: 0.21636363636363637,
 0.30000000000000004: 0.21636363636363637,
 0.3500000000000001: 0.21636363636363637,
 0.40000000000000013: 0.21636363636363637,
 0.45000000000000007: 0.21636363636363637,
 0.5000000000000001: 0.21636363636363637,
 0.5500000000000002: 0.21636363636363637,
 0.6000000000000002: 0.21636363636363637,
 0.6500000000000001: 0.21636363636363637,
 0.7000000000000002: 0.21636363636363637,
 0.7500000000000002: 0.21636363636363637,
 0.8000000000000002: 0.21636363636363637,
 0.8500000000000002: 0.21636363636363637,
 0.9000000000000002: 0.21636363636363637,
 0.9500000000000003: 0.21636363636363637,
 1.0000000000000004: 0.21636363636363637}

In [23]:
from sklearn.model_selection import StratifiedKFold

dataset = np.loadtxt('Datasets\Diabetes.txt', delimiter = '\t', dtype='str')
n = 8
x = dataset[:,:n].astype(np.float)
y = dataset[:,n]
mean_acc = {}    

for sigma in np.arange(0.1, 1.05, 0.05):
    SIGMA = sigma
    skf = StratifiedKFold(n_splits=10)
    pred_list = []
    for train, test in skf.split(x, y):
        X_train, y_train, X_test, y_test = x[train], y[train], x[test], y[test]
        distsances = pairwise_kernelized_euclidean_distance(X_train, X_test, rbf_kernel, squared=False)
        nearest_neibours = np.argmin(distsances, axis=0)
        predicted = y_train[nearest_neibours]
        accuracy = accuracy_score(y_test, predicted)
        pred_list.append(accuracy)
    mean_acc[sigma] = np.mean(pred_list)
mean_acc

{0.1: 0.631578947368421,
 0.15000000000000002: 0.618421052631579,
 0.20000000000000004: 0.5263157894736842,
 0.25000000000000006: 0.5263157894736842,
 0.30000000000000004: 0.5,
 0.3500000000000001: 0.5131578947368423,
 0.40000000000000013: 0.5657894736842105,
 0.45000000000000007: 0.5789473684210528,
 0.5000000000000001: 0.5921052631578947,
 0.5500000000000002: 0.605263157894737,
 0.6000000000000002: 0.605263157894737,
 0.6500000000000001: 0.605263157894737,
 0.7000000000000002: 0.605263157894737,
 0.7500000000000002: 0.605263157894737,
 0.8000000000000002: 0.605263157894737,
 0.8500000000000002: 0.605263157894737,
 0.9000000000000002: 0.605263157894737,
 0.9500000000000003: 0.605263157894737,
 1.0000000000000004: 0.605263157894737}

In [33]:
from sklearn.model_selection import StratifiedKFold

dataset = np.loadtxt('Datasets\Glass.txt', delimiter = '\t', dtype='str')
n = 9
x = dataset[:,:n].astype(np.float)
y = dataset[:,n]
mean_acc = {}    

for sigma in np.arange(0.1, 1.05, 0.05):
    SIGMA = sigma
    skf = StratifiedKFold(n_splits=9)
    pred_list = []
    for train, test in skf.split(x, y):
        X_train, y_train, X_test, y_test = x[train], y[train], x[test], y[test]
        distsances = pairwise_kernelized_euclidean_distance(X_train, X_test, rbf_kernel, squared=False)
        nearest_neibours = np.argmin(distsances, axis=0)
        predicted = y_train[nearest_neibours]
        accuracy = accuracy_score(y_test, predicted)
        pred_list.append(accuracy)
    mean_acc[sigma] = np.mean(pred_list)
mean_acc

{0.1: 0.5656199677938809,
 0.15000000000000002: 0.5938003220611916,
 0.20000000000000004: 0.6125201288244768,
 0.25000000000000006: 0.6360708534621579,
 0.30000000000000004: 0.645330112721417,
 0.3500000000000001: 0.6545893719806765,
 0.40000000000000013: 0.6545893719806765,
 0.45000000000000007: 0.6638486312399356,
 0.5000000000000001: 0.6638486312399356,
 0.5500000000000002: 0.6638486312399356,
 0.6000000000000002: 0.6638486312399356,
 0.6500000000000001: 0.6686795491143318,
 0.7000000000000002: 0.6686795491143318,
 0.7500000000000002: 0.6686795491143318,
 0.8000000000000002: 0.6733091787439613,
 0.8500000000000002: 0.6733091787439613,
 0.9000000000000002: 0.6733091787439613,
 0.9500000000000003: 0.6733091787439613,
 1.0000000000000004: 0.6733091787439613}

In [34]:
from sklearn.model_selection import StratifiedKFold

dataset = np.loadtxt('Datasets\Ionosphere.txt', delimiter = ',', dtype='str')
n = 34
x = dataset[:,:n].astype(np.float)
y = dataset[:,n]
mean_acc = {}    

for sigma in np.arange(0.1, 1.05, 0.05):
    SIGMA = sigma
    skf = StratifiedKFold(n_splits=10)
    pred_list = []
    for train, test in skf.split(x, y):
        X_train, y_train, X_test, y_test = x[train], y[train], x[test], y[test]
        distsances = pairwise_kernelized_euclidean_distance(X_train, X_test, rbf_kernel, squared=False)
        nearest_neibours = np.argmin(distsances, axis=0)
        predicted = y_train[nearest_neibours]
        accuracy = accuracy_score(y_test, predicted)
        pred_list.append(accuracy)
    mean_acc[sigma] = np.mean(pred_list)
mean_acc

{0.1: 0.6553968253968254,
 0.15000000000000002: 0.6719047619047619,
 0.20000000000000004: 0.6803174603174603,
 0.25000000000000006: 0.6858730158730159,
 0.30000000000000004: 0.7028571428571427,
 0.3500000000000001: 0.7030952380952381,
 0.40000000000000013: 0.7116666666666667,
 0.45000000000000007: 0.7316666666666667,
 0.5000000000000001: 0.7431746031746032,
 0.5500000000000002: 0.7603968253968254,
 0.6000000000000002: 0.7775396825396825,
 0.6500000000000001: 0.7919047619047619,
 0.7000000000000002: 0.8119047619047619,
 0.7500000000000002: 0.8261904761904763,
 0.8000000000000002: 0.8461904761904762,
 0.8500000000000002: 0.8433333333333334,
 0.9000000000000002: 0.8461904761904762,
 0.9500000000000003: 0.8461904761904762,
 1.0000000000000004: 0.8461904761904762}

In [35]:
from sklearn.model_selection import StratifiedKFold

dataset = np.loadtxt('Datasets\Sonar.txt', delimiter = ',', dtype='str')
n = 60
x = dataset[:,:n].astype(np.float)
y = dataset[:,n]
mean_acc = {}    

for sigma in np.arange(0.1, 1.05, 0.05):
    SIGMA = sigma
    skf = StratifiedKFold(n_splits=10)
    pred_list = []
    for train, test in skf.split(x, y):
        X_train, y_train, X_test, y_test = x[train], y[train], x[test], y[test]
        distsances = pairwise_kernelized_euclidean_distance(X_train, X_test, rbf_kernel, squared=False)
        nearest_neibours = np.argmin(distsances, axis=0)
        predicted = y_train[nearest_neibours]
        accuracy = accuracy_score(y_test, predicted)
        pred_list.append(accuracy)
    mean_acc[sigma] = np.mean(pred_list)
mean_acc

{0.1: 0.5592857142857143,
 0.15000000000000002: 0.5633333333333332,
 0.20000000000000004: 0.605952380952381,
 0.25000000000000006: 0.6057142857142858,
 0.30000000000000004: 0.6057142857142858,
 0.3500000000000001: 0.6057142857142858,
 0.40000000000000013: 0.6057142857142858,
 0.45000000000000007: 0.6057142857142858,
 0.5000000000000001: 0.6057142857142858,
 0.5500000000000002: 0.6057142857142858,
 0.6000000000000002: 0.6057142857142858,
 0.6500000000000001: 0.6057142857142858,
 0.7000000000000002: 0.6057142857142858,
 0.7500000000000002: 0.6057142857142858,
 0.8000000000000002: 0.6057142857142858,
 0.8500000000000002: 0.6057142857142858,
 0.9000000000000002: 0.6057142857142858,
 0.9500000000000003: 0.6057142857142858,
 1.0000000000000004: 0.6057142857142858}

In [36]:
from sklearn.model_selection import StratifiedKFold

dataset = np.loadtxt('Datasets\Wine.txt', delimiter = ', ', dtype='str')
n = 13
x = dataset[:,:n].astype(np.float)
y = dataset[:,n]
mean_acc = {}    

for sigma in np.arange(0.1, 1.05, 0.05):
    SIGMA = sigma
    skf = StratifiedKFold(n_splits=10)
    pred_list = []
    for train, test in skf.split(x, y):
        X_train, y_train, X_test, y_test = x[train], y[train], x[test], y[test]
        distsances = pairwise_kernelized_euclidean_distance(X_train, X_test, rbf_kernel, squared=False)
        nearest_neibours = np.argmin(distsances, axis=0)
        predicted = y_train[nearest_neibours]
        accuracy = accuracy_score(y_test, predicted)
        pred_list.append(accuracy)
    mean_acc[sigma] = np.mean(pred_list)
mean_acc

{0.1: 0.33137254901960783,
 0.15000000000000002: 0.33137254901960783,
 0.20000000000000004: 0.33137254901960783,
 0.25000000000000006: 0.33137254901960783,
 0.30000000000000004: 0.33137254901960783,
 0.3500000000000001: 0.33137254901960783,
 0.40000000000000013: 0.33137254901960783,
 0.45000000000000007: 0.35424836601307186,
 0.5000000000000001: 0.35424836601307186,
 0.5500000000000002: 0.35424836601307186,
 0.6000000000000002: 0.365359477124183,
 0.6500000000000001: 0.376797385620915,
 0.7000000000000002: 0.38790849673202615,
 0.7500000000000002: 0.38790849673202615,
 0.8000000000000002: 0.40522875816993464,
 0.8500000000000002: 0.40522875816993464,
 0.9000000000000002: 0.4385620915032679,
 0.9500000000000003: 0.4385620915032679,
 1.0000000000000004: 0.47777777777777775}