In [44]:
# Imports
import numpy as np
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.layouts import column
from sklearn import neighbors, datasets

In [45]:
# Data importing and organizing
# Synthetic data set: 100 two-dimensional samples spread over 3 classes.
# The seed is fixed so that "Restart Kernel -> Run All" regenerates the same
# samples, and therefore the same cross-validation result and plots.
RANDOM_SEED = 42
X,Y = datasets.make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0,
                                   n_repeated=0, n_classes=3, n_clusters_per_class=1,
                                   shift=0.5, hypercube=False, random_state=RANDOM_SEED)

In [46]:
# Definitions
def leave_one_out(X,Y):
    """Choose k for a distance-weighted k-NN classifier by leave-one-out cross-validation.

    Every sample is held out in turn; for each candidate k a
    ``KNeighborsClassifier(k, weights='distance')`` is fitted on the remaining
    samples and asked to classify the held-out one.

    Parameters
    ----------
    X : NumPy array of samples, one row per sample.
    Y : NumPy array of class labels aligned with the rows of ``X``.

    Returns
    -------
    (best_k, error_rates)
        ``error_rates[j]`` is the fraction (0..1) of held-out samples that were
        misclassified with ``k = j + 1``.  ``best_k`` is the smallest k that
        reaches the lowest error count.  With fewer than three samples there
        is no candidate k and ``(0, [])`` is returned.
    """
    item_amount = len(X)
    # NOTE(review): each training set holds item_amount - 1 samples, so
    # k = item_amount - 1 would also be a legal candidate; the original upper
    # bound is kept here - confirm whether excluding it is intended.
    candidate_ks = range(1, item_amount - 1)
    errors_per_k = [0] * len(candidate_ks)

    for i in range(item_amount):
        # The train/test split does not depend on k, so build it once per sample.
        X_t = np.delete(X, i, 0)
        Y_t = np.delete(Y, i, 0)
        held_out = X[i].reshape(1, -1)
        for slot, n_neighbors in enumerate(candidate_ks):
            clf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
            clf.fit(X_t, Y_t)
            # predict() returns a length-1 array; compare the scalar label.
            if clf.predict(held_out)[0] != Y[i]:
                errors_per_k[slot] += 1

    error_percentage = [errors / item_amount for errors in errors_per_k]

    minimal_good_neighbors = 0
    if errors_per_k:
        # min() keeps the first minimum, i.e. the smallest k on ties.  Picking
        # from the candidates guarantees a valid k (>= 1) even when every
        # candidate misclassifies every sample.
        best_slot = min(range(len(errors_per_k)), key=errors_per_k.__getitem__)
        minimal_good_neighbors = candidate_ks[best_slot]
    return minimal_good_neighbors,error_percentage

In [47]:
# Calculations
# Resolution of the grid on which the decision regions are evaluated.
x_steps = 400
y_steps = 400
# Bounding box of the samples, padded by 1 on every side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
x_step_len = (x_max-x_min)/x_steps
y_step_len = (y_max-y_min)/y_steps

# Coordinates of the grid nodes along each axis (lower/left edge of every cell).
grid_x = x_min + np.arange(x_steps) * x_step_len
grid_y = y_min + np.arange(y_steps) * y_step_len


def classify_grid(classifier, xs, ys):
    """Predict a class for every grid node.

    Returns an array of shape ``(len(ys), len(xs))`` whose element ``[row, col]``
    is the class predicted at ``(xs[col], ys[row])``.  One batched ``predict``
    call is issued per grid row instead of one call per node.
    """
    rows = []
    for y_value in ys:
        row_points = np.column_stack((xs, np.full(len(xs), y_value)))
        rows.append(classifier.predict(row_points))
    return np.array(rows)


# Baseline: plain nearest-neighbour classifier (k = 1).
clf1 = neighbors.KNeighborsClassifier(1, weights='distance')
clf1.fit(X, Y)
classes_field1 = classify_grid(clf1, grid_x, grid_y)

# Classifier using the k selected by leave-one-out cross-validation.
n_neighbors,cross_validation_result = leave_one_out(X,Y)
clf = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
clf.fit(X, Y)
classes_field = classify_grid(clf, grid_x, grid_y)

In [48]:
# Visualizations
# One RGB colour per class; samples use it as is, decision regions use a
# darker shade (every channel lowered by 40, clamped at 0).
colors = [[250,0,0],[0,250,0],[0,0,250]]
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,tap,save,"

point_colors = ["#%02x%02x%02x" % tuple(colors[label]) for label in Y]
region_palette = np.clip(np.array(colors) - 40, 0, None).astype(np.uint8)


def class_map_image(field):
    """Turn a grid of predicted class labels into a uint32 RGBA image.

    ``field`` is indexed ``[row (y), column (x)]``; a trailing length-1 axis
    (one single-element prediction array per node) is accepted as well.
    """
    labels = np.asarray(field).reshape(y_steps, x_steps).astype(int)
    image = np.empty((y_steps, x_steps), dtype=np.uint32)
    # Byte-level view onto the same buffer: one (R, G, B, A) quadruple per pixel.
    rgba = image.view(dtype=np.uint8).reshape((y_steps, x_steps, 4))
    rgba[..., :3] = region_palette[labels]
    rgba[..., 3] = 255
    return image


def sample_space_figure(title):
    """Create a figure spanning the padded bounding box of the samples."""
    return figure(tools=TOOLS, title=title, x_range=(x_min,x_max), y_range=(y_min,y_max))


def draw_samples(plot):
    """Draw the samples, coloured by their true class."""
    plot.scatter(X[:,0], X[:,1], radius=0.05,
                 fill_color=point_colors, line_color="#000000")


def draw_regions(plot, image):
    """Paint a class map so that it covers the whole plotted area."""
    plot.image_rgba(image=[image], x=x_min, y=y_min, dw=(x_max-x_min), dh=(y_max-y_min))


# Samples only.
p_0 = sample_space_figure("Samples")
draw_samples(p_0)

# Decision regions for the k chosen by leave-one-out cross-validation.
p_3 = sample_space_figure("Decision regions, k = " + str(n_neighbors))
img1 = class_map_image(classes_field)
draw_regions(p_3, img1)
draw_samples(p_3)

# Decision regions of the plain nearest-neighbour classifier.
p_1 = sample_space_figure("Decision regions, k = 1")
img = class_map_image(classes_field1)
draw_regions(p_1, img)
draw_samples(p_1)

# Leave-one-out error for every candidate k.  The cross-validation result holds
# fractions in 0..1, so it is scaled to match the 'error %' axis, and each bar
# is centred on its k (the first entry belongs to k = 1).
p_2 = figure(tools=TOOLS, title="Leave-one-out error per number of neighbors")
p_2.xaxis.axis_label = 'neighbors'
p_2.yaxis.axis_label = 'error %'
candidate_ks = np.arange(1, len(cross_validation_result) + 1)
p_2.quad(top=100 * np.asarray(cross_validation_result), bottom=0,
         left=candidate_ks - 0.5, right=candidate_ks + 0.5,
         fill_color="#036564", line_color="#033649")

output_file("KNN.html", title="KNN using "+str(n_neighbors) + " neighbors")
output_notebook()

show(column(p_0,p_3,p_1,p_2), notebook_handle=True)



