In [260]:
# Imports
import numpy as np
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import column
from sklearn import neighbors, datasets, metrics
import decimal

In [261]:
# Data importing and organizing
# Synthetic two-class dataset with two informative features.
# The seed is fixed so that "Restart Kernel -> Run All" regenerates exactly the
# same points, and therefore the same cross-validation curve and decision map.
RANDOM_SEED = 42
X,Y = datasets.make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0,
                                   n_repeated=0, n_classes=2, random_state=RANDOM_SEED)

In [262]:
# Algorithm definition
def frange(x, y, jump):
    """Yield ``x, x + jump, x + 2*jump, ...`` for as long as the value is below ``y``.

    A float-friendly counterpart of ``range``; the upper bound ``y`` is exclusive.

    Every value is computed as ``x + k * jump`` instead of repeatedly adding
    ``jump`` to a running total. Repeated addition accumulates rounding error,
    which makes e.g. ``frange(0, 1, 0.1)`` produce an extra, eleventh value of
    0.9999999999999999.

    Args:
        x: first value to yield.
        y: exclusive upper bound.
        jump: distance between consecutive values; must be positive whenever
            ``x < y``.

    Yields:
        The successive values, in increasing order.

    Raises:
        ValueError: if ``x < y`` and ``jump`` is zero or negative, because the
            sequence could then never reach ``y``. As this is a generator, the
            error surfaces when iteration starts.
    """
    if x < y and jump <= 0:
        raise ValueError("frange() jump must be positive, got %r" % (jump,))
    k = 0
    value = x
    while value < y:
        yield value
        k += 1
        value = x + k * jump

def cross_validation(X,Y,start,end,step,weight):
    """Leave-one-out search for the neighbourhood radius with the fewest errors.

    For every radius produced by ``frange(start, end, step)``, each sample is
    held out in turn, a ``RadiusNeighborsClassifier`` is fitted on the remaining
    samples, and the held-out sample is predicted. The classifier is created
    with ``outlier_label=-2``, so a prediction of -2 is compared against the true
    label like any other prediction.

    Args:
        X: array of samples, one row per sample.
        Y: array of class labels, aligned with ``X``.
        start, end, step: radius sweep bounds and increment (``end`` exclusive).
        weight: value passed through as the classifier's ``weights`` argument.

    Returns:
        A pair ``(best_radius, error_rates)``. ``error_rates`` holds, for each
        radius in sweep order, the number of misclassified samples divided by
        ``len(X)``. ``best_radius`` is the first radius with the lowest error
        count; it stays 0 if no radius produced fewer than ``len(X)`` errors.
    """
    n_items = len(X)
    error_rates = []
    best_radius = 0
    fewest_errors = n_items
    for radius in frange(start, end, step):
        misses = 0
        for held_out in range(n_items):
            sample, label = X[held_out], Y[held_out]
            # Train on everything except the held-out sample.
            train_X = np.delete(X, held_out, 0)
            train_Y = np.delete(Y, held_out, 0)
            model = neighbors.RadiusNeighborsClassifier(radius, weights=weight, outlier_label=-2)
            model.fit(train_X, train_Y)
            if model.predict(sample.reshape(1, -1)) != label:
                misses += 1
        error_rates.append(misses / n_items)
        # Strict comparison: on ties the earliest radius wins.
        if misses < fewest_errors:
            fewest_errors, best_radius = misses, radius
    return best_radius, error_rates

def kernel_distance(weights):
    """Triangular kernel: weight ``1 - |d| / max|d|`` for each distance ``d``.

    Intended as a callable ``weights`` argument for the neighbours classifier.
    Only the first row, ``weights[0]``, is read, so the result describes a
    single query point.

    The normalisation uses the largest *absolute* distance, so the farthest
    neighbour always gets weight 0 and every weight lies in [0, 1], also for
    the symmetric negative/positive input used to plot the kernel shape.

    Args:
        weights: sequence whose first element is the sequence of distances.

    Returns:
        Array of shape ``(1, n)`` with one weight per distance. If all
        distances are zero, every weight is 1 (avoids dividing by zero).
        If ``weights[0]`` is a plain Python float, a one-element zero array is
        returned instead (presumably a guard for a flat-array call from the
        library -- TODO confirm which caller triggers it).
    """
    if type(weights[0]) is float:
        return np.zeros(1)

    distances = np.abs(np.asarray(weights[0], dtype=float))
    if distances.size == 0:
        # No neighbours: keep the (1, 0) shape, nothing to normalise.
        return distances[np.newaxis, :]

    max_dist = distances.max()
    if max_dist == 0:
        # Every neighbour coincides with the query point.
        return np.ones((1, distances.size))
    return (1 - distances / max_dist)[np.newaxis, :]
    
def kernel_rbf(weights):
    """Gaussian (RBF) kernel: weight ``exp(-d**2)`` for each distance ``d``.

    Intended as a callable ``weights`` argument for the neighbours classifier.
    Only the first row, ``weights[0]``, is read, so the result describes a
    single query point.

    ``exp(-d**2)`` is what ``rbf_kernel([[d]], [[0]])`` evaluates to with its
    default gamma (1 / n_features = 1 for one feature); computing it directly
    on the whole distance vector avoids one library call per neighbour.

    Args:
        weights: sequence whose first element is the sequence of distances.

    Returns:
        Array of shape ``(1, n)`` with one weight per distance.
        If ``weights[0]`` is a plain Python float, a one-element zero array is
        returned instead (presumably a guard for a flat-array call from the
        library -- TODO confirm which caller triggers it).
    """
    if type(weights[0]) is float:
        return np.zeros(1)

    distances = np.asarray(weights[0], dtype=float)
    return np.exp(-np.square(distances))[np.newaxis, :]

In [263]:
# Global data
# Resolution of the decision-map grid: number of cells along each axis.
x_steps = 500
y_steps = 500

# Plot bounds: the range of each feature, padded by one unit on both sides.
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1

# Size of a single grid cell along each axis.
x_step_len = (x_max - x_min) / x_steps
y_step_len = (y_max - y_min) / y_steps

# Radius sweep for cross-validation: from cv_min up to the diagonal of the
# padded bounding box, divided into cv_steps equal increments.
cv_min = 0.1
cv_max = np.sqrt((x_max - x_min) ** 2 + (y_max - y_min) ** 2)
cv_steps = 1000
cv_step = (cv_max - cv_min) / cv_steps

In [264]:
# Calculations distance kernel
# Choose the radius with the fewest leave-one-out errors, then fit on all data.
# Grid points with no training sample inside the radius are labelled -1.
radius,cross_validation_result = cross_validation(X,Y,cv_min,cv_max,cv_step,"distance")
clf = neighbors.RadiusNeighborsClassifier(radius, weights="distance",outlier_label=-1)
clf.fit(X, Y)

# Classify every grid cell. classes_field[row][col] is the predicted class at
# (x_min + col*x_step_len, y_min + row*y_step_len): rows follow y, columns x.
# A whole row is predicted in one call; the built-in "distance" weighting
# handles multi-sample queries, so this replaces one predict() call per cell.
grid_x = x_min + np.arange(x_steps) * x_step_len
classes_field = []
for y in range(y_steps):
    row_points = np.column_stack((grid_x, np.full(x_steps, y_min + y * y_step_len)))
    classes_field.append(clf.predict(row_points))

In [265]:
# Visualizations distance kernel
# Class colours: entries 0 and 1 are the two classes. The last (grey) entry is
# reached through index -1, the outlier label of the grid classifier.
colors = [[250,0,0],[0,0,250],[112,112,112]]
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,tap,save,"

# --- Decision map -----------------------------------------------------------
p_1 = figure(tools=TOOLS,x_range=(x_min,x_max), y_range=(y_min,y_max))

# RGBA image with one pixel per grid cell; rows follow y and columns follow x,
# matching the classes_field[row][col] layout.
img = np.empty((y_steps, x_steps), dtype=np.uint32)
view = img.view(dtype=np.uint8).reshape((y_steps, x_steps, 4))
class_grid = np.asarray(classes_field).reshape((y_steps, x_steps)).astype(int)
# Background shade: each class colour darkened by 40 per channel, clamped at 0,
# so the training points drawn on top remain visible.
background = np.clip(np.asarray(colors) - 40, 0, 255).astype(np.uint8)
view[:, :, :3] = background[class_grid]
view[:, :, 3] = 255

p_1.image_rgba(image=[img], x=x_min, y=y_min, dw=(x_max-x_min), dh=(y_max-y_min))
# circle() is the glyph that takes a data-space radius.
p_1.circle(X[:,0], X[:,1], radius=0.05,
           fill_color=["#%02x%02x%02x" % tuple(colors[label]) for label in Y],
           line_color="#000000")

# --- Cross-validation error per radius --------------------------------------
p_2 = figure(tools=TOOLS)
p_2.xaxis.axis_label = 'radius'
# Values are misclassified / total, i.e. fractions in [0, 1], not percentages.
p_2.yaxis.axis_label = 'error rate'
bar_edges = [cv_min + k*cv_step for k in range(len(cross_validation_result) + 1)]
p_2.quad(top=cross_validation_result, bottom=0,
         left=bar_edges[:-1], right=bar_edges[1:],
         fill_color="#036564", line_color="#033649")

# --- Shape of the kernel ----------------------------------------------------
p_3 = figure()
kernel_xs = list(frange(-1,1,0.1))
p_3.line(kernel_xs, kernel_distance([kernel_xs])[0], line_width=2)

output_file("Parzen_distance_kernel.html", title="Parzen using "+str(radius) + " radius")

show(column(p_1,p_2,p_3))




In [266]:
# Calculations rbf kernel
# Choose the radius with the fewest leave-one-out errors, then fit on all data.
# Grid points with no training sample inside the radius are labelled -1.
radius,cross_validation_result = cross_validation(X,Y,cv_min,cv_max,cv_step,kernel_rbf)
clf = neighbors.RadiusNeighborsClassifier(radius, weights=kernel_rbf,outlier_label=-1)
clf.fit(X, Y)

# Classify every grid cell. classes_field[row][col] is the predicted class at
# (x_min + col*x_step_len, y_min + row*y_step_len): rows follow y, columns x.
# Cells are predicted one at a time because kernel_rbf only reads weights[0],
# i.e. it can weight the neighbours of a single query point per call.
classes_field = []
for y in range(y_steps):
    classes_line = []
    for x in range(x_steps):
        # predict() returns a one-element array; keep just the label.
        classes_line.append(clf.predict([[x_min+(x*x_step_len),y_min+(y*y_step_len)]])[0])
    classes_field.append(classes_line)

KeyboardInterrupt: 

In [None]:
# Visualizations rbf kernel
# Class colours: entries 0 and 1 are the two classes. The last (grey) entry is
# reached through index -1, the outlier label of the grid classifier.
colors = [[250,0,0],[0,0,250],[112,112,112]]
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,tap,save,"

# --- Decision map -----------------------------------------------------------
p_1 = figure(tools=TOOLS,x_range=(x_min,x_max), y_range=(y_min,y_max))

# RGBA image with one pixel per grid cell; rows follow y and columns follow x,
# matching the classes_field[row][col] layout.
img = np.empty((y_steps, x_steps), dtype=np.uint32)
view = img.view(dtype=np.uint8).reshape((y_steps, x_steps, 4))
class_grid = np.asarray(classes_field).reshape((y_steps, x_steps)).astype(int)
# Background shade: each class colour darkened by 40 per channel, clamped at 0,
# so the training points drawn on top remain visible.
background = np.clip(np.asarray(colors) - 40, 0, 255).astype(np.uint8)
view[:, :, :3] = background[class_grid]
view[:, :, 3] = 255

p_1.image_rgba(image=[img], x=x_min, y=y_min, dw=(x_max-x_min), dh=(y_max-y_min))
# circle() is the glyph that takes a data-space radius.
p_1.circle(X[:,0], X[:,1], radius=0.05,
           fill_color=["#%02x%02x%02x" % tuple(colors[label]) for label in Y],
           line_color="#000000")

# --- Cross-validation error per radius --------------------------------------
p_2 = figure(tools=TOOLS)
p_2.xaxis.axis_label = 'radius'
# Values are misclassified / total, i.e. fractions in [0, 1], not percentages.
p_2.yaxis.axis_label = 'error rate'
bar_edges = [cv_min + k*cv_step for k in range(len(cross_validation_result) + 1)]
p_2.quad(top=cross_validation_result, bottom=0,
         left=bar_edges[:-1], right=bar_edges[1:],
         fill_color="#036564", line_color="#033649")

# --- Shape of the kernel ----------------------------------------------------
p_3 = figure()
kernel_xs = list(frange(-2,2,0.1))
p_3.line(kernel_xs, kernel_rbf([kernel_xs])[0], line_width=2)

# NOTE(review): the file name says "sigmoid" although this cell plots the RBF
# kernel; the path is left unchanged in case something else relies on it.
output_file("Parzen_sigmoid_kernel.html", title="Parzen using "+str(radius) + " radius")

show(column(p_1,p_2,p_3))