In [154]:
# Imports
import numpy as np
import math
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.layouts import column
from sklearn import datasets, metrics

In [192]:
# Data importing and organizing
def load_yacht_data(path="./YachtRegression.txt", main_val=5, regr_val=6, max_rows=50):
    """Read a whitespace-separated regression file and return ``(X, Y, typenames)``.

    Lines whose first character is ``#`` are treated as the header: every
    whitespace-separated token has its first character dropped and its
    underscores replaced by spaces, giving ``typenames``.  Every other line is
    parsed as a row of floats; at most ``max_rows`` rows are kept.

    Parameters
    ----------
    path : str
        Location of the text file.
    main_val : int
        Column index used as the single regressor.
    regr_val : int
        Column index used as the regression target.
    max_rows : int
        Maximum number of data rows to keep.

    Returns
    -------
    X : numpy.ndarray
        One single-element row per kept data row (column ``main_val``).
    Y : numpy.ndarray
        Column ``regr_val`` of every kept data row.
    typenames : list of str or None
        Header names, or ``None`` when the file has no ``#`` line.
    """
    typenames = None
    dataset = []
    # `with` guarantees the file handle is closed, even if a row fails to parse.
    with open(path, "r") as yacht_file:
        for line in yacht_file:
            if line[0] == "#":
                typenames = [x[1:].replace("_", " ") for x in line.split()]
            elif len(dataset) < max_rows:
                dataset.append([float(x) for x in line.split()])
    X = np.asarray([[row[main_val]] for row in dataset])
    Y = np.asarray([row[regr_val] for row in dataset])
    return X, Y, typenames


# Flip to True to run the notebook on the yacht file instead of synthetic data.
USE_YACHT_FILE = False
# Fixed seed so that Restart & Run All reproduces the same synthetic sample.
SEED = 0

if USE_YACHT_FILE:
    main_val = 5
    regr_val = 6
    X, Y, typenames = load_yacht_data(main_val=main_val, regr_val=regr_val)
else:
    X, Y = datasets.make_regression(n_samples=50, n_features=1, n_targets=1,
                                    noise=2, random_state=SEED)
    # The plotting cell labels its axes with typenames[main_val] and
    # typenames[regr_val]; define them for the synthetic data as well.
    typenames = ["x", "y"]
    main_val = 0
    regr_val = 1

In [196]:
# Algorithm definition
def lowess(x,X,Y,weights,kernel,h):
    """Kernel-weighted average of ``Y`` evaluated at the query point ``x``.

    Returns ``sum(Y[i] * weights[i] * K_i) / sum(weights[i] * K_i)`` with
    ``K_i = kernel(||x - X[i]|| * h)`` (Euclidean distance).

    Parameters
    ----------
    x : sequence of float
        Query point; same length as one row of ``X``.
    X : sequence of rows
        Sample points.
    Y : sequence
        Sample targets, aligned with ``X``.
    weights : sequence
        Per-sample weights, aligned with ``X``.
    kernel : callable
        Maps a scaled distance to a kernel value.
    h : float or None
        Factor the distance is multiplied by before it reaches ``kernel``;
        ``None`` means 1.

    Returns
    -------
    The weighted average, or ``0.0`` when the denominator is zero (empty
    input, no sample inside the kernel support, or all weights zero).
    """
    if h is None:
        h = 1
    x = np.asarray(x, dtype=float)
    upper_sum = 0
    down_sum = 0
    for i in range(len(X)):
        distance = np.linalg.norm(x - np.asarray(X[i], dtype=float))
        # Distance and kernel are evaluated once and shared by both sums.
        contribution = weights[i] * kernel(distance * h)
        upper_sum = upper_sum + Y[i] * contribution
        down_sum = down_sum + contribution
    if down_sum == 0:
        # Avoid 0/0; 0 is the fallback the original (unreachable) trailing
        # `return 0` pointed at.
        return 0.0
    return upper_sum / down_sum

def cross_validation(X,Y,kernel,steps,h,verbose=True):
    """Iteratively compute robustness weights from leave-one-out errors.

    Each step predicts every sample with ``lowess`` fitted on all the other
    samples (using the current weights), takes the absolute prediction errors
    ``e`` and their median ``s``, and sets the new weight of every sample to
    ``(1 - (e / (6 s))**2)**2`` when ``|e / (6 s)| <= 1`` and to 0 otherwise.

    Parameters
    ----------
    X, Y : array-like
        Sample points (one row per sample) and their targets.
    kernel : callable
        Kernel forwarded to ``lowess``.
    steps : int
        Number of re-weighting iterations.
    h : float or None
        Distance scale forwarded to ``lowess``.
    verbose : bool, default True
        When true, print the weights and a progress line after every step and
        ``Done!`` at the end (the original behaviour).

    Returns
    -------
    list
        One weight per sample.  All ones when ``steps`` is 0.
    """
    n_samples = len(X)
    weights = [1] * n_samples
    errors = [1] * n_samples
    for i in range(steps):
        for k in range(n_samples):
            # Leave sample k out, then predict it from the remaining samples.
            X_t = np.delete(X, k, 0)
            Y_t = np.delete(Y, k)
            weights_t = np.delete(weights, k)
            val_t = lowess(X[k], X_t, Y_t, weights_t, kernel, h)
            errors[k] = abs(val_t - Y[k])
        s = np.median(errors)
        if s == 0:
            # At least half of the samples are predicted exactly; e / (6 s)
            # would divide by zero and wipe out every weight.  Keep the
            # current weights and stop iterating.
            break
        scaled = [e / (6 * s) for e in errors]
        weights = [(1 - u ** 2) ** 2 if abs(u) <= 1 else 0 for u in scaled]
        if verbose:
            print(weights)
            print("step "+ str(i+1) + " completed!")
            print("-----"*10)
    if verbose:
        print("Done!")
    return weights

def kernel_gauss(val):
    """Gaussian kernel ``exp(-val**2 / 2) / sqrt(2*pi)``.

    The normalising constant is ``(2*pi)**-0.5`` so that the kernel is the
    standard normal density (the previous ``pi**-0.5`` did not integrate to 1).
    A constant factor cancels whenever the kernel is used in a ratio of
    weighted sums, so such estimates are unaffected.

    Accepts a scalar or a numpy array and returns the same shape.
    """
    return ((2 * math.pi) ** (-0.5)) * np.exp(-(val ** 2) / 2)


def kernel_cvart(val):
    """Quartic kernel: ``15/16 * (1 - val**2)**2`` for ``|val| <= 1``, else 0."""
    bump = (1 - val ** 2) ** 2
    # Indicator of the support [-1, 1]; multiplying by it zeroes the tails.
    if math.fabs(val) <= 1:
        inside = 1
    else:
        inside = 0
    return (15 / 16) * bump * inside

In [197]:
# Calculations
# Range of the regressor, reused later for the plot axes.
x_min, x_max = X.min(), X.max()

# Robustness weights (5 re-weighting steps, distance scale 0.1) for the Gaussian kernel.
weights_gauss = np.asarray(cross_validation(X, Y, kernel_gauss, 5, 0.1))
wg_min = weights_gauss.min()
wg_max = weights_gauss.max()

# Same procedure for the quartic kernel.
weights_cvart = np.asarray(cross_validation(X, Y, kernel_cvart, 5, 0.1))
wc_min = weights_cvart.min()
wc_max = weights_cvart.max()

[0.94889898427779862, 0.89710624762094648, 0.98751033343742733, 0.92096155502579824, 0.76302387042030628, 0.98893968043439817, 0.99352938340396835, 0.59324777269554541, 0.96990966156464942, 0.99895964646691071, 0.95125233332816483, 0.80924372866466798, 0.9941809718603537, 0.97173994366597838, 0.50026791600979825, 0.95490582611655317, 0.96033565913448227, 0.94297349867642399, 0.94903271887160201, 0.99183064025067558, 0.91342016169491547, 0.9505248245469321, 0.98698747511938822, 0.98181636799794947, 0.86894213830128364, 0.99101248786685248, 0.53871631790421259, 0.99824796862115628, 0.99771967912651005, 0.73124174968778832, 0.98280807522607783, 0.94314094616420785, 0.99999604216865612, 0.95828593945406104, 0.92469704243206363, 0.88709934800542867, 0.9243745590535315, 0.86460462698721607, 0.87691123343113619, 0.40224853377717074, 0.97429738385011377, 0.25763221005569947, 0.75532759116873871, 0.57507767636685869, 0.9348657829738245, 0.97689323282836138, 0.94725423379583362, 0.86982438971018

[0.93907911293277424, 0.88380604852218325, 0.98188855808208109, 0.93835017174801771, 0.75211389606764256, 0.99505299214163034, 0.98849013631066218, 0.59085859276554065, 0.9806748378237915, 0.99616404118218382, 0.96516441554507992, 0.83963561383916896, 0.99774248161615486, 0.97856164160519432, 0.50006503562595073, 0.96749645040608401, 0.95099906601893847, 0.93256325190317568, 0.93726957266372879, 0.9965234696787646, 0.93589766388648221, 0.96404876514023419, 0.9928729482964378, 0.97485107675908145, 0.89300101522925635, 0.99613132136290439, 0.54356503865705508, 0.99525620963111128, 0.99495530139305488, 0.72188025208334805, 0.97374873898254333, 0.9613073401836103, 0.99947298312301502, 0.9496461351965042, 0.91438688989164052, 0.90924500294935384, 0.94060769201250338, 0.88809855992741338, 0.90054427459286335, 0.41145705336325172, 0.98341711781933694, 0.33009825590486092, 0.74182280195000294, 0.62675583360945086, 0.95083345507995243, 0.96908255468052618, 0.96060069579685803, 0.892275320672086

In [198]:
# Visualizations
colors = [[250,0,0],[0,0,250]]
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,tap,save,"
r = (x_max-x_min)/100

# Number of evaluation points of the fitted curve and their spacing.
x_steps = 400
x_step = (x_max-x_min)/x_steps

# Axis labels come from the dataset header when one is available; fall back
# to generic names when no header variables are defined (or usable) so that
# the cell does not fail with a NameError.
try:
    x_label = typenames[main_val]
    y_label = typenames[regr_val]
except (NameError, TypeError, IndexError):
    x_label, y_label = "x", "y"


def _gray_hex(w):
    """Hex colour for weight ``w``: white for 0, black for 1."""
    level = int((1 - w) * 255)
    return "#%02x%02x%02x" % (level, level, level)


def _lowess_figure(weights, kernel):
    """Scatter the samples shaded by ``weights`` and overlay the fitted curve."""
    fig = figure(tools=TOOLS, x_range=(x_min, x_max))
    fig.scatter([x[0] for x in X], [y for y in Y], radius=r,
                fill_color=[_gray_hex(w) for w in weights],
                line_color="#000000")

    grid = [x_min + x_step * i for i in range(x_steps)]
    # NOTE(review): the curve is evaluated with h=1 while the weights in the
    # calculations cell were fitted with h=0.1 -- confirm this is intended.
    fig.line(grid,
             [lowess([g], X, Y, weights, kernel, 1) for g in grid],
             line_width=2)

    fig.xaxis.axis_label = x_label
    fig.yaxis.axis_label = y_label
    return fig


p_1 = _lowess_figure(weights_gauss, kernel_gauss)
p_2 = _lowess_figure(weights_cvart, kernel_cvart)

output_file("Lowess.html", title="Lowess")
output_notebook()

show(column(p_1,p_2), notebook_handle=True)