In [2]:
# Imports
import numpy as np
import math
import pandas
import bokeh
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.layouts import column
from sklearn import datasets, metrics
from sklearn.linear_model import Ridge
import numpy.linalg as linalg

In [12]:
# Data importing and organizing
# Fixed seed so that Restart & Run All regenerates exactly the same data set
# (and therefore the same train/test split and the same chosen tau).
SEED = 42
X,Y = datasets.make_regression(n_samples=50, n_features=3,n_targets=1,noise=2,random_state=SEED)
# Append a constant column of ones as the LAST column, so the last coefficient
# of a linear fit on X plays the role of the intercept.
X = np.column_stack([X, np.ones(len(X))])
# First half of the rows is kept for fitting, second half is held out for testing.
half = len(X)//2
X_test = X[half:]
Y_test = Y[half:]
X = X[:half]
Y = Y[:half]

In [13]:
# Algorithm definition  F = V D U^T
def msum(a,b):
    """Return the element-wise sum ``a + b`` as a numpy array.

    Parameters
    ----------
    a, b : array-like
        Operands of identical shape (nested lists or ndarrays).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the inputs.

    Raises
    ------
    ValueError
        If the two operands do not have the same shape. The operands are
        deliberately not broadcast or truncated, so a size mismatch cannot
        go unnoticed.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    if a.shape != b.shape:
        raise ValueError("msum: shape mismatch %s vs %s" % (a.shape, b.shape))
    # np.asarray keeps the return type an ndarray even for 0-d inputs.
    return np.asarray(np.add(a, b))

def msub(a,b):
    """Return the element-wise difference ``a - b`` as a numpy array.

    Parameters
    ----------
    a, b : array-like
        Operands of identical shape (nested lists or ndarrays).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the inputs.

    Raises
    ------
    ValueError
        If the two operands do not have the same shape. The operands are
        deliberately not broadcast or truncated, so a size mismatch cannot
        go unnoticed.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    if a.shape != b.shape:
        raise ValueError("msub: shape mismatch %s vs %s" % (a.shape, b.shape))
    # np.asarray keeps the return type an ndarray even for 0-d inputs.
    return np.asarray(np.subtract(a, b))

def mmult(a,b):
    """Multiply ``a`` by ``b`` with ``np.dot`` and hand the result back as an ndarray."""
    product = np.dot(a, b)
    return np.asarray(product)

def minv(a):
    """Invert the square matrix ``a`` with ``linalg.inv`` and return it as an ndarray."""
    inverse = linalg.inv(a)
    return np.asarray(inverse)

def mI(val,size):
    """Build a ``size`` x ``size`` array with ``val`` on the main diagonal and 0 elsewhere."""
    rows = [
        [val if col == row else 0 for col in range(size)]
        for row in range(size)
    ]
    return np.asarray(rows)

def mIs(val,size):
    """Build a ``size`` x ``size`` diagonal array from the sequence ``val``.

    Diagonal position ``r`` holds ``val[r]`` when ``val`` is long enough and 0
    otherwise; every off-diagonal entry is 0. Entries of ``val`` beyond
    ``size`` are ignored.
    """
    grid = []
    for r in range(size):
        # Pad with 0 once the supplied diagonal values run out.
        diag_entry = val[r] if r < len(val) else 0
        grid.append([diag_entry if c == r else 0 for c in range(size)])
    return np.asarray(grid)

def ridge(X,y,tau):
    """Ridge-regression coefficients for design matrix ``X`` and targets ``y``.

    Computes ``X^T (X X^T + tau I)^-1 y``. For ``tau > 0`` this equals
    ``(X^T X + tau I)^-1 X^T y``, so the smaller of the two Gram matrices is
    used and the linear system is solved directly instead of forming an
    explicit inverse. With more samples than features this avoids the
    ``n_samples x n_samples`` matrix ``X X^T``, which is singular at
    ``tau == 0``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix. No intercept column is added here.
    y : array-like, shape (n_samples,) or (n_samples, n_targets)
        Target values.
    tau : float
        Regularisation strength added to the diagonal of the Gram matrix.

    Returns
    -------
    numpy.ndarray
        Coefficients, one per column of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    numpy.linalg.LinAlgError
        If the regularised Gram matrix is singular (possible only when
        ``tau == 0`` and ``X`` is rank deficient).
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n_samples, n_features = X.shape
    if n_features <= n_samples:
        # Primal form: a small (n_features x n_features) system.
        gram = np.dot(X.T, X) + tau * np.eye(n_features)
        return np.asarray(linalg.solve(gram, np.dot(X.T, y)))
    # Dual form: fewer samples than features, so X X^T is the smaller matrix.
    gram = np.dot(X, X.T) + tau * np.eye(n_samples)
    return np.asarray(np.dot(X.T, linalg.solve(gram, y)))

In [14]:
# Calculations
# Sweep tau over 0.0, 0.1, ..., 100.0: fit ridge on the training half and score
# each fit on the held-out half.
tau_grid = [x/10 for x in range(0,1001)]
err_vals = []
err_vecs = []
for tau in tau_grid:
    tcoef = ridge(X,Y,tau)
    # Predict with the full coefficient vector. The column of ones is the last
    # column of X, so the dot product already includes the intercept term and
    # every feature, however many features the data set has.
    err_vals.append(metrics.mean_squared_error(Y_test,np.dot(X_test,tcoef)))
    err_vecs.append([x for x in tcoef])
# Row 0: tau grid, row 1: test mean squared error for that tau.
errors = np.asarray([tau_grid,err_vals])

# L1 norm (sum of absolute values) of every coefficient vector.
err_vecs_sums = [sum(abs(val) for val in vals) for vals in err_vecs]

# Keep only the fits whose L1 norm stays within the budget k.
k = 100
tau_vals_filtered = []
err_vals_filtered = []
err_vecs_filtered = []
for tau_i, mse_i, vec_i, l1_i in zip(tau_grid,err_vals,err_vecs,err_vecs_sums):
    if l1_i<=k:
        tau_vals_filtered.append(tau_i)
        err_vals_filtered.append(mse_i)
        err_vecs_filtered.append(vec_i)

if not err_vals_filtered:
    # Fail with an actionable message instead of numpy's empty-reduction error.
    raise ValueError("no tau in the grid gives sum(|a|) <= k=%s; raise k or extend the tau grid" % k)

# Among the admissible fits pick the one with the smallest test error
# (first occurrence on ties) and refit with that tau.
min_id = int(np.argmin(err_vals_filtered))
choosen_tau = tau_vals_filtered[min_id]
coefs = ridge(X,Y,choosen_tau)

In [15]:
# Visualizations
# NOTE(review): the `legend=` keyword used below is the older Bokeh spelling;
# newer Bokeh releases expect `legend_label=` instead - confirm against the
# installed Bokeh version before upgrading.
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,tap,save,"

# Figure 1: training points (default colour), test points (red) and the fitted
# model projected onto the first feature.
p = figure(tools=TOOLS)
p.scatter(X[:,0],Y)
p.scatter(X_test[:,0],Y_test,fill_color="#ff0000")
# The column of ones is the last column of X, so the intercept is the LAST
# coefficient; coefs[0] is the slope along the first feature.
p.line(X[:,0],coefs[-1]+X[:,0]*coefs[0],legend="regression line")

# x-coordinates of the vertical marker drawn at the selected tau.
tau = [choosen_tau,choosen_tau]


colors = bokeh.palettes.d3['Category20'][20]

# Figure 2: test error as a function of tau, with the selected tau marked.
p2 = figure(tools = TOOLS)
p2.line(errors[0],errors[1],legend="Error Squared",line_color=colors[0])
p2.line(tau,[np.min(errors[1])-1000,np.max(errors[1])+1000],legend="choosen tau",line_color=colors[3])
p2.xaxis.axis_label = 'tau'
p2.yaxis.axis_label = 'val'

# Figure 3: L1 norm of the coefficients as a function of tau against the
# budget K. The sum of absolute values is plotted because that is the quantity
# compared with k when the admissible tau values are selected.
p3 = figure(tools = TOOLS)
err_sum = [sum(abs(val) for val in vals) for vals in err_vecs]
p3.line(errors[0],err_sum,legend="sum of |a|",line_color=colors[1])
p3.line(errors[0],[k for x in errors[0]],legend="K",line_color=colors[2])
p3.line(tau,[np.min(err_sum)-1000,np.max(err_sum)+1000],legend="choosen tau",line_color=colors[3])
p3.xaxis.axis_label = 'tau'

output_file("Lasso.html", title="Lasso")
output_notebook()

show(column(p,p2,p3))