In [83]:
import numpy as np
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go
import math

# Absolute path of the "data" directory, resolved against the current working
# directory at the time this cell runs.
DATA_PATH = os.path.abspath('data')

def normalEquation_ridge(matX,vecY,lamda):
    """Solve ridge regression in closed form, leaving the bias unpenalised.

    Computes the ``w`` that satisfies ``(X^T X + lamda * D) w = X^T y`` where
    ``D`` is the identity matrix with its top-left entry set to zero, so the
    weight belonging to the first column of ``matX`` is not regularised.

    Parameters
    ----------
    matX : array-like of shape (n, p)
        Design matrix; the first column is the one exempt from the penalty.
    vecY : array-like of shape (n,) or (n, k)
        Target values.
    lamda : float
        Regularisation strength.

    Returns
    -------
    numpy.ndarray
        Weights with shape (p,) for 1-D targets, (p, k) for 2-D targets.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the regularised normal matrix is singular.
    """
    design = np.asarray(matX, dtype=float)
    targets = np.asarray(vecY, dtype=float)

    gram = design.T @ design

    # Penalise every weight except the first one (the bias term).
    penalty = lamda * np.identity(gram.shape[0])
    penalty[0, 0] = 0

    # Solve the linear system directly instead of forming an explicit inverse:
    # it is cheaper and loses less precision on ill-conditioned problems.
    return np.linalg.solve(gram + penalty, design.T @ targets)

def generate_feature_matrix(x, degree):
    """Build the polynomial design matrix ``[1, x, x**2, ..., x**degree]``.

    Parameters
    ----------
    x : array-like
        Sample positions (intended for 1-D input of length n).
    degree : int
        Highest power to include; must be a non-negative integer value.

    Returns
    -------
    numpy.ndarray
        For 1-D ``x`` of length n, an array of shape (n, degree + 1) whose
        column ``d`` holds ``x ** d``; column 0 is the bias column of ones.

    Raises
    ------
    ValueError
        If ``degree`` is negative or is not an integer value.
    """
    try:
        order = int(degree)
    except (TypeError, ValueError, OverflowError):
        raise ValueError("Degree must be a non-negative integer.") from None
    if order != degree or order < 0:
        raise ValueError("Degree must be a non-negative integer.")

    x = np.asarray(x, dtype=float)
    # Powers run from 1 up to and including `order`; stopping at `order - 1`
    # would make degree 2 indistinguishable from degree 1.
    columns = [np.ones(len(x))] + [x ** power for power in range(1, order + 1)]
    return np.column_stack(columns)

def Standardize(points):
    """Return the z-scores of ``points`` as a list.

    Uses the sample standard deviation (divisor ``n - 1``).

    Raises
    ------
    ValueError
        If fewer than two points are given or all points are identical, since
        the sample standard deviation is then undefined or zero.
    """
    values = np.asarray(points, dtype=float)
    if values.size < 2:
        raise ValueError("Standardize needs at least two points.")

    std = values.std(ddof=1)
    if std == 0:
        raise ValueError("Cannot standardize points with zero variance.")

    return ((values - values.mean()) / std).tolist()

def CalculateSE_ridge(x_positions, y_positions, w_list, lamda):
    """Ridge cost: squared error of a polynomial model plus an L2 penalty.

    The model is ``sum_d w_list[d] * x**d``, so ``len(w_list)`` weights describe
    a polynomial of degree ``len(w_list) - 1``. The bias ``w_list[0]`` is left
    out of the penalty, matching the objective that ``normalEquation_ridge``
    minimises.

    Parameters
    ----------
    x_positions : array-like
        Sample positions.
    y_positions : array-like
        Target values, one per sample.
    w_list : array-like
        Polynomial weights, lowest power first; must not be empty.
    lamda : float
        Regularisation strength.

    Returns
    -------
    float
        ``sum((X w - y)**2) + lamda * sum(w[1:]**2)``.

    Raises
    ------
    ValueError
        If ``w_list`` is empty or the number of targets differs from the
        number of samples.
    """
    weights = np.asarray(w_list, dtype=float).ravel()
    if weights.size == 0:
        raise ValueError("w_list must contain at least one weight.")

    targets = np.asarray(y_positions, dtype=float).ravel()
    # One column per weight: degree is one less than the number of weights.
    design = generate_feature_matrix(x_positions, weights.size - 1)
    if len(targets) != len(design):
        raise ValueError("x_positions and y_positions must have the same length.")

    residuals = design @ weights - targets
    penalty = lamda * np.sum(weights[1:] ** 2)
    return float(residuals @ residuals + penalty)

def draw_regression_line(w,name,x):
    """Create a plotly line trace for the straight line ``w[1] * x + w[0]``.

    The line is evaluated at 100 evenly spaced positions between the smallest
    and largest value of ``x`` and labelled ``name`` in the legend.
    """
    grid = np.linspace(min(x), max(x), 100)
    slope, intercept = w[1], w[0]
    return go.Scatter(x=grid, y=slope * grid + intercept, mode='lines', name=name)

In [81]:
# Plot sample data
df = pd.read_csv(os.path.join(DATA_PATH, "sin_noiseless.csv"))

# Sample some points, evenly spread over the rows of the file
NUMBER_OF_POINT = 15
# linspace yields floats; truncate them to integer row positions so the lookup
# is positional and does not rely on float keys matching the frame's index labels.
sample_index = np.linspace(0, len(df) - 1, NUMBER_OF_POINT).astype(int)
x = df['x'].to_numpy()[sample_index]
y = df['y'].to_numpy()[sample_index]

data = go.Scatter(x=x, y=y, mode='markers')
fig = go.Figure(data=[data])
fig.show()

In [84]:
# Fit a ridge regression line for each lambda and draw them over the samples

x_feature_matrix = generate_feature_matrix(x, 1)
(
    w_lamda_0,
    w_lamda_10,
    w_lamda_20,
    w_lamda_40,
    w_lamda_400,
) = (
    normalEquation_ridge(x_feature_matrix, y, strength)
    for strength in (0, 10, 20, 40, 400)
)

data = go.Scatter(x=x, y=y, mode='markers')

# One line trace per fitted weight vector, labelled after its variable name.
regression1, regression2, regression3, regression4, regression5 = (
    draw_regression_line(weights, label, x)
    for weights, label in (
        (w_lamda_0, "w_lamda_0"),
        (w_lamda_10, "w_lamda_10"),
        (w_lamda_20, "w_lamda_20"),
        (w_lamda_40, "w_lamda_40"),
        (w_lamda_400, "w_lamda_400"),
    )
)

fig = go.Figure(
    data=[data, regression1, regression2, regression3, regression4, regression5]
)
fig.show()

In [85]:
# Cost function graph: ridge cost as a function of the slope, bias fixed at 0
lamda_list = [0, 10, 20, 40, 400]
w_plot = np.linspace(-0.5, 3, 1000)
all_line = []

for lamda in lamda_list:
    # Evaluate the cost at every candidate slope for this lambda.
    SE_plot = [
        CalculateSE_ridge(x, y, np.array([0, slope]), lamda)
        for slope in w_plot
    ]

    # One curve per lambda
    all_line.append(
        go.Scatter(x=w_plot, y=SE_plot, mode='lines', name=f'lamda {lamda}')
    )

fig = go.Figure(data=all_line)
fig.update_layout(yaxis_range=[0, 20])
fig.show()