In [1]:
import math
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import sympy
from scipy import integrate
import sys

# 1. หาค่าความเอนเอียงและความแปรปรวนด้วย analytical method และ simulation ของแบบจำลองค่าคงที่ แบบจำลองเชิงเส้น และแบบจำลองเชิงเส้นผ่านจุดกำเนิด

## Analytical Method

**Constant Model, Target f(x) = sin(pi*x)**

In [2]:
# Constant Model, Target f(x) = sin(pi*x)
#
# The hypothesis is the constant (y1 + y2) / 2 built from the target values at
# two points x1 and x2. Every average below is an integral over [-1, 1]
# scaled by 1/2 per integration variable (1/4 for the (x1, x2) pair).

x = sympy.Symbol("x")
x1, x2 = sympy.symbols("x1 x2")

target_expr = sympy.sin(sympy.pi * x)
pair_limits = ((x1, -1, 1), (x2, -1, 1))

# Hypothesis obtained from a single two-point dataset.
c_model = (sympy.sin(sympy.pi * x1) + sympy.sin(sympy.pi * x2)) / 2

# Average hypothesis over all (x1, x2) pairs.
g_bar = 0.25 * sympy.integrate(c_model, *pair_limits)

# Squared deviation of the average hypothesis from the target, then its mean over x.
bias_order_2 = (g_bar - target_expr) ** 2
expected_bias_order_2 = 0.5 * sympy.integrate(bias_order_2, (x, -1, 1))

# Pointwise variance as E[g^2] - g_bar^2, then its mean over x.
var = 0.25 * sympy.integrate(c_model ** 2, *pair_limits) - g_bar ** 2
variance = 0.5 * sympy.integrate(var, (x, -1, 1))

# Sum of the two averaged terms.
E_over_dataset = expected_bias_order_2 + variance

print("g_bar:", g_bar)
print("bias^2:", expected_bias_order_2)
print("variance:", variance)
print("E_over_dataset:", E_over_dataset)

g_bar: 0
bias^2: 0.500000000000000
variance: 0.250000000000000
E_over_dataset: 0.750000000000000


In [3]:
# Turn the symbolic g_bar and var into callables; np.vectorize makes them
# return an array even when the expression does not depend on x.
g_bar_vec = np.vectorize(sympy.lambdify([x], g_bar, "numpy"))
var_vec = np.vectorize(sympy.lambdify([x], var, "numpy"))

# Grid on which everything is drawn
x_vals = np.linspace(-1, 1, 1000)

# Band edges: g_bar plus/minus the square root of the pointwise variance
mean_vals = g_bar_vec(x_vals)
spread_vals = np.sqrt(var_vec(x_vals))
upper_bound = mean_vals + spread_vals
lower_bound = mean_vals - spread_vals

# Traces in drawing order. The third trace uses fill='tonexty', which shades
# toward the trace added just before it (the lower bound).
band_and_curves = [
    go.Scatter(x=x_vals, y=upper_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Upper Bound'),
    go.Scatter(x=x_vals, y=lower_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Lower Bound'),
    go.Scatter(x=x_vals, y=upper_bound, fill='tonexty', mode='none',
               fillcolor='rgba(0, 0, 255, 0.3)', name='Area'),
    go.Scatter(x=x_vals, y=np.sin(math.pi * x_vals), mode='lines',
               line=dict(color='rgba(255, 0, 0, 1)'), name='Target Function'),
    go.Scatter(x=x_vals, y=mean_vals, mode='lines',
               line=dict(color='rgba(0, 0, 255, 1)'), name='g_bar'),
]

fig = go.Figure()
for curve in band_and_curves:
    fig.add_trace(curve)

# Layout: fixed y-range and a wide right margin
layout_options = dict(
    title='Constant Model, Target f(x) = sin(pi*x)',
    xaxis_title='X-axis',
    yaxis_title='Y-axis',
    legend=dict(x=0, y=1),
    showlegend=False,
    yaxis_range=[-2, 2],
    margin=go.layout.Margin(l=0, r=400, b=0, t=30),
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()

**Constant Model, Target f(x) = x^2**

In [4]:
# Constant Model, Target f(x) = x^2
#
# The hypothesis is the constant (y1 + y2) / 2 built from the target values at
# two points x1 and x2. Every average below is an integral over [-1, 1]
# scaled by 1/2 per integration variable (1/4 for the (x1, x2) pair).

x = sympy.Symbol("x")
x1, x2 = sympy.symbols("x1 x2")

target_expr = x ** 2
pair_limits = ((x1, -1, 1), (x2, -1, 1))

# Hypothesis obtained from a single two-point dataset.
c_model = (x1 ** 2 + x2 ** 2) / 2

# Average hypothesis over all (x1, x2) pairs.
g_bar = 0.25 * sympy.integrate(c_model, *pair_limits)

# Squared deviation of the average hypothesis from the target, then its mean over x.
bias_order_2 = (g_bar - target_expr) ** 2
expected_bias_order_2 = 0.5 * sympy.integrate(bias_order_2, (x, -1, 1))

# Pointwise variance as E[g^2] - g_bar^2, then its mean over x.
var = 0.25 * sympy.integrate(c_model ** 2, *pair_limits) - g_bar ** 2
variance = 0.5 * sympy.integrate(var, (x, -1, 1))

# Sum of the two averaged terms.
E_over_dataset = expected_bias_order_2 + variance

print("g_bar:", g_bar)
print("bias^2:", expected_bias_order_2)
print("variance:", variance)
print("E_over_dataset:", E_over_dataset)

g_bar: 0.333333333333333
bias^2: 0.0888888888888889
variance: 0.0444444444444445
E_over_dataset: 0.133333333333333


In [5]:
# Turn the symbolic g_bar and var into callables; np.vectorize makes them
# return an array even when the expression does not depend on x.
g_bar_vec = np.vectorize(sympy.lambdify([x], g_bar, "numpy"))
var_vec = np.vectorize(sympy.lambdify([x], var, "numpy"))

# Grid on which everything is drawn
x_vals = np.linspace(-1, 1, 1000)

# Band edges: g_bar plus/minus the square root of the pointwise variance
mean_vals = g_bar_vec(x_vals)
spread_vals = np.sqrt(var_vec(x_vals))
upper_bound = mean_vals + spread_vals
lower_bound = mean_vals - spread_vals

# Traces in drawing order. The third trace uses fill='tonexty', which shades
# toward the trace added just before it (the lower bound).
band_and_curves = [
    go.Scatter(x=x_vals, y=upper_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Upper Bound'),
    go.Scatter(x=x_vals, y=lower_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Lower Bound'),
    go.Scatter(x=x_vals, y=upper_bound, fill='tonexty', mode='none',
               fillcolor='rgba(0, 0, 255, 0.3)', name='Area'),
    go.Scatter(x=x_vals, y=x_vals**2, mode='lines',
               line=dict(color='rgba(255, 0, 0, 1)'), name='Target Function'),
    go.Scatter(x=x_vals, y=mean_vals, mode='lines',
               line=dict(color='rgba(0, 0, 255, 1)'), name='g_bar'),
]

fig = go.Figure()
for curve in band_and_curves:
    fig.add_trace(curve)

# Layout: fixed y-range and a wide right margin
layout_options = dict(
    title='Constant Model, Target f(x) = x^2',
    xaxis_title='X-axis',
    yaxis_title='Y-axis',
    legend=dict(x=0, y=1),
    showlegend=False,
    yaxis_range=[-2, 2],
    margin=go.layout.Margin(l=0, r=400, b=0, t=30),
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()

**Linear Model through the origin, Target f(x) = sin(pi*x)**

In [6]:
# Linear Model through the origin, Target f(x) = sin(pi*x)
#
# The hypothesis is slope * x with slope = (x1*y1 + x2*y2) / (x1^2 + x2^2),
# where y = sin(pi*x) at the two points. The averages over (x1, x2) are done
# numerically with scipy's dblquad, each split into two halves at 0.

x = sympy.Symbol("x", real=True)
x1, x2 = sympy.symbols("x1 x2")

numerator = x1 * sympy.sin(sympy.pi * x1) + x2 * sympy.sin(sympy.pi * x2)
denominator = x1 ** 2 + x2 ** 2
slope_sym = numerator / denominator
slope = sympy.lambdify([x1, x2], slope_sym, "numpy")
model = slope_sym * x

# Average slope: dblquad integrates the first lambda argument over [-1, 1]
# (inner) and the second over [-1, 0] and then [0, 1] (outer).
slope_int_neg = integrate.dblquad(slope, -1, 0, -1, 1)[0]
slope_int_pos = integrate.dblquad(slope, 0, 1, -1, 1)[0]
g_bar = 0.25 * (slope_int_neg + slope_int_pos) * x

# Squared deviation of the average hypothesis from the target, then its mean over x.
bias_order_2 = (g_bar - sympy.sin(sympy.pi * x)) ** 2
expected_bias_order_2 = sympy.N(0.5 * sympy.integrate(bias_order_2, (x, -1, 1)))

# Average of slope^2; here the inner variable is the one split at 0.
prep = sympy.lambdify([x1, x2], slope_sym ** 2, "numpy")
slope_sq_int_pos = integrate.dblquad(prep, -1, 1, 0, 1)[0]
slope_sq_int_neg = integrate.dblquad(prep, -1, 1, -1, 0)[0]

# Pointwise variance as E[g^2] - g_bar^2, then its mean over x.
var = 0.25 * (slope_sq_int_pos + slope_sq_int_neg) * x ** 2 - g_bar ** 2
variance = 0.5 * sympy.integrate(var, (x, -1, 1))

# Sum of the two averaged terms.
E_over_dataset = expected_bias_order_2 + variance

print("g_bar:", g_bar)
print("bias^2:", expected_bias_order_2)
print("variance:", variance)
print("E_over_dataset:", E_over_dataset)

g_bar: 1.42802717484247*x
bias^2: 0.270643535713256
variance: 0.236575925913684
E_over_dataset: 0.507219461626941


In [7]:
# Turn the symbolic g_bar and var into callables that accept the x grid.
g_bar_vec = np.vectorize(sympy.lambdify([x], g_bar, "numpy"))
var_vec = np.vectorize(sympy.lambdify([x], var, "numpy"))

# Grid on which everything is drawn
x_vals = np.linspace(-1, 1, 1000)

# Band edges: g_bar plus/minus the square root of the pointwise variance
mean_vals = g_bar_vec(x_vals)
spread_vals = np.sqrt(var_vec(x_vals))
upper_bound = mean_vals + spread_vals
lower_bound = mean_vals - spread_vals

# Traces in drawing order. The third trace uses fill='tonexty', which shades
# toward the trace added just before it (the lower bound).
band_and_curves = [
    go.Scatter(x=x_vals, y=upper_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Upper Bound'),
    go.Scatter(x=x_vals, y=lower_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Lower Bound'),
    go.Scatter(x=x_vals, y=upper_bound, fill='tonexty', mode='none',
               fillcolor='rgba(0, 0, 255, 0.3)', name='Area'),
    go.Scatter(x=x_vals, y=np.sin(math.pi * x_vals), mode='lines',
               line=dict(color='rgba(255, 0, 0, 1)'), name='Target Function'),
    go.Scatter(x=x_vals, y=mean_vals, mode='lines',
               line=dict(color='rgba(0, 0, 255, 1)'), name='g_bar'),
]

fig = go.Figure()
for curve in band_and_curves:
    fig.add_trace(curve)

# Layout: fixed y-range and a wide right margin
layout_options = dict(
    title='Linear Model through the origin, Target f(x) = sin(pi*x)',
    xaxis_title='X-axis',
    yaxis_title='Y-axis',
    legend=dict(x=0, y=1),
    showlegend=False,
    yaxis_range=[-2, 2],
    margin=go.layout.Margin(l=0, r=400, b=0, t=30),
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()

**Linear Model through the origin, Target f(x) = x^2**

In [8]:
# Linear Model through the origin, Target f(x) = x^2
#
# The hypothesis is slope * x with slope = (x1*y1 + x2*y2) / (x1^2 + x2^2),
# where y = x^2 at the two points, i.e. (x1^3 + x2^3) / (x1^2 + x2^2).

x = sympy.Symbol("x", real = True)
x1 = sympy.Symbol("x1")
x2 = sympy.Symbol("x2")
# NOTE(review): w1 is not referenced anywhere in this cell; it is kept only in
# case another cell relies on it — confirm and drop if unused.
w1 = sympy.Symbol("w1")

# Name the slope explicitly so its square can be integrated directly below.
slope_sym = (x1 ** 3 + x2 ** 3) / (x1 ** 2 + x2 ** 2)
model = slope_sym * x

# Average hypothesis over all (x1, x2) pairs (1/4 scales the double integral).
g_bar = sympy.simplify((1/4)*sympy.integrate(model, (x1, -1, 1), (x2, -1, 1)))
print("g_bar:", g_bar)

# Squared deviation of the average hypothesis from the target, then its mean over x.
bias_order_2 = (g_bar - x**2)**2
expected_bias_order_2 = (1/2) * sympy.integrate(bias_order_2, (x, -1, 1))
print("bias^2:", expected_bias_order_2)

# The average of slope^2 is evaluated numerically with dblquad, split at 0 in
# the outer variable. An earlier symbolic sympy.integrate(model**2, ...) version
# was disabled — presumably because it was impractical; TODO confirm.
test = sympy.lambdify([x1, x2], slope_sym**2, "numpy")
slope_sq_mean = (1/4)*(integrate.dblquad(test, -1, 0, -1, 1)[0] + integrate.dblquad(test, 0, 1, -1, 1)[0])

# Pointwise variance as E[g^2] - g_bar^2. The g_bar^2 term is written out so
# this cell uses the same formula as the other models instead of silently
# relying on g_bar being zero.
var = slope_sq_mean * x ** 2 - g_bar ** 2
variance = (1/2) * sympy.integrate(var, (x, -1, 1))
print("variance:", variance)

# Sum of the two averaged terms.
E_over_dataset = expected_bias_order_2 + variance
print("E_over_dataset:", E_over_dataset)

g_bar: 0
bias^2: 0.200000000000000
variance: 0.114921303921007
E_over_dataset: 0.314921303921007


In [9]:
# Turn the symbolic g_bar and var into callables; np.vectorize makes them
# return an array even when the expression does not depend on x.
g_bar_vec = np.vectorize(sympy.lambdify([x], g_bar, "numpy"))
var_vec = np.vectorize(sympy.lambdify([x], var, "numpy"))

# Grid on which everything is drawn
x_vals = np.linspace(-1, 1, 1000)

# Band edges: g_bar plus/minus the square root of the pointwise variance
mean_vals = g_bar_vec(x_vals)
spread_vals = np.sqrt(var_vec(x_vals))
upper_bound = mean_vals + spread_vals
lower_bound = mean_vals - spread_vals

# Traces in drawing order. The third trace uses fill='tonexty', which shades
# toward the trace added just before it (the lower bound).
band_and_curves = [
    go.Scatter(x=x_vals, y=upper_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Upper Bound'),
    go.Scatter(x=x_vals, y=lower_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Lower Bound'),
    go.Scatter(x=x_vals, y=upper_bound, fill='tonexty', mode='none',
               fillcolor='rgba(0, 0, 255, 0.3)', name='Area'),
    go.Scatter(x=x_vals, y=x_vals**2, mode='lines',
               line=dict(color='rgba(255, 0, 0, 1)'), name='Target Function'),
    go.Scatter(x=x_vals, y=mean_vals, mode='lines',
               line=dict(color='rgba(0, 0, 255, 1)'), name='g_bar'),
]

fig = go.Figure()
for curve in band_and_curves:
    fig.add_trace(curve)

# Layout: fixed y-range and a wide right margin
layout_options = dict(
    title='Linear Model through the origin, Target f(x) = x^2',
    xaxis_title='X-axis',
    yaxis_title='Y-axis',
    legend=dict(x=0, y=1),
    showlegend=False,
    yaxis_range=[-2, 2],
    margin=go.layout.Margin(l=0, r=400, b=0, t=30),
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()

**Linear Model, Target f(x) = sin(pi*x)**

In [10]:
# Linear Model, Target f(x) = sin(pi*x)
#
# The hypothesis is the line w0 + w1*x through the two points
# (x1, sin(pi*x1)) and (x2, sin(pi*x2)).

x = sympy.Symbol("x", real = True)
x1 = sympy.Symbol("x1")
x2 = sympy.Symbol("x2")

# NOTE(review): slope is 0/0 on the diagonal x1 == x2. The quadrature below
# only works as long as no sample point lands exactly on it — confirm this is
# acceptable or substitute the limit value there.
slope = (sympy.sin(sympy.pi*x1) - sympy.sin(sympy.pi*x2)) / (x1 - x2)
model = (sympy.sin(sympy.pi*x2) - slope * x2) + slope * x # w0 + w1x

# Compile each integrand once and reuse it for both halves of the domain.
slope_fn = sympy.lambdify([x1, x2], slope)
slope_times_x2_fn = sympy.lambdify([x1, x2], slope * x2)

# dblquad integrates the first lambda argument (x1) over [-1, 1] as the inner
# variable and the second (x2) over [-1, 0] and then [0, 1] as the outer one.
inte_slope = integrate.dblquad(slope_fn, -1, 0, -1, 1)[0] + integrate.dblquad(slope_fn, 0, 1, -1, 1)[0]
inte_slope_time_x2 = integrate.dblquad(slope_times_x2_fn, -1, 0, -1, 1)[0] + integrate.dblquad(slope_times_x2_fn, 0, 1, -1, 1)[0]

# Average hypothesis: the three terms of the model averaged over (x1, x2),
# with 1/4 scaling the double integral over [-1, 1] x [-1, 1].
g_bar = (1/4)*(sympy.integrate(sympy.sin(sympy.pi*x2), (x1, -1, 1), (x2, -1, 1)) - inte_slope_time_x2 + inte_slope * x)
print("g_bar:", g_bar)

# Squared deviation of the average hypothesis from the target, then its mean over x.
bias_order_2 = (g_bar - sympy.sin(sympy.pi*x))**2
expected_bias_order_2 = sympy.N((1/2) * sympy.integrate(bias_order_2, (x, -1, 1)))
print("bias^2:", expected_bias_order_2)

# Variance averaged over x in one triple integral of (model - g_bar)^2.
# tplquad treats the lambda arguments as (innermost, middle, outermost) =
# (x1, x2, x); the x2 range is split at 0 and 1/8 scales the cube [-1, 1]^3.
pre_var = sympy.lambdify([x1, x2, x], (sympy.simplify(model) - g_bar)**2, "numpy")
variance = (1/8) * (integrate.tplquad(pre_var, -1, 1, 0, 1, -1, 1)[0] + integrate.tplquad(pre_var, -1, 1, -1, 0, -1, 1)[0])
print("variance:", variance)

# Sum of the two averaged terms.
E_over_dataset = expected_bias_order_2 + variance
print("E_over_dataset:", E_over_dataset)

g_bar: 0.775929174099576*x
bias^2: 0.206716840217646
variance: 1.676282395045049
E_over_dataset: 1.88299923526269


**Linear Model, Target f(x) = x^2**

In [11]:
# Linear Model, Target f(x) = x^2
#
# The hypothesis is the line w0 + w1*x through the two points (x1, x1^2) and
# (x2, x2^2). Every average below is an integral over [-1, 1] scaled by 1/2
# per integration variable (1/4 for the (x1, x2) pair).

x = sympy.Symbol("x")
x1, x2 = sympy.symbols("x1 x2")

target_expr = x ** 2
pair_limits = ((x1, -1, 1), (x2, -1, 1))

# Slope w1 and intercept w0 of the line through the two points.
slope = (x1 ** 2 - x2 ** 2) / (x1 - x2)
intercept = x2 ** 2 - slope * x2
model = intercept + slope * x  # w0 + w1x

# Average hypothesis over all (x1, x2) pairs.
g_bar = 0.25 * sympy.integrate(model, *pair_limits)

# Squared deviation of the average hypothesis from the target, then its mean over x.
bias_order_2 = (g_bar - target_expr) ** 2
expected_bias_order_2 = 0.5 * sympy.integrate(bias_order_2, (x, -1, 1))

# Pointwise variance as E[g^2] - g_bar^2, then its mean over x.
var = 0.25 * sympy.integrate(model ** 2, *pair_limits) - g_bar ** 2
variance = 0.5 * sympy.integrate(var, (x, -1, 1))

# Sum of the two averaged terms.
E_over_dataset = expected_bias_order_2 + variance

print("g_bar:", g_bar)
print("bias^2:", expected_bias_order_2)
print("variance:", variance)
print("E_over_dataset:", E_over_dataset)

g_bar: 0
bias^2: 0.200000000000000
variance: 0.333333333333333
E_over_dataset: 0.533333333333333


In [12]:
# Turn the symbolic g_bar and var into callables; np.vectorize makes them
# return an array even when the expression does not depend on x.
g_bar_vec = np.vectorize(sympy.lambdify([x], g_bar, "numpy"))
var_vec = np.vectorize(sympy.lambdify([x], var, "numpy"))

# Grid on which everything is drawn
x_vals = np.linspace(-1, 1, 1000)

# Band edges: g_bar plus/minus the square root of the pointwise variance
mean_vals = g_bar_vec(x_vals)
spread_vals = np.sqrt(var_vec(x_vals))
upper_bound = mean_vals + spread_vals
lower_bound = mean_vals - spread_vals

# Traces in drawing order. The third trace uses fill='tonexty', which shades
# toward the trace added just before it (the lower bound).
band_and_curves = [
    go.Scatter(x=x_vals, y=upper_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Upper Bound'),
    go.Scatter(x=x_vals, y=lower_bound, mode='lines',
               line=dict(color='rgba(0, 0, 255, 0)'), name='Lower Bound'),
    go.Scatter(x=x_vals, y=upper_bound, fill='tonexty', mode='none',
               fillcolor='rgba(0, 0, 255, 0.3)', name='Area'),
    go.Scatter(x=x_vals, y=x_vals**2, mode='lines',
               line=dict(color='rgba(255, 0, 0, 1)'), name='Target Function'),
    go.Scatter(x=x_vals, y=mean_vals, mode='lines',
               line=dict(color='rgba(0, 0, 255, 1)'), name='g_bar'),
]

fig = go.Figure()
for curve in band_and_curves:
    fig.add_trace(curve)

# Layout: fixed y-range and a wide right margin
layout_options = dict(
    title='Linear Model, Target f(x) = x^2',
    xaxis_title='X-axis',
    yaxis_title='Y-axis',
    legend=dict(x=0, y=1),
    showlegend=False,
    yaxis_range=[-2, 2],
    margin=go.layout.Margin(l=0, r=400, b=0, t=30),
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()