匯入套件

In [18]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from typing import List, Tuple

定義Training的DataSet的Target function

In [19]:
def gen_sindata(n:int=2, random_state=None)-> Tuple[np.ndarray, np.ndarray]:
    '''
    Generate n noiseless training samples (X, Y) of the target f(x) = sin(pi * x).

    X is drawn from the uniform distribution on [-1, 1] and Y = sin(pi * X).

    Parameters:
        n:int => number of samples (default 2)
        random_state => optional seed, np.random.RandomState or np.random.Generator
            forwarded to scipy's ``rvs``. ``None`` (the default) keeps the
            previous behaviour of drawing from NumPy's global random state.
    Returns:
        X:np.ndarray => array of shape (n, 1) with the sampled inputs
        Y:np.ndarray => array of shape (n, 1) with sin(pi * X)
    '''
    # scipy parameterises the uniform distribution as (loc, scale); its support
    # is [loc, loc + scale], so loc=-1, scale=2 gives the interval [-1, 1].
    x_dist = stats.uniform(loc=-1, scale=2)
    # Draw n samples as a column vector so X and Y share the shape (n, 1).
    x_samples = x_dist.rvs(size=(n, 1), random_state=random_state)
    y_samples = np.sin(np.pi * x_samples)  # Y = sin(pi * X)
    return (x_samples, y_samples)

定義:
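The hypothesis set consists of constant functions $h(x) = b$. Given a data set $D = \{(x_1, \sin(\pi x_1)), (x_2, \sin(\pi x_2))\}$, the learning algorithm returns the horizontal line through the midpoint of the two targets,
$$
b = \frac{\sin(\pi x_1) + \sin(\pi x_2)}{2},
$$
as the final hypothesis $g^{(D)}(x) = b$.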


In [20]:
def gen_horizontal_line(y_train:np.ndarray)-> np.ndarray:
    '''
    Fit the constant hypothesis h(x) = b, with b the mean of the training targets.

    Parameters:
        y_train:np.ndarray => training targets (any shape)
    Returns:
        y_pred:np.ndarray => array with the same shape as y_train in which every
            entry equals mean(y_train). Floating-point inputs keep their dtype;
            integer / boolean inputs are promoted to float64 so the mean is not
            truncated when it is written into the output.
    '''
    y_arr = np.asanyarray(y_train)
    middle_point_y = np.mean(y_arr)  # the constant b of the horizontal line
    # np.full_like would otherwise inherit an integer dtype from y_train and
    # silently truncate the mean (e.g. targets [0, 1] -> 0 instead of 0.5).
    out_dtype = y_arr.dtype if np.issubdtype(y_arr.dtype, np.inexact) else np.float64
    y_pred = np.full_like(y_arr, middle_point_y, dtype=out_dtype)
    return y_pred

In [25]:
# Monte-Carlo estimate of the bias-variance decomposition for the constant
# (horizontal line) hypothesis learned from two samples of sin(pi * x).
# NOTE: no random seed is set here, so the printed numbers vary slightly
# between runs.
cnt = 10000  # number of simulated data sets, also reused as the number of test points
all_a_out = []  # one fitted constant prediction array per simulated data set

# Learn one hypothesis per independently drawn two-point data set
for i in range(cnt):
    # Generate training data (X, Y)
    X, Y = gen_sindata(2)
    # Hypothesis: horizontal line at the midpoint (mean of Y)
    a_out = gen_horizontal_line(Y)
    all_a_out.append(a_out)

# Every entry of a fitted prediction array equals that data set's constant b,
# so the mean / variance over all stored entries are the mean / variance of b.
a_bar = np.mean(all_a_out)  # average hypothesis g_bar (a constant)
a_var = np.var(all_a_out)  # variance of the learned constant across data sets
print(f"預測的期望值 a_bar:{a_bar:.4f}, 預測的方差 a_var:{a_var:.4f}")

# Evaluate the average hypothesis against the target on fresh test points
x = np.random.uniform(-1, 1, size=cnt)
deviation = a_bar - np.sin(np.pi * x)  # g_bar(x) - f(x) at every test point

bias = np.mean(deviation)  # mean signed deviation (close to 0 by symmetry of the target)
bias_squared = np.mean(deviation ** 2)  # the bias^2 term of the decomposition
# The hypotheses are constant in x, so the variance is the same at every test
# point and averaging it over x leaves it unchanged.
var = a_var

error = bias_squared + var

# bias_squared is reported too: it, not the signed `bias`, is what enters `error`.
print(f'bias = {bias:.4f}, bias_squared = {bias_squared:.4f}, var = {var:.3f}, error = {error:.4f}')


預測的期望值 a_bar:0.0020, 預測的方差 a_var:0.2472
bias = -0.0014, var = 0.247, error = 0.7480
