In [8]:
import numpy as np

# Define the Tikhonov regularization approach
def tikhonov_regularization(x, y, alpha):
    A = np.vstack([np.ones_like(x), x]).T
    # Gamma = alpha*np.eye(2)  # Regularization matrix
    Gamma = np.zeros((2,2))
    Gamma[1:,1:] = alpha*np.eye(1)
    theta = np.linalg.inv(A.T @ A + Gamma.T @ Gamma) @ A.T @ y
    return theta

# Generate or load your training datasets
def generate_datasets(n_datasets, n_samples):
    datasets = []
    for _ in range(n_datasets):
        x = np.random.uniform(-1, 1, n_samples)
        # y = np.sin(np.pi * x) + np.random.normal(0, 0.1, n_samples)  # Add noise to y
        y = np.sin(np.pi * x)
        datasets.append((x, y))
    return datasets

# Calculate bias
def calc_bias(theta_hat):
    x = np.linspace(-1, 1, 1000).reshape(1000,1)
    x_tile = np.hstack((np.ones_like(x),x))
    bias = np.mean((x_tile @ theta_hat-np.sin(np.pi*x))**2)
    return bias

# Define the number of datasets and samples per dataset
n_datasets = 1000
n_samples = 2
alpha = 0.5

# Generate datasets
datasets = generate_datasets(n_datasets, n_samples)

# Initialize lists to store coefficients, predictions, biases, and variances
theta_hats_list = []
predictions_list = []
biases_list = []
variances_list = []

# Train the Tikhonov regularization estimator on each dataset
for i, (x, y) in enumerate(datasets):
    # 1. Train linear regression model
    theta_hat = tikhonov_regularization(x, y, alpha)
    theta_hats_list.append(theta_hat)

    # 2. Predictions
    # Transfer x to x_tile
    x_tile = np.vstack((np.ones_like(x),x)).T
    Y_predict = x_tile @ theta_hat
    predictions_list.append(Y_predict)

    # 3. Bias
    # bias = np.mean((Y_predict - y)**2)
    bias = calc_bias(theta_hat)
    biases_list.append(bias)

    # 4. Variance
    variance = np.var(Y_predict)
    variances_list.append(variance)

# Calculate average coefficients
avg_theta_hat = np.mean(theta_hats_list, axis=0)
bias_of_avg_theta_hat = calc_bias(avg_theta_hat)

# Calculate average predictions
avg_predictions = np.mean(predictions_list, axis=0)

# Calculate overall bias and variance
overall_bias = np.mean(biases_list)
overall_variance = np.mean(variances_list)

print("bias_of_avg_theta_hat:", bias_of_avg_theta_hat)
print("avg_theta_hat:", avg_theta_hat)
print("avg_predictions:", avg_predictions)

print("overall_bias:", overall_bias)
print("overall_variance:", overall_variance)

bias_of_avg_theta_hat: 0.5508242842453827
avg_theta_hat: [0.00386105 0.39194461]
avg_predictions: [-0.0077926   0.00553988]
overall_bias: 0.9153128369886382
overall_variance: 0.09497855782623126


In [4]:
import numpy as np


def calc_bias(avg_hypothesis_a, avg_hypothesis_b):
    x = np.random.uniform(-1, 1, 10000)
    bias = np.mean((avg_hypothesis_a * x + avg_hypothesis_b - x ** 2) ** 2)
    return bias


def calc_metrics(hypothesises_a, hypothesises_b, data_x):
    avg_hypothesis_a = np.mean(hypothesises_a)
    avg_hypothesis_b = np.mean(hypothesises_b)
    bias = calc_bias(avg_hypothesis_a, avg_hypothesis_b)
    variance = 0.5 * (np.mean(((hypothesises_a * data_x[:, 0] + hypothesises_b) - (
                avg_hypothesis_a * data_x[:, 0] + avg_hypothesis_b)) ** 2) + np.mean(
        ((hypothesises_a * data_x[:, 1] + hypothesises_b) - (avg_hypothesis_a * data_x[:, 1] + avg_hypothesis_b)) ** 2))
    risk = bias + variance
    return avg_hypothesis_a, avg_hypothesis_b, bias, variance, risk


data_x = np.random.uniform(-1, 1, size=(10000, 2))
data_y = np.sin(np.pi * data_x)

# hypothesises_a = (data_y[:, 0] - data_y[:, 1]) / (data_x[:, 0] - data_x[:, 1])
hypothesises_a = data_x[:, 0] + data_x[:, 1]
# hypothesises_b = data_y[:, 0] -hypothesises_a*data_x[:, 0]
hypothesises_b = ((data_y[:, 0] + data_y[:, 1]) - hypothesises_a * (data_x[:, 0] + data_x[:, 1])) * 0.5

avg_hypothesis_a, avg_hypothesis_b, bias, variance, risk = calc_metrics(hypothesises_a, hypothesises_b, data_x)
print("Scenario (b) - linear line h(x) = ax+b:")
print("Average hypothesis a:", avg_hypothesis_a)
print("Average hypothesis b:", avg_hypothesis_b)
print("Bias:", bias)
print("Variance:", variance)
print("Risk:", risk)

Scenario (b) - linear line h(x) = ax+b:
Average hypothesis a: -0.0001554852805187565
Average hypothesis b: -0.337859085805639
Bias: 0.5432711761780847
Variance: 0.4066053963440488
Risk: 0.9498765725221335


In [5]:
import numpy as np
def calc_bias(avg_hypothesis):
    x = np.random.uniform(-1, 1, 10000)
    bias = np.mean((avg_hypothesis - x ** 2) ** 2)
    return bias

def calc_metrics(hypothesis):
    avg_hypothesis = np.mean(hypothesis)
    bias = calc_bias(avg_hypothesis)
    variance = np.mean((hypothesis - avg_hypothesis) ** 2)
    risk = bias + variance
    return avg_hypothesis, bias, variance, risk


data_x = np.random.uniform(-1, 1, size=(10000, 2))
data_y = np.sin(np.pi * data_x)

hypothesises = (data_y[:, 0] + data_y[:, 1]) / 2
avg_hypothesis_a, bias_a, variance_a, risk_a = calc_metrics(hypothesises)
print("Scenario (a) - Constant line h(x) = b:")
print("Average hypothesis:", avg_hypothesis_a)
print("Bias:", bias_a)
print("Variance:", variance_a)
print("Risk:", risk_a)

Scenario (a) - Constant line h(x) = b:
Average hypothesis: -0.0036300878225968497
Bias: 0.19590410234900257
Variance: 0.24983767665750384
Risk: 0.4457417790065064
