Run simulations with different loss functions via SGD. Record the bias and variance of the estimated parameters from 500 simulations for sample sizes n=500 and n=2000, respectively. Summarize them in a table.

In [1]:
import numpy as np
import pandas as pd

# Simulation design: sample sizes to compare and replications per configuration
sample_sizes = [500, 2000]
n_simulations = 500
# Coefficient vector of the data-generating linear model
true_beta = np.asarray([2.0, -1.0, 0.5], dtype=float)

# Define loss functions and their gradients
def mse_loss(y, y_pred):
    """Return the mean of the squared differences between ``y`` and ``y_pred``."""
    residual = y - y_pred
    return np.mean(np.square(residual))

def mse_gradient(X, y, beta):
    """Return the gradient of the mean squared error with respect to ``beta``.

    The residual is ``y - X @ beta``; the result is ``-2 * X.T @ residual``
    divided by the number of observations ``len(y)``.
    """
    residual = y - X @ beta
    n_obs = len(y)
    neg_two_xt = -2 * X.T
    return neg_two_xt @ residual / n_obs

def huber_loss(y, y_pred, delta=1.0):
    """Return the mean Huber loss between ``y`` and ``y_pred``.

    Residuals with absolute value at most ``delta`` contribute
    ``0.5 * r**2``; larger residuals contribute
    ``delta * (|r| - 0.5 * delta)``.
    """
    residual = y - y_pred
    magnitude = np.abs(residual)
    quadratic_part = 0.5 * residual**2
    linear_part = delta * (magnitude - 0.5 * delta)
    per_point = np.where(magnitude <= delta, quadratic_part, linear_part)
    return np.mean(per_point)

def huber_gradient(X, y, beta, delta=1.0):
    """Return the gradient of the mean Huber loss with respect to ``beta``.

    Each residual ``y - X @ beta`` is kept as-is when its absolute value is
    at most ``delta`` and replaced by ``delta * sign(residual)`` otherwise;
    the result is ``-X.T @ psi`` divided by ``len(y)``.
    """
    residual = y - X @ beta
    saturated = delta * np.sign(residual)
    within_band = np.abs(residual) <= delta
    psi = np.where(within_band, residual, saturated)
    return -X.T @ psi / len(y)


In [4]:
# Define SGD function
def sgd(X, y, loss_gradient, lr=0.01, n_epochs=100, batch_size=32, *, rng=None):
    """Estimate coefficients with mini-batch stochastic gradient descent.

    Starting from an all-zero coefficient vector, each epoch reshuffles the
    rows of ``X``/``y`` and takes one gradient step per mini-batch.

    Args:
        X: 2-D array of features; rows are observations.
        y: 1-D array of responses with one entry per row of ``X``.
        loss_gradient: Callable ``(X_batch, y_batch, beta) -> gradient``.
        lr: Step size applied to every mini-batch gradient.
        n_epochs: Number of full passes over the data.
        batch_size: Maximum number of rows per mini-batch (the last batch
            of an epoch may be smaller).
        rng: Optional object exposing ``permutation(n)`` (for example a
            ``numpy.random.Generator`` or ``numpy.random.RandomState``) used
            for shuffling. When ``None`` the global ``np.random`` state is
            used, so ``np.random.seed`` keeps working as before.

    Returns:
        The coefficient vector after the final update, shape ``(X.shape[1],)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``X`` and
            ``y`` disagree on the number of observations.
    """
    n_samples = X.shape[0]

    # A longer ``y`` would otherwise be silently truncated by the fancy
    # indexing below, pairing the model with the wrong amount of data.
    if len(y) != n_samples:
        raise ValueError(
            f"X has {n_samples} rows but y has {len(y)} entries"
        )
    # A negative batch size makes the mini-batch range empty, which would
    # silently return the zero initial vector instead of an estimate.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    shuffler = np.random if rng is None else rng
    beta = np.zeros(X.shape[1])  # Initialize beta

    for _ in range(n_epochs):
        # Fresh random order each epoch so batches differ between passes
        indices = shuffler.permutation(n_samples)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        for start in range(0, n_samples, batch_size):
            end = start + batch_size
            # Gradient on the current mini-batch, then a plain SGD step
            grad = loss_gradient(X_shuffled[start:end], y_shuffled[start:end], beta)
            beta -= lr * grad

    return beta

# Monte Carlo study: for every (sample size, loss) pair, repeatedly draw a
# fresh data set, fit it with SGD, and record the spread of the estimates.
results = []
loss_gradients = {"MSE": mse_gradient, "Huber": huber_gradient}
n_features = len(true_beta)

for n in sample_sizes:
    for loss_name, loss_grad in loss_gradients.items():
        estimates = []
        for _ in range(n_simulations):
            # Standard-normal design matrix and additive standard-normal noise
            X = np.random.randn(n, n_features)
            y = X @ true_beta + np.random.randn(n)

            beta_est = sgd(X, y, loss_grad)
            estimates.append(beta_est)

        # One row per replication, one column per coefficient
        estimates = np.array(estimates)
        bias = estimates.mean(axis=0) - true_beta
        variance = estimates.var(axis=0)

        # Collapse to scalars: Euclidean norm of the bias vector and the
        # average of the per-coefficient variances
        row = {
            "Sample Size": n,
            "Loss Function": loss_name,
            "Bias": np.linalg.norm(bias),
            "Variance": variance.mean(),
        }
        results.append(row)

# One table row per (sample size, loss function) configuration
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Sample Size,Loss Function,Bias,Variance
0,500,MSE,0.003489,0.001995
1,500,Huber,0.004441,0.002145
2,2000,MSE,0.001393,0.000544
3,2000,Huber,0.001538,0.000555
