# Regression example with Huber's least favourable distribution

In [1]:
%matplotlib qt
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)

In [2]:
import sys
sys.path.append("../")
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
from mmhuber.regression_model import generate_samples
from mmhuber.mmalgorithm import hubreg
from mmhuber.least_squares import estimate_beta_sigma
from sklearn.linear_model import HuberRegressor

In [3]:
# Configuration of the Monte-Carlo experiment
n_samples, n_features = 100, 5  # dimensions requested from the sample generator
n_trials = 48  # number of repetitions averaged for each value of c
c_vec = np.linspace(start=1.01, stop=1.99, num=5)  # grid of c values swept by the experiment
sigma = 10  # scale handed to the sample generator

In [4]:
def _relative_beta_error(beta_true, beta_hat):
    """Return ||beta_true - beta_hat||_2 / ||beta_true||_2 (ratio of norms, not squared)."""
    return np.linalg.norm(beta_true - beta_hat, ord=2) / np.linalg.norm(beta_true, ord=2)


def _relative_sigma_error(sigma_true, sigma_hat):
    """Return the squared relative error (sigma_hat - sigma_true)^2 / sigma_true^2."""
    return (sigma_hat - sigma_true)**2 / (sigma_true**2)


# One trial-averaged error per value of c, for each estimator.
# These lists are what the plotting cells read.
error_beta_ls = []
error_sigma_ls = []
error_beta_mm_huber = []
error_sigma_mm_huber = []
error_beta_sklearn_huber = []
error_sigma_sklearn_huber = []
for c in tqdm(c_vec):

    # Running sums over the trials for the current value of c
    error_beta_ls_temp = 0
    error_sigma_ls_temp = 0
    error_beta_mm_huber_temp = 0
    error_sigma_mm_huber_temp = 0
    error_beta_sklearn_huber_temp = 0
    error_sigma_sklearn_huber_temp = 0
    for trial in range(n_trials):
        # Generating data.
        # The returned scale is bound to `sigma_true` rather than `sigma`: rebinding
        # `sigma` here would overwrite the configured value and feed each call's
        # output back in as the next call's input.
        # NOTE(review): random_state=None presumably leaves the draw unseeded, so the
        # results would differ between runs -- confirm against generate_samples.
        y, X, beta, sigma_true = generate_samples(n_samples, n_features, c, sigma, noise='lfd', random_state=None)

        # LS estimate
        beta_estimate, sigma_estimate = estimate_beta_sigma(X, y)
        error_beta_ls_temp += _relative_beta_error(beta, beta_estimate)
        error_sigma_ls_temp += _relative_sigma_error(sigma_true, sigma_estimate)

        # MM-huber estimate, started from the LS solution
        beta_estimate, sigma_estimate = hubreg(y, X, c, beta_0='LS', sigma_0='LS', mu='optimal', lbda='optimal',
                             check_decreasing=False, n_iter=100, pbar=False,
                             epsilon=1e-5)
        error_beta_mm_huber_temp += _relative_beta_error(beta, beta_estimate)
        error_sigma_mm_huber_temp += _relative_sigma_error(sigma_true, sigma_estimate)

        # Sklearn huber
        # NOTE(review): HuberRegressor fits an intercept and applies a small L2 penalty
        # by default, and `coef_` does not include the intercept -- confirm this matches
        # the model assumed by generate_samples / hubreg before comparing the curves.
        huber = HuberRegressor(epsilon=c).fit(X, y)
        beta_estimate = huber.coef_
        sigma_estimate = huber.scale_
        error_beta_sklearn_huber_temp += _relative_beta_error(beta, beta_estimate)
        error_sigma_sklearn_huber_temp += _relative_sigma_error(sigma_true, sigma_estimate)


    # Average the accumulated errors over the trials
    error_beta_ls.append( error_beta_ls_temp / n_trials )
    error_sigma_ls.append( error_sigma_ls_temp / n_trials )
    error_beta_mm_huber.append( error_beta_mm_huber_temp / n_trials )
    error_sigma_mm_huber.append( error_sigma_mm_huber_temp / n_trials )
    error_beta_sklearn_huber.append( error_beta_sklearn_huber_temp / n_trials )
    error_sigma_sklearn_huber.append( error_sigma_sklearn_huber_temp / n_trials )



100%|██████████| 5/5 [12:48<00:00, 153.74s/it]


In [11]:
# Trial-averaged relative error on beta against c, log-log axes, one curve per estimator.
plt.figure(figsize=(8,4))
plt.loglog(c_vec, error_beta_ls, marker='o', label='LS')
plt.loglog(c_vec, error_beta_mm_huber, marker='s', label='MM-Huber')
plt.loglog(c_vec, error_beta_sklearn_huber, marker='d', label='Sklearn-Huber')
plt.legend()
plt.xlabel('$c$')
# Raw string: avoids the invalid escape sequences '\|' and '\h' of a plain literal.
# The label shows the un-squared ratio of norms, which is what the error_beta_* lists hold.
# Trailing ';' suppresses the Text(...) repr in the cell output.
plt.ylabel(r'$\| \hat{\beta} - \beta \|_2 / \| \beta \|_2$');

Text(0, 0.5, '$\\| \\hat{\\beta} - \\beta \\|^2_2/\\| \\beta \\|^2_2$')

In [7]:
# Trial-averaged squared relative error on sigma against c, log-log axes, one curve per estimator.
plt.figure(figsize=(8,4))
plt.loglog(c_vec, error_sigma_ls, marker='o', label='LS')
plt.loglog(c_vec, error_sigma_mm_huber, marker='s', label='MM-Huber')
plt.loglog(c_vec, error_sigma_sklearn_huber, marker='d', label='Sklearn-Huber')
plt.legend()
plt.xlabel('$c$')
# Raw string: same rendered label, without the invalid escape sequence '\h'.
# Trailing ';' suppresses the Text(...) repr in the cell output.
plt.ylabel(r'$( \hat{\sigma} - \sigma )^2/(\sigma )^2$');

Text(0, 0.5, '$( \\hat{\\sigma} - \\sigma )^2/(\\sigma )^2$')