# Regression example

In [32]:
%matplotlib qt
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)

In [33]:
import sys
sys.path.append("../")
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import HuberRegressor
from mmhuber.regression_model import generate_samples
from mmhuber.mmalgorithm import hubreg
from mmhuber.least_squares import estimate_beta_sigma

In [34]:
# Setting the simulation parameters
n_samples = 500
n_features = 50

c = 1.1
sigma = 10

In [35]:
# Generating data
y, X, beta, sigma = generate_samples(n_samples, n_features, c, sigma, noise='lfd', random_state=None)

In [36]:
# LS estimate
beta_ls, sigma_ls = estimate_beta_sigma(X, y)

In [37]:
# Huber estimate
estimates_list, L = hubreg(y, X, c, beta_0=None, sigma_0=None, mu='optimal', lbda='optimal',
                        check_decreasing=True, n_iter=1000, return_all=True, pbar=True,
                        epsilon=1e-5)
beta_hub, sigma_hub = estimates_list[-1]

1%|          | 11/1000 [00:00<00:09, 107.08it/s]


In [38]:
# sklearn HuberRegressor   
huber = HuberRegressor(epsilon=c).fit(X, y)
beta_hub_sklearn = huber.coef_
sigma_hub_sklearn = huber.scale_

In [39]:
print("Scores for beta:")
print(f"Least squares: {np.linalg.norm(beta - np.squeeze(beta_ls))/np.linalg.norm(beta)}")
print(f"Huber sklearn implementation: {np.linalg.norm(beta - np.squeeze(beta_hub_sklearn))/np.linalg.norm(beta)}")
print(f"Huber MM-implementation: {np.linalg.norm(beta - np.squeeze(beta_hub))/np.linalg.norm(beta)}")

Scores for beta:
Least squares: 0.011760320791690827
Huber sklearn implementation: 0.011866040417594844
Huber MM-implementation: 0.011548072758916658


In [40]:
print("Scores for sigma:")
print(f"Least squares: {np.linalg.norm(sigma - np.squeeze(sigma_ls))/np.linalg.norm(sigma)}")
print(f"Huber sklearn implementation: {np.linalg.norm(sigma - np.squeeze(sigma_hub_sklearn))/np.linalg.norm(sigma)}")
print(f"Huber MM-implementation: {np.linalg.norm(sigma - np.squeeze(sigma_hub))/np.linalg.norm(sigma)}")

Scores for sigma:
Least squares: 0.34487147312163025
Huber sklearn implementation: 0.7975320972409339
Huber MM-implementation: 0.19518777927456962


In [42]:
plt.figure()
plt.semilogy(np.arange(0,len(L)), L, marker='o')
plt.xlabel('$n_{iter}$')
plt.ylabel('$L$')

Text(0, 0.5, '$L$')