# Regression example

In [None]:
%matplotlib qt
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)

In [None]:
import sys
sys.path.append("../")
import numpy as np
import time
from matplotlib import pyplot as plt
from sklearn.linear_model import HuberRegressor
from mmhuber.regression_model import generate_samples
from mmhuber.mmalgorithm import hubreg
from mmhuber.least_squares import estimate_beta_sigma
from mmhuber.basics import M_estimating_equations

In [None]:
# Simulation parameters

# Problem size handed to generate_samples
n_samples = 20_000
n_features = 1

# Number of rows that are overwritten with outliers after generation
n_outliers = 500

# c is passed to generate_samples and hubreg, and is used as the `epsilon`
# threshold of sklearn's HuberRegressor
c = 1.345

# Scale value handed to generate_samples
sigma = 20

In [None]:
# Draw the regression sample.
# NOTE(review): `sigma` is rebound to the value returned by generate_samples,
# so re-running this cell feeds that returned value back in as the input.
y, X, beta, sigma = generate_samples(
    n_samples, n_features, c, sigma,
    noise='Gaussian', random_state=0,
)

# Contaminate the sample: pick n_outliers distinct rows at random and replace
# them with points drawn around X = 5 (std 0.5) and y = -5 (std 5).
# The order of the three random draws below must not change, otherwise the
# seeded stream yields different outliers.
np.random.seed(0)
index = np.random.choice(n_samples, size=n_outliers, replace=False)
X[index] = np.random.normal(loc=5, scale=0.5, size=(n_outliers, n_features))
y[index] = np.random.normal(loc=-5, scale=5, size=n_outliers)

In [None]:
# LS estimate
# Baseline fit from mmhuber.least_squares on the contaminated (X, y); the
# returned beta_ls / sigma_ls are compared against the Huber fits in the
# score and plot cells.
# NOTE(review): arguments are passed as (X, y) here while hubreg is called
# with (y, X) - confirm this matches estimate_beta_sigma's signature.
beta_ls, sigma_ls = estimate_beta_sigma(X, y)

In [None]:
# Huber estimate (MM algorithm from mmhuber.mmalgorithm)
# beta_0='LS' / sigma_0='LS' presumably select a least-squares starting
# point - confirm against hubreg's documentation.
#
# The stopwatch uses time.perf_counter(): it is monotonic and has the highest
# available resolution, whereas time.time() follows the wall clock and can
# jump if the system time is adjusted during the run.
t_beginning = time.perf_counter()
beta_hub, sigma_hub = hubreg(y, X, c, beta_0='LS', sigma_0='LS', mu='optimal', lbda='optimal',
                             check_decreasing=True, n_iter=100, pbar=True,
                             epsilon=1e-7)
time_mm_huber = time.perf_counter() - t_beginning
print(f'\nElapsed {time_mm_huber} seconds')
# Diagnostic: evaluate M_estimating_equations at the returned (beta, sigma).
print(f'M-estimating equations:\n{M_estimating_equations(y, X, beta_hub, sigma_hub, c)}')

In [None]:
# sklearn HuberRegressor
# fit_intercept=False: the rest of this notebook uses only the slope of this
# fit (coef_ is compared with beta, passed to M_estimating_equations, and
# plotted as coef * X). With sklearn's default fit_intercept=True an intercept
# would be estimated and then silently dropped, so the reported coefficients
# would not describe the fitted line.
# NOTE(review): HuberRegressor also applies a small L2 penalty by default
# (alpha=0.0001); it is left unchanged here.
#
# time.perf_counter() is used for the stopwatch because it is monotonic,
# unlike the wall-clock time.time().
t_beginning = time.perf_counter()
huber = HuberRegressor(epsilon=c, fit_intercept=False).fit(X, y)
time_sklearn_huber = time.perf_counter() - t_beginning
beta_hub_sklearn = huber.coef_
sigma_hub_sklearn = huber.scale_
print(f'Elapsed {time_sklearn_huber} seconds')
# Diagnostic: evaluate M_estimating_equations at sklearn's (coef_, scale_).
print(f'M-estimating equations:\n{M_estimating_equations(y, X, beta_hub_sklearn, sigma_hub_sklearn, c)}')

In [None]:
# Relative error ||beta - beta_hat|| / ||beta|| of each coefficient estimate.
print("Scores for beta:")
beta_norm = np.linalg.norm(beta)
for label, beta_hat in (
    ("Least squares", beta_ls),
    ("Huber sklearn implementation", beta_hub_sklearn),
    ("Huber MM-implementation", beta_hub),
):
    # squeeze drops singleton axes from the estimate before the subtraction
    beta_rel_err = np.linalg.norm(beta - np.squeeze(beta_hat)) / beta_norm
    print(f"{label}: {beta_rel_err}")

In [None]:
# Relative error |sigma - sigma_hat| / |sigma| of each scale estimate.
print("Scores for sigma:")
sigma_norm = np.linalg.norm(sigma)
for label, sigma_hat in (
    ("Least squares", sigma_ls),
    ("Huber sklearn implementation", sigma_hub_sklearn),
    ("Huber MM-implementation", sigma_hub),
):
    # squeeze drops singleton axes from the estimate before the subtraction
    sigma_rel_err = np.linalg.norm(sigma - np.squeeze(sigma_hat)) / sigma_norm
    print(f"{label}: {sigma_rel_err}")

In [None]:
# Scatter of the contaminated sample with the true and the three fitted lines.
# Each line is drawn as coefficient * X with no intercept term.
# NOTE(review): this presumably relies on n_features == 1 so that X is a
# single column - confirm before changing n_features.
fig, ax = plt.subplots()
ax.scatter(X, y, marker='o', facecolors='none', edgecolors='b', label='Samples')
ax.plot(X, beta*X, label='True')
ax.plot(X, beta_ls*X, label='Least-squares', linestyle=':', c='r')
ax.plot(X, beta_hub_sklearn*X, label='Sklearn Huber', linestyle=':', c='k')
ax.plot(X, beta_hub*X, label='MM Huber', linestyle=':', c='violet')
# Axis labels and a title so the figure can be read on its own.
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Regression example')
ax.legend()