<img src="http://imgur.com/1ZcRyrc.png" style="float: left; margin: 20px; height: 55px">

# Visualisation of the influence of outliers on least squares regression and least absolute deviations regression

_Instructor: Aymeric Flaisler_

---

In [1]:
import numpy as np
import scipy.stats as stats
import seaborn as sns
import pandas as pd
import statsmodels.api as sm

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

sns.set_style('darkgrid')

from ipywidgets import *
from IPython.display import display

  from pandas.core import datetools


In [2]:
# Fix the global NumPy RNG state so the noise drawn below is reproducible.
np.random.seed(1)

# Regressor: the integers 1 through 15.
x = np.arange(1, 16)

# Response: x shifted by Gaussian noise with mean 5 and standard deviation 2.
y = x + np.random.normal(loc=5, scale=2, size=x.shape[0])

In [3]:
x

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [4]:
y

array([ 9.24869073,  5.77648717,  6.9436565 ,  6.85406276, 11.73081526,
        6.39692261, 15.48962353, 11.4775862 , 14.63807819, 14.50125925,
       18.92421587, 12.87971858, 17.35516559, 18.23189129, 22.26753888])

In [5]:
def plot_regression(x, y, pmin, pmax):
    """Plot least-squares and least-absolute-deviations fits of ``y`` on ``x``.

    Draws the data as a scatter plot, overlays the OLS line and the median
    (``q=0.5``) quantile-regression line, and connects every observation to
    each fitted value with a dotted residual segment.

    ``x`` is handed to statsmodels as-is; no constant column is added here,
    so with a single 1-D regressor both fitted lines pass through the origin.

    Parameters
    ----------
    x : array-like
        Regressor values, used directly as the design matrix.
    y : array-like
        Response values, same length as ``x``.
    pmin, pmax : float
        Lower and upper limit applied to *both* axes; also the two x
        positions at which the fitted lines are evaluated and drawn.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca()

    ax.set_xlim([pmin, pmax])
    ax.set_ylim([pmin, pmax])

    sqmod = sm.OLS(y, x).fit()
    absmod = sm.QuantReg(y, x).fit(q=0.5)

    ax.scatter(x, y, s=70, color='steelblue')

    # A straight line only needs its two end points, evaluated at the axis limits.
    endpoints = [pmin, pmax]
    ax.plot(endpoints, [absmod.predict(p)[0] for p in endpoints],
            color='darkgoldenrod', lw=2, alpha=0.5, label='LAD regression')
    ax.plot(endpoints, [sqmod.predict(p)[0] for p in endpoints],
            color='darkred', lw=2, alpha=0.5, label='OLS regression')

    # Dashed reference lines marking the coordinate axes.
    ax.axvline(0, lw=2, c='black', linestyle='dashed')
    ax.axhline(0, lw=2, c='black', linestyle='dashed')

    # Residual segments: one per observation and per model.
    for x_, y_, ys, ya in zip(x, y, sqmod.fittedvalues, absmod.fittedvalues):
        ax.plot([x_, x_], [y_, ys], color='darkred', linestyle='dotted', linewidth=1.5)
        ax.plot([x_, x_], [y_, ya], color='darkgoldenrod', linestyle='dotted', linewidth=1.5)

    ax.legend(loc='center right')
    ax.set_xlabel('x', fontsize=20)
    ax.set_ylabel('y', fontsize=20)
    fig.tight_layout()

    # plt.show() rather than fig.show(): Figure.show() warns on non-GUI
    # backends such as the inline one, whereas plt.show() renders the figure
    # into the current output area (including an ipywidgets interact output).
    plt.show()


In [6]:
def plot_delegator(outlier):
    """Redraw the regression plot with the last observation shifted upward.

    Works on a copy of the module-level ``y`` so the original data are left
    untouched, adds ``outlier`` to the element at index ``len(x) - 1``, and
    plots the result against the module-level ``x`` on axes spanning -5 to 100.
    """
    perturbed = y.copy()
    last_idx = len(x) - 1
    perturbed[last_idx] = perturbed[last_idx] + outlier
    plot_regression(x, perturbed, -5, 100)
    

In [7]:
# Bind a 0..500 slider (step 1, starting at 0) to the size of the outlier
# added to the last observation; the plot is redrawn while the slider moves.
interact(
    plot_delegator,
    outlier=widgets.FloatSlider(
        min=0,
        max=500,
        step=1,
        value=0,
        continuous_update=True,
    ),
)
plt.show()

In [15]:
# Median (q = 0.5) quantile regression, i.e. the LAD fit, on the original
# unperturbed x and y; x is passed as-is, with no constant column added here.
absmod = sm.QuantReg(y, x).fit(q=.5)

In [16]:
absmod.summary()

0,1,2,3
Dep. Variable:,y,Pseudo R-squared:,0.003154
Model:,QuantReg,Bandwidth:,10.73
Method:,Least Squares,Sparsity:,19.03
Date:,"Tue, 27 Mar 2018",No. Observations:,15.0
Time:,20:12:49,Df Residuals:,14.0
,,Df Model:,1.0

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,1.7135,0.270,6.340,0.000,1.134,2.293
