In [1]:
# linear, quadratic, and cubic regression

In [5]:
import numpy as np 
import matplotlib.pyplot as plt
# print numpy arrays with 3 digits of floating-point precision
np.set_printoptions(precision=3)

In [8]:
# simulate data: the predictor is the integers 0, 1, ..., 99
x = np.arange(0, 100)
x[:10]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
# response: a quadratic in x plus Gaussian noise with standard deviation 5.
# The noise comes from a seeded generator so that Restart & Run All reproduces
# the same y, and therefore the same fitted coefficients and AIC values
# (re-run the notebook to refresh the stored outputs after this change).
rng = np.random.default_rng(42)
y = 150 + 3 * x + 0.03 * x**2 + 5 * rng.standard_normal(len(x))
y[:10]

array([152.42 , 153.398, 160.912, 167.2  , 169.879, 168.221, 164.485,
       169.443, 177.685, 185.063])

In [10]:
# design matrix for the linear model: one row per observation,
# columns are the intercept (all ones) and x
M1 = np.array([np.ones_like(x), x]).T
M1[:10]

array([[1, 0],
       [1, 1],
       [1, 2],
       [1, 3],
       [1, 4],
       [1, 5],
       [1, 6],
       [1, 7],
       [1, 8],
       [1, 9]])

In [11]:
# design matrix for the quadratic model: columns are x**0 (ones), x, x**2
M2 = np.array([x**power for power in range(3)]).T
M2[:10]

array([[ 1,  0,  0],
       [ 1,  1,  1],
       [ 1,  2,  4],
       [ 1,  3,  9],
       [ 1,  4, 16],
       [ 1,  5, 25],
       [ 1,  6, 36],
       [ 1,  7, 49],
       [ 1,  8, 64],
       [ 1,  9, 81]])

In [12]:
# design matrix for the cubic model: columns are x**0 (ones), x, x**2, x**3
M3 = np.array([x**power for power in range(4)]).T
M3[:10]

array([[  1,   0,   0,   0],
       [  1,   1,   1,   1],
       [  1,   2,   4,   8],
       [  1,   3,   9,  27],
       [  1,   4,  16,  64],
       [  1,   5,  25, 125],
       [  1,   6,  36, 216],
       [  1,   7,  49, 343],
       [  1,   8,  64, 512],
       [  1,   9,  81, 729]])

In [13]:
# solve the least-squares problem M1 @ beta ~= y with numpy.
# rcond=None is passed explicitly: on NumPy < 2.0, omitting rcond emits a
# FutureWarning about the changed default, and None selects the
# machine-precision cutoff that newer NumPy uses by default.
# p1 is the tuple (solution, residuals, rank, singular values).
p1 = np.linalg.lstsq(M1, y, rcond=None)

  


In [14]:
# element 0 of the lstsq result is the fitted coefficient vector
print(
    'the coefficients from the linear fit: {0}'.format(p1[0])
)

the coefficients from the linear fit: [103.307   5.944]


In [15]:
# least-squares fit of the quadratic model; rcond=None avoids the
# FutureWarning that NumPy < 2.0 emits when rcond is omitted and selects the
# machine-precision cutoff that newer NumPy uses by default
p2 = np.linalg.lstsq(M2, y, rcond=None)

  """Entry point for launching an IPython kernel.


In [19]:
# element 0 of the lstsq result is the fitted coefficient vector
print(
    'the coefficients from the quadratic fit: {0}'.format(p2[0])
)

the coefficients from the quadratic fit: [1.535e+02 2.869e+00 3.106e-02]


In [17]:
# least-squares fit of the cubic model; rcond=None avoids the
# FutureWarning that NumPy < 2.0 emits when rcond is omitted and selects the
# machine-precision cutoff that newer NumPy uses by default
p3 = np.linalg.lstsq(M3, y, rcond=None)

  """Entry point for launching an IPython kernel.


In [20]:
# element 0 of the lstsq result is the fitted coefficient vector
print(
    'the coefficients from the cubic fit: {0}'.format(p3[0])
)

the coefficients from the cubic fit: [ 1.542e+02  2.784e+00  3.323e-02 -1.461e-05]


In [21]:
# comparison: the same fits using 'statsmodels'

In [22]:
import statsmodels.api as sm 

  import pandas.util.testing as tm


In [23]:
# ordinary least squares for the linear model: y is the response,
# M1 (intercept column + x) is the design matrix
res1 = sm.OLS(endog=y, exog=M1).fit()
# summary2() builds the results table shown below
print(res1.summary2())

                 Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     0.981   
Dependent Variable: y                AIC:                920.2434
Date:               2022-01-27 14:55 BIC:                925.4538
No. Observations:   100              Log-Likelihood:     -458.12 
Df Model:           1                F-statistic:        5170.   
Df Residuals:       98               Prob (F-statistic): 1.31e-86
R-squared:          0.981            Scale:              569.50  
-------------------------------------------------------------------
         Coef.     Std.Err.      t      P>|t|     [0.025    0.975] 
-------------------------------------------------------------------
const   103.3070     4.7373   21.8073   0.0000   93.9061   112.7080
x1        5.9445     0.0827   71.9040   0.0000    5.7804     6.1085
-----------------------------------------------------------------
Omnibus:               18.001       Durbin-Watson:          0.087
Prob(Omnibus):   

In [24]:
# fit the quadratic and cubic models by OLS, in that order
res2, res3 = (sm.OLS(y, design).fit() for design in (M2, M3))

In [29]:
# report the AIC of the linear, quadratic and cubic fits, one per output line
print(
    'the AIC is {0:4.1f} for the linear fit, \n {1:4.1f} for the quadratic fit, and \n {2:4.1f} for the cubic fit'.format(
        res1.aic,
        res2.aic,
        res3.aic,
    )
)

the AIC is 920.2 for the linear fit, 
 600.2 for the quadratic fit, and 
 601.9 for the cubic fit


In [None]:
# the model with the lowest AIC is preferred; in the output above that is the
# quadratic fit, which is also the form used to simulate y