### Simple Linear Regression With scikit-learn

In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# scikit-learn expects the inputs as a two-dimensional array with one row per
# observation and one column per feature, so x is built directly as a single column.
x = np.array([[5], [15], [25], [35], [45], [55]])
y = np.array((5, 20, 14, 32, 22, 38))

In [4]:
# fit_intercept is a Boolean (True by default) that decides whether to calculate the intercept 𝑏₀ (True) or consider it equal to zero (False).
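# normalize is a Boolean (False by default) that decides whether to normalize the input variables (True) or not (False). Note: this parameter was deprecated in scikit-learn 1.0 and removed in 1.2; in newer versions, scale the inputs in a separate preprocessing step (e.g. StandardScaler) instead.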
# copy_X is a Boolean (True by default) that decides whether to copy (True) or overwrite the input variables (False).
# n_jobs is an integer or None (default) and represents the number of jobs used in parallel computation. None usually means one job, and -1 means using all processors.

In [7]:
# fit() returns the estimator itself, so construction and fitting can be chained.
# The equivalent two-step form is:
#     model = LinearRegression()
#     model.fit(x, y)
model = LinearRegression().fit(x, y)
# Bare expression so the cell still displays the fitted estimator.
model

LinearRegression()

In [8]:
# Score the fitted model on the same data it was trained on.
r_sq = model.score(x, y)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.7158756137479542


In [10]:
# Pull out the fitted parameters first, then report them.
intercept, slope = model.intercept_, model.coef_
print('intercept:', intercept)
print('slope:', slope)

intercept: 5.633333333333329
slope: [0.54]


In [11]:
# Predictions for the training inputs; label and array are printed on separate lines.
y_pred = model.predict(x)
print('predicted response:')
print(y_pred)

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [12]:
# Same prediction computed by hand from the fitted parameters. Because x is a
# single-column two-dimensional array, broadcasting against coef_ produces a
# two-dimensional result, unlike the one-dimensional array from model.predict(x).
y_pred = x * model.coef_ + model.intercept_
print('predicted response:')
print(y_pred)

predicted response:
[[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


### Multiple Linear Regression With scikit-learn

In [13]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [14]:
# Two input columns per observation (x) and one response value per observation (y).
x = np.array([[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [15]:
# Build the estimator, then fit it on both input columns; fit() hands back the
# same estimator, which is what `model` refers to from here on.
estimator = LinearRegression()
model = estimator.fit(x, y)

In [16]:
# Score the two-input model on its own training data.
r_sq = model.score(x, y)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.8615939258756776


In [17]:
# coef_ holds one fitted weight per input column.
intercept, coefficients = model.intercept_, model.coef_
print('intercept:', intercept)
print('slope:', coefficients)

intercept: 5.52257927519819
slope: [0.44706965 0.25502548]


In [18]:
# Predictions for the training inputs; label and array are printed on separate lines.
y_pred = model.predict(x)
print('predicted response:')
print(y_pred)

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [19]:
# Reproduce model.predict(x) by hand: the intercept plus, for each observation,
# the sum of the input columns weighted by their fitted coefficients.
# (This cell previously just repeated the model.predict(x) call from the cell before.)
y_pred = model.intercept_ + np.sum(model.coef_ * x, axis=1)
print('predicted response:', y_pred, sep='\n')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


### Polynomial Regression With scikit-learn

In [20]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [21]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [22]:
# Single input column (two-dimensional, one row per observation) and its responses.
x = np.array([[5], [15], [25], [35], [45], [55]])
y = np.array((15, 11, 2, 8, 25, 32))

In [24]:
# Polynomial regression needs 𝑥² (and perhaps other terms) as additional input
# features, so the input array x has to be transformed to contain the extra
# column(s) with the values of 𝑥² (and possibly more features).
#
# PolynomialFeatures parameters:
#   degree           - integer (2 by default); the degree of the polynomial regression function.
#   interaction_only - Boolean (False by default); include only interaction features (True) or all features (False).
#   include_bias     - Boolean (True by default); include the bias (intercept) column of ones (True) or not (False).
transformer = PolynomialFeatures(
    include_bias=False,
    degree=2,
)

In [25]:
# Reuse the transformer configured in the previous cell rather than building a
# second PolynomialFeatures with the same arguments, so the degree/bias settings
# live in one place. fit_transform() fits on x and returns the expanded inputs.
x_ = transformer.fit_transform(x)

In [26]:
# Show the transformed inputs: the first column is x, the second is x².
print(x_)

[[   5.   25.]
 [  15.  225.]
 [  25.  625.]
 [  35. 1225.]
 [  45. 2025.]
 [  55. 3025.]]


In [27]:
# Fit a linear model on the transformed inputs (x and x²); fit() hands back the
# same estimator, which is what `model` refers to from here on.
estimator = LinearRegression()
model = estimator.fit(x_, y)

In [28]:
# Score on the transformed inputs, since that is what the model was fitted on.
r_sq = model.score(x_, y)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.8908516262498564


In [29]:
# coef_ holds one weight per column of the transformed inputs.
intercept, coefficients = model.intercept_, model.coef_
print('intercept:', intercept)
print('coefficients:', coefficients)

intercept: 21.372321428571425
coefficients: [-1.32357143  0.02839286]


In [30]:
# Predict from the transformed inputs x_ (not the raw x) to match how the model was fitted.
y_pred = model.predict(x_)
print('predicted response:')
print(y_pred)

predicted response:
[15.46428571  7.90714286  6.02857143  9.82857143 19.30714286 34.46428571]


### Advanced Linear Regression With ***statsmodels***

In [36]:
import numpy as np
import statsmodels.api as sm

In [37]:
# Two input columns per observation (x) and one response value per observation (y).
x = np.array([[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [38]:
# You need to add the column of ones to the inputs if you want statsmodels to calculate the intercept 𝑏₀. It doesn’t take 𝑏₀ into account by default.
x = sm.add_constant(x)

In [39]:
# Ordinary least squares. Note the argument order: the response y comes first and
# the inputs x second, the reverse of scikit-learn's fit(x, y) used earlier.
model = sm.OLS(y, x)
# fit() returns a separate results object; the statistics are read from it below.
results = model.fit()

In [41]:
# Any warning related to kurtosistest is due to the small number of observations provided.
summary = results.summary()
print(summary)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Fri, 08 Jan 2021   Prob (F-statistic):            0.00713
Time:                        13:57:20   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2



In [42]:
# Report the headline statistics from the results object, one per line.
for label, value in (
    ('coefficient of determination:', results.rsquared),
    ('adjusted coefficient of determination:', results.rsquared_adj),
    ('regression coefficients:', results.params),
):
    print(label, value)

coefficient of determination: 0.8615939258756776
adjusted coefficient of determination: 0.8062314962259487
regression coefficients: [5.52257928 0.44706965 0.25502548]


In [43]:
# Two ways to get the predictions for the training inputs: the stored
# fittedvalues, and an explicit predict(x) call.
in_sample = results.fittedvalues
print('predicted response:')
print(in_sample)
recomputed = results.predict(x)
print('predicted response:')
print(recomputed)

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
