# Import packages and classes

In [86]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression


# Provide data

In [87]:
# .reshape((-1, 1)) turns the 1-D input into a 2-D column (one row per sample,
# a single feature column); the response y is left as a 1-D array.
x = np.array([5, 15, 25, 35, 45, 55]).reshape((-1, 1))
y = np.array([5, 20, 14, 32, 22, 38])

In [88]:
x

array([[ 5],
       [15],
       [25],
       [35],
       [45],
       [55]])

In [89]:
y

array([ 5, 20, 14, 32, 22, 38])

In [90]:
x.shape           # 2-D array: six rows, one column

(6, 1)

In [91]:
y.shape            # 1-D array: six elements

(6,)


# Create a model and fit it

In [92]:
model = LinearRegression()

In [93]:
model.fit(x,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [94]:
# One-step form of the two cells above: construct the estimator and fit it in a
# single statement. This rebinds `model` to a freshly fitted estimator.
model = LinearRegression().fit(x,y)

# Get results

In [95]:
r_sq = model.score(x, y)

In [96]:
r_sq

0.71587561374795405

In [97]:
model.intercept_                   # the intercept 𝑏₀ (a scalar here, since y is 1-D)

5.6333333333333293

In [98]:
model.coef_                     # the slope 𝑏₁, returned as a one-element array

array([ 0.54])

In [99]:
# Same fit, but with y passed as a 2-D column: intercept_ then comes back as a
# 1-D array and coef_ as a 2-D array (compare the two outputs that follow).
new_model = LinearRegression().fit(x, y.reshape((-1, 1)))

In [100]:
new_model.intercept_

array([ 5.63333333])

In [101]:
new_model.coef_


array([[ 0.54]])

# Predict response

In [102]:
y_pred = model.predict(x)

In [103]:
y_pred

array([  8.33333333,  13.73333333,  19.13333333,  24.53333333,
        29.93333333,  35.33333333])

In [104]:
# Manual equivalent of model.predict(x): 𝑏₀ + 𝑏₁·x. Because x is a (6, 1) column,
# the result is 2-D, whereas predict returned a 1-D array.
y_pred1 = model.intercept_ + model.coef_ * x

In [105]:
y_pred1

array([[  8.33333333],
       [ 13.73333333],
       [ 19.13333333],
       [ 24.53333333],
       [ 29.93333333],
       [ 35.33333333]])

In [106]:
x_new = np.arange(5).reshape((-1, 1))

In [107]:
x_new

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [108]:
y_new = model.predict(x_new)

In [109]:
y_new

array([ 5.63333333,  6.17333333,  6.71333333,  7.25333333,  7.79333333])

# Multiple Linear Regression With scikit-learn

# Import packages and classes, and provide data

In [110]:
# Eight observations: each row of x holds two input features, and y holds the
# matching response value.
x = [
    [0, 1],
    [5, 1],
    [15, 2],
    [25, 5],
    [35, 11],
    [45, 15],
    [55, 34],
    [60, 35],
]
y = [4, 5, 20, 14, 32, 22, 38, 43]

In [144]:
# Convert the plain Python lists into NumPy arrays before fitting.
x = np.array(x)
y = np.array(y)

In [145]:
x

array([[ 0,  1],
       [ 5,  1],
       [15,  2],
       [25,  5],
       [35, 11],
       [45, 15],
       [55, 34],
       [60, 35]])

In [146]:
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [147]:
x.shape

(8, 2)

In [148]:
y.shape

(8,)


# Create a model and fit it

In [119]:
model = LinearRegression().fit(x,y)

# Get results

In [120]:
r_sq = model.score(x,y)

In [121]:
r_sq

0.86159392587567762

In [122]:
model.intercept_

5.5225792751981899

In [123]:
model.coef_

array([ 0.44706965,  0.25502548])

# Predict response

In [124]:
y_pred = model.predict(x)

In [125]:
y_pred

array([  5.77760476,   8.012953  ,  12.73867497,  17.9744479 ,
        23.97529728,  29.4660957 ,  38.78227633,  41.27265006])

In [126]:
# Manual equivalent of model.predict(x): the intercept plus, for every row,
# the sum of each feature multiplied by its coefficient.
y_pred1 = model.intercept_ + (model.coef_ * x).sum(axis=1)

In [127]:
y_pred1

array([  5.77760476,   8.012953  ,  12.73867497,  17.9744479 ,
        23.97529728,  29.4660957 ,  38.78227633,  41.27265006])

In [128]:
x_new = np.arange(10).reshape((-1, 2))

In [129]:
x_new

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [130]:
y_new = model.predict(x_new)

In [131]:
y_new

array([  5.77760476,   7.18179502,   8.58598528,   9.99017554,  11.3943658 ])

# Polynomial Regression With scikit-learn

# Import packages and classes

In [132]:
from sklearn.preprocessing import PolynomialFeatures

# Provide data

In [133]:
x = np.array([[5], [15], [25], [35], [45], [55]])   # one feature as a (6, 1) column
y = np.array([15, 11, 2, 8, 25, 32])                # one response value per row of x

In [134]:
x

array([[ 5],
       [15],
       [25],
       [35],
       [45],
       [55]])

In [135]:
y

array([15, 11,  2,  8, 25, 32])

In [136]:
x.shape

(6, 1)

In [137]:
y.shape

(6,)

# Transform input data

In [138]:
transformer = PolynomialFeatures(degree=2, include_bias=False)

In [139]:
transformer.fit(x)

PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)

In [140]:
x_ = transformer.transform(x)

In [149]:
# One-step form of the fit()/transform() cells above: fit_transform() does both
# in a single call. The result has two columns, x and x².
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

In [150]:
x_

array([[    5.,    25.],
       [   15.,   225.],
       [   25.,   625.],
       [   35.,  1225.],
       [   45.,  2025.],
       [   55.,  3025.]])

In [151]:
# x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

In [152]:
# x_

# Create a model and fit it

In [153]:
model = LinearRegression().fit(x_,y)

# Get results

In [154]:
r_sq = model.score(x_, y)

In [155]:
r_sq

0.89085162624985637

In [156]:
model.intercept_

21.372321428571453

In [157]:
model.coef_

array([-1.32357143,  0.02839286])

In [158]:
# include_bias=True prepends a column of ones to the transformed inputs.
# Note: this rebinds x_, replacing the two-column matrix built earlier.
x_ = PolynomialFeatures(degree=2, include_bias=True).fit_transform(x)

In [159]:
x_

array([[  1.00000000e+00,   5.00000000e+00,   2.50000000e+01],
       [  1.00000000e+00,   1.50000000e+01,   2.25000000e+02],
       [  1.00000000e+00,   2.50000000e+01,   6.25000000e+02],
       [  1.00000000e+00,   3.50000000e+01,   1.22500000e+03],
       [  1.00000000e+00,   4.50000000e+01,   2.02500000e+03],
       [  1.00000000e+00,   5.50000000e+01,   3.02500000e+03]])

In [160]:
# The column of ones in x_ already plays the role of the intercept term, so the
# estimator's own intercept is switched off; 𝑏₀ then appears as the first entry
# of coef_ and intercept_ is reported as 0.0.
model = LinearRegression(fit_intercept=False).fit(x_, y)

In [161]:
r_sq = model.score(x_, y)

In [162]:
r_sq

0.89085162624985637

In [163]:
model.intercept_

0.0

In [164]:
model.coef_

array([ 21.37232143,  -1.32357143,   0.02839286])


# Predict response

In [165]:
y_pred = model.predict(x_)

In [166]:
y_pred

array([ 15.46428571,   7.90714286,   6.02857143,   9.82857143,
        19.30714286,  34.46428571])

# Advanced Linear Regression With statsmodels

In [171]:
import statsmodels.api as sm

# Provide data and transform inputs

In [172]:
# Eight observations with two features each, built directly as NumPy arrays.
x = np.array([[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [173]:
x

array([[ 0,  1],
       [ 5,  1],
       [15,  2],
       [25,  5],
       [35, 11],
       [45, 15],
       [55, 34],
       [60, 35]])

In [174]:
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [177]:
x1 = sm.add_constant(x)

In [178]:
x1

array([[  1.,   0.,   1.],
       [  1.,   5.,   1.],
       [  1.,  15.,   2.],
       [  1.,  25.,   5.],
       [  1.,  35.,  11.],
       [  1.,  45.,  15.],
       [  1.,  55.,  34.],
       [  1.,  60.,  35.]])

In [179]:
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

# Create a model and fit it

In [181]:
model = sm.OLS(y,x)

In [182]:
results = model.fit()

# Get results

In [183]:
print(results.summary())

  "anyway, n=%i" % int(n))


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Fri, 12 Jul 2019   Prob (F-statistic):            0.00713
Time:                        18:05:01   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2

In [184]:
results.rsquared

0.86159392587567762

In [186]:
results.rsquared_adj

0.8062314962259487

In [187]:
results.params

array([ 5.52257928,  0.44706965,  0.25502548])

# Predict response

In [188]:
results.fittedvalues

array([  5.77760476,   8.012953  ,  12.73867497,  17.9744479 ,
        23.97529728,  29.4660957 ,  38.78227633,  41.27265006])

In [189]:
results.predict(x)

array([  5.77760476,   8.012953  ,  12.73867497,  17.9744479 ,
        23.97529728,  29.4660957 ,  38.78227633,  41.27265006])

In [190]:
x_new = sm.add_constant(np.arange(10).reshape((-1, 2)))

In [191]:
x_new

array([[ 1.,  0.,  1.],
       [ 1.,  2.,  3.],
       [ 1.,  4.,  5.],
       [ 1.,  6.,  7.],
       [ 1.,  8.,  9.]])

In [192]:
 y_new = results.predict(x_new)

In [193]:
y_new

array([  5.77760476,   7.18179502,   8.58598528,   9.99017554,  11.3943658 ])