### Linear Regression Basic

### Simple Linear Regression With scikit-learn

#### Import Packages

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [7]:
x = np.array([5,15,25,35,45,55]).reshape(-1,1)
y = np.array([5,20,14,32,22,38])

In [8]:
print(x)

[[ 5]
 [15]
 [25]
 [35]
 [45]
 [55]]


In [9]:
print(y)

[ 5 20 14 32 22 38]


In [10]:
model = LinearRegression()

In [11]:
model.fit(x,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [12]:
#model = LinearRegression().fit(x,y)

In [13]:
#Coefficient of determination (𝑅²)
r_sq = model.score(x,y)
print('Coefficient of determination (𝑅²) : ', r_sq)

Coefficient of determination (𝑅²) :  0.7158756137479542


In [14]:
print("Intercept Value: ", model.intercept_)
print("Slope Value: ", model.coef_)

Intercept Value:  5.633333333333329
Slope Value:  [0.54]


In [15]:
y_pred = model.intercept_ + model.coef_ * x

In [16]:
print('Predicted Response :', y_pred, sep='\n')

Predicted Response :
[[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


In [18]:
x_new = np.arange(5).reshape(-1,1)
print(x_new)

[[0]
 [1]
 [2]
 [3]
 [4]]


In [19]:
y_new_pred = model.predict(x_new)

In [20]:
print(y_new_pred)

[5.63333333 6.17333333 6.71333333 7.25333333 7.79333333]


### Multiple Linear Regression With scikit-learn

In [23]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [32]:
x = [[0,1], [5,1], [5,2], [25,5], [35,1], [45,15], [55,34], [60,35]]
y = [4,5,20,14,32,22,38,43]
x,y = np.array(x), np.array(y)
print(x)
print(y)

[[ 0  1]
 [ 5  1]
 [ 5  2]
 [25  5]
 [35  1]
 [45 15]
 [55 34]
 [60 35]]
[ 4  5 20 14 32 22 38 43]


In [33]:
model = LinearRegression().fit(x,y)

In [34]:
#Coefficient of determination (𝑅²)
r_sq = model.score(x,y)
print('Coefficient of determination (𝑅²) : ', r_sq)
print("Intercept Value: ", model.intercept_)
print("Slope Value: ", model.coef_)

Coefficient of determination (𝑅²) :  0.7876988912237295
Intercept Value:  6.990695467558789
Slope Value:  [0.49245183 0.09372889]


In [35]:
y_pred = model.predict(x)

In [36]:
print(y_pred)

[ 7.08442436  9.5466835   9.64041239 19.77063564 24.32023836 30.55696111
 37.2623283  39.81831634]


In [38]:
y_pred = model.intercept_ + np.sum(model.coef_ * x, axis=1)
print('Predicted Response :', y_pred, sep='\n')

Predicted Response :
[ 7.08442436  9.5466835   9.64041239 19.77063564 24.32023836 30.55696111
 37.2623283  39.81831634]


In [39]:
x_new = np.arange(10).reshape((-1, 2))
y_new = model.predict(x_new)

print(x_new)
print(y_new)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
[ 7.08442436  8.25678579  9.42914723 10.60150867 11.77387011]


### Polynomial Regression With scikit-learn

In [40]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [42]:
x = np.array([5,15,25,35,45,55]).reshape(-1,1)
y = np.array([15, 11, 2, 8, 25, 32])

In [44]:
transformer = PolynomialFeatures(degree=2, include_bias=False)
transformer.fit(x)
x_ = transformer.transform(x)

In [45]:
print(x)

[[ 5]
 [15]
 [25]
 [35]
 [45]
 [55]]


In [46]:
print(x_)

[[   5.   25.]
 [  15.  225.]
 [  25.  625.]
 [  35. 1225.]
 [  45. 2025.]
 [  55. 3025.]]


In [48]:
# x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

In [51]:
model = LinearRegression().fit(x_, y)

In [53]:
#Coefficient of determination (𝑅²)
r_sq = model.score(x_,y)
print('Coefficient of determination (𝑅²) : ', r_sq)
print("Intercept Value: ", model.intercept_)
print("Slope Value: ", model.coef_)

Coefficient of determination (𝑅²) :  0.8908516262498564
Intercept Value:  21.37232142857144
Slope Value:  [-1.32357143  0.02839286]


In [57]:
x_ = PolynomialFeatures(degree=2, include_bias=True).fit_transform(x)
model = LinearRegression(fit_intercept=False).fit(x_, y)

In [58]:
print(x_)

[[1.000e+00 5.000e+00 2.500e+01]
 [1.000e+00 1.500e+01 2.250e+02]
 [1.000e+00 2.500e+01 6.250e+02]
 [1.000e+00 3.500e+01 1.225e+03]
 [1.000e+00 4.500e+01 2.025e+03]
 [1.000e+00 5.500e+01 3.025e+03]]


In [59]:
#Coefficient of determination (𝑅²)
r_sq = model.score(x_,y)
print('Coefficient of determination (𝑅²) : ', r_sq)
print("Intercept Value: ", model.intercept_)
print("Slope Value: ", model.coef_)

Coefficient of determination (𝑅²) :  0.8908516262498564
Intercept Value:  0.0
Slope Value:  [21.37232143 -1.32357143  0.02839286]


In [66]:
y_new = model.predict(x_)
print('Predicted Response :', y_pred, sep='\n')

Predicted Response :
[ 7.08442436  9.5466835   9.64041239 19.77063564 24.32023836 30.55696111
 37.2623283  39.81831634]


### Advanced Linear Regression With statsmodels

In [68]:
import numpy as np
import statsmodels.api as sm

In [69]:
x = [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
y = [4, 5, 20, 14, 32, 22, 38, 43]
x, y = np.array(x), np.array(y)

In [70]:
x = sm.add_constant(x)

In [72]:
print(x)

[[ 1.  0.  1.]
 [ 1.  5.  1.]
 [ 1. 15.  2.]
 [ 1. 25.  5.]
 [ 1. 35. 11.]
 [ 1. 45. 15.]
 [ 1. 55. 34.]
 [ 1. 60. 35.]]


In [73]:
model = sm.OLS(y,x)

In [74]:
results = model.fit()

In [75]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Mon, 14 Sep 2020   Prob (F-statistic):            0.00713
Time:                        13:11:18   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2

  "anyway, n=%i" % int(n))


In [78]:
print('Coefficient of determination: ' , results.rsquared)
print('Adjusted coefficient of determination: ', results.rsquared_adj)
print('Regression coefficients: ', results.params)

Coefficient of determination:  0.8615939258756776
Adjusted coefficient of determination:  0.8062314962259487
Regression coefficients:  [5.52257928 0.44706965 0.25502548]


In [80]:
print('Predicted Response: ', results.fittedvalues, sep='\n')
print('Predicted Response: ', results.predict(x), sep='\n')

Predicted Response: 
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
Predicted Response: 
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [81]:
x_new = sm.add_constant(np.arange(10).reshape(-1,2))

In [82]:
print(x_new)

[[1. 0. 1.]
 [1. 2. 3.]
 [1. 4. 5.]
 [1. 6. 7.]
 [1. 8. 9.]]


In [83]:
y_new = results.predict(x_new)

In [85]:
print(y_new)

[ 5.77760476  7.18179502  8.58598528  9.99017554 11.3943658 ]
