In [60]:
#import packages and classes
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [32]:
#provide data - linear regression
x = np.array([5,15,25,35,45,55]).reshape(-1,1)
print(x)

[[ 5]
 [15]
 [25]
 [35]
 [45]
 [55]]


In [33]:
y = np.array([5,20,14,32,22,38])
print(y)

[ 5 20 14 32 22 38]


In [36]:
#create a model and fit it

model = LinearRegression()

In [37]:
model.fit(x,y)

LinearRegression()

In [39]:
model = LinearRegression.fit(x, y)

TypeError: fit() missing 1 required positional argument: 'y'

In [41]:
#get results

r_sq = model.score(x, y)
print('Coefficient of determination:', r_sq)

Coefficient of determination: 0.715875613747954


In [8]:
print('Intercept:', model.intercept_)

Intercept: 5.633333333333329


In [9]:
print('Slope:', model.coef_)

Slope: [0.54]


In [12]:
#using y as a two-dimensional array

new_model = LinearRegression().fit(x, y.reshape(-1,1))
print('Intercept:', new_model.intercept_)

Intercept: [5.63333333]


In [13]:
print('Slope:', new_model.coef_)

Slope: [[0.54]]


In [16]:
#predict response

y_pred = model.predict(x)
print('predicted response:', y_pred, sep='\n')

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [18]:
#another prediction method

y_pred = model.intercept_ + model.coef_ * x
print('predicted intercept:', y_pred, sep='\n')

predicted intercept:
[[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


In [19]:
#another example on x 

x_new = np.arange(5).reshape(-1,1)
print(x_new)

[[0]
 [1]
 [2]
 [3]
 [4]]


In [20]:
y_new = model.predict(x_new)
print(y_new)

[5.63333333 6.17333333 6.71333333 7.25333333 7.79333333]


In [30]:
# for multiple regression

x = [[0,1], [5,1], [15,2], [25,5], [35,11], [45,15], [55,34], [60,35]]
y = [4,5,20,14,32,22,38,43]
x,y = np.array(x), np.array(y)

print(x)
print(y)

[[ 0  1]
 [ 5  1]
 [15  2]
 [25  5]
 [35 11]
 [45 15]
 [55 34]
 [60 35]]
[ 4  5 20 14 32 22 38 43]


In [31]:
#create a model and fit it using the fit() funtion

model = LinearRegression().fit(x, y)

In [32]:
#get results

r_sq = model.score(x, y)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.8615939258756776


In [33]:
print('intercept:', model.intercept_)

intercept: 5.5225792751981935


In [34]:
print('slope:', model.coef_)

slope: [0.44706965 0.25502548]


In [35]:
#predict response

y_pred = model.predict(x)
print('predicted response:', y_pred, sep='\n')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [36]:
#another method of predicting response

y_pred = model.intercept_ + np.sum(model.coef_ * x, axis=1)
print('predicted response:', y_pred, sep='\n')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [37]:
#applying the model to a new data

x_new = np.arange(10).reshape(-1,2)
print(x_new)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


In [38]:
y_new = model.predict(x_new)
print(y_new)

[ 5.77760476  7.18179502  8.58598528  9.99017554 11.3943658 ]


In [47]:
#polynomial regression

from sklearn.preprocessing import PolynomialFeatures

In [41]:
#provide data

x = np.array([5,15,25,35,45,55]).reshape(-1,1)
y = np.array([15,11,2,8,25,32])

In [42]:
#transform input data

transformer = PolynomialFeatures(degree=2, include_bias=False)

In [43]:
transformer.fit(x)

PolynomialFeatures(include_bias=False)

In [44]:
x_ = transformer.transform(x)

In [46]:
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)
print(x_)

[[   5.   25.]
 [  15.  225.]
 [  25.  625.]
 [  35. 1225.]
 [  45. 2025.]
 [  55. 3025.]]


In [56]:
#create a model to fit

model = LinearRegression().fit(x_, y)

In [48]:
#get results

r_sq = model.score(x_, y)
print('coeffecient of determination:', r_sq)

coeffecient of determination: 0.8908516262498563


In [49]:
print('intercept:', model.intercept_)

intercept: 21.372321428571453


In [50]:
print('coefficient:', model.coef_)

coefficient: [-1.32357143  0.02839286]


In [60]:
#using a different transformation and regression arguement

x_ = PolynomialFeatures(degree=2, include_bias=True).fit_transform(x)
print(x_)

[[1.000e+00 5.000e+00 2.500e+01]
 [1.000e+00 1.500e+01 2.250e+02]
 [1.000e+00 2.500e+01 6.250e+02]
 [1.000e+00 3.500e+01 1.225e+03]
 [1.000e+00 4.500e+01 2.025e+03]
 [1.000e+00 5.500e+01 3.025e+03]]


In [61]:
model = LinearRegression(fit_intercept=False).fit(x_, y)

In [62]:
r_sq = model.score(x_, y)
print('coeffecient of determination:', r_sq)

coeffecient of determination: 0.8908516262498563


In [63]:
print('intercept:', model.intercept_)

intercept: 0.0


In [64]:
print('intercept:', model.coef_)

intercept: [21.37232143 -1.32357143  0.02839286]


In [65]:
#predict response

y_pred = model.predict(x_)
print('predicted response:', y_pred, sep='\n')

predicted response:
[15.46428571  7.90714286  6.02857143  9.82857143 19.30714286 34.46428571]


In [45]:
#another example on polynomial regression

x = [
    [0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]
]

y = [4, 5, 20, 14, 32, 22, 38, 43]

x, y = np.array(x), np.array(y)

print(x)
print(y)

[[ 0  1]
 [ 5  1]
 [15  2]
 [25  5]
 [35 11]
 [45 15]
 [55 34]
 [60 35]]
[ 4  5 20 14 32 22 38 43]


In [48]:
#transform input data

transformer = PolynomialFeatures(degree=2, include_bias=False)

In [49]:
transformer.fit(x)

PolynomialFeatures(include_bias=False)

In [51]:
x_ = transformer.transform(x)

In [53]:
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)
print(x_)

[[0.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00]
 [5.000e+00 1.000e+00 2.500e+01 5.000e+00 1.000e+00]
 [1.500e+01 2.000e+00 2.250e+02 3.000e+01 4.000e+00]
 [2.500e+01 5.000e+00 6.250e+02 1.250e+02 2.500e+01]
 [3.500e+01 1.100e+01 1.225e+03 3.850e+02 1.210e+02]
 [4.500e+01 1.500e+01 2.025e+03 6.750e+02 2.250e+02]
 [5.500e+01 3.400e+01 3.025e+03 1.870e+03 1.156e+03]
 [6.000e+01 3.500e+01 3.600e+03 2.100e+03 1.225e+03]]


In [54]:
model = LinearRegression(fit_intercept=False).fit(x_, y)

In [57]:
r_sq = model.score(x_, y)
print('coefficient of determinaton:', r_sq)

print('intercept:', model.intercept_)

print('coefficient:', model.coef_)

coefficient of determinaton: 0.9447160093532023
intercept: 0.0
coefficient: [ 2.54298609  0.23582117 -0.1580735   0.49381445 -0.47918613]


In [56]:
y_pred = model.predict(x_)
print('predicted response:', y_pred, sep='\n')

predicted response:
[-0.24336496 10.98880023 15.94758501 15.70497325 30.09554978 23.38072829
 39.20367564 41.77564528]


In [64]:
#using advanced linear regression

import statsmodels.api as sm

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [65]:
x = [
    [0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]
]

y = [4, 5, 20, 14, 32, 22, 38, 43]

x, y = np.array(x), np.array(y)

print(x)
print(y)

[[ 0  1]
 [ 5  1]
 [15  2]
 [25  5]
 [35 11]
 [45 15]
 [55 34]
 [60 35]]
[ 4  5 20 14 32 22 38 43]


In [68]:
#adding column (one's) to x because it doesn't take b0 into account by default

x = sm.add_constant(x)
print(x)

[[ 1.  0.  1.]
 [ 1.  5.  1.]
 [ 1. 15.  2.]
 [ 1. 25.  5.]
 [ 1. 35. 11.]
 [ 1. 45. 15.]
 [ 1. 55. 34.]
 [ 1. 60. 35.]]


In [70]:
#create a model and fit in

model = sm.OLS(y,x)   #in this case, the dependent variables comes first

results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Thu, 09 Feb 2023   Prob (F-statistic):            0.00713
Time:                        17:50:44   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2



In [77]:
#further results 

print('coeffiecient of determination:', results.rsquared)
print('adjusted coefficient of determination:', results.rsquared_adj)
print('regression coefficents:', results.params)

coeffiecient of determination: 0.8615939258756777
adjusted coefficient of determination: 0.8062314962259488
regression coefficents: [5.52257928 0.44706965 0.25502548]


In [83]:
#predict response

print('predicted response:', results.fittedvalues)

#or

print('predicted response:', results.predict(x))

predicted response: [ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
predicted response: [ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
