This notebook implements linear regression using the statsmodels package.
<br>statsmodels provides a detailed statistical report of the fitted regression.

> For more information on linear regression, read the markdown notes in the other Python files in this directory.

In [12]:
import numpy as np
import statsmodels.api as sm

In [13]:
# Sample dataset.
# Each row of x is one observation holding two predictor values;
# y holds the observed response for the corresponding row of x.
x = np.array([
    [0, 1],
    [5, 1],
    [15, 2],
    [25, 5],
    [35, 11],
    [45, 15],
    [55, 34],
    [60, 35],
])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

<b>Note:</b> We need to add a column of 1s if we want the model to estimate an intercept. This is because statsmodels' <code>OLS</code> does not include an intercept term by default.

In [14]:
# Put a leading column of ones in front of the predictors so that the
# regression can estimate an intercept (the coefficient of that column).
x = sm.add_constant(x, prepend=True)
print(x)

[[ 1.  0.  1.]
 [ 1.  5.  1.]
 [ 1. 15.  2.]
 [ 1. 25.  5.]
 [ 1. 35. 11.]
 [ 1. 45. 15.]
 [ 1. 55. 34.]
 [ 1. 60. 35.]]


In [15]:
# Ordinary Least Squares (OLS) regression.

# Specify the model: y is the response (endog) and x is the design
# matrix (exog), which already carries the constant column.
# Nothing is estimated at this point.
model = sm.OLS(endog=y, exog=x)

# Estimate the coefficients; the returned results object holds the
# fitted parameters and the diagnostic statistics.
results = model.fit()

In [16]:
# Show the full text report of the fit (goodness-of-fit statistics
# followed by the per-coefficient table).
summary_report = results.summary()
print(summary_report)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.806
Method:                 Least Squares   F-statistic:                     15.56
Date:                Fri, 14 Apr 2023   Prob (F-statistic):            0.00713
Time:                        03:02:06   Log-Likelihood:                -24.316
No. Observations:                   8   AIC:                             54.63
Df Residuals:                       5   BIC:                             54.87
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.5226      4.431      1.246      0.2



Extracting values from the report

In [17]:
# Read individual statistics straight from the results object rather
# than parsing them out of the printed summary.
r_squared = results.rsquared
adj_r_squared = results.rsquared_adj
coefficients = results.params  # ordered as the columns of x: constant first

print(f"coefficient of determination: {r_squared}")
print(f"adjusted coefficient of determination: {adj_r_squared}")
print(f"regression coefficients: {coefficients}")

coefficient of determination: 0.8615939258756776
adjusted coefficient of determination: 0.8062314962259487
regression coefficients: [5.52257928 0.44706965 0.25502548]


In [18]:
# In-sample predictions, obtained two equivalent ways.

# 1) The values the model fitted for the training rows.
in_sample_fitted = results.fittedvalues
print(f"predicted response:\n{in_sample_fitted}")

# 2) Calling predict() on the same design matrix the model was fitted on.
in_sample_predicted = results.predict(x)
print(f"\npredicted response:\n{in_sample_predicted}")

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [19]:
# Predict the response for unseen data: five new observations with two
# predictor values each (rows [0, 1], [2, 3], ..., [8, 9]).
#
# has_constant="add" forces the column of ones to be added. With the default
# ("skip"), add_constant returns the input unchanged whenever it already
# contains a constant non-zero column (for example a single-row input), and
# the design matrix would then no longer line up with the fitted coefficients.
x_new = sm.add_constant(np.arange(10).reshape((-1, 2)), has_constant="add")
y_new = results.predict(x_new)
print(y_new)

[ 5.77760476  7.18179502  8.58598528  9.99017554 11.3943658 ]
