# Linear Regression Calculations
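This notebook walks through simple linear regression in Python: computing the slope and intercept by hand, fitting the same model with `statsmodels`, predicting new values, and deriving the residuals, R-squared, standard errors, and confidence intervals of the coefficients.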

In [1]:
# === Import Libraries ===
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt


## Scenario 1: Manual Calculation of Regression Coefficients
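We calculate the least-squares slope and intercept manually: $\hat{\beta}_1 = \sum (x_i - \bar{x})(y_i - \bar{y}) \,/\, \sum (x_i - \bar{x})^2$ and $\hat{\beta}_0 = \bar{y} - \hat{\beta}_1 \bar{x}$.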

In [2]:
# Observed data: twelve (x, y) pairs, with every x value appearing twice
x = np.array([0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 12, 12])
y = np.array([16, 116, 1170, 841, 2287, 2012, 2653, 3333, 4270, 3999, 5750, 5407])

# Sample means and each observation's deviation from them
mean_x, mean_y = x.mean(), y.mean()
x_dev, y_dev = x - mean_x, y - mean_y

# Least-squares estimates: slope = Sxy / Sxx; the intercept is chosen so the
# fitted line passes through the point (mean_x, mean_y)
cross_dev_sum = (x_dev * y_dev).sum()
sq_dev_sum = (x_dev ** 2).sum()
slope = cross_dev_sum / sq_dev_sum
intercept = mean_y - slope * mean_x

print(f"Slope: {slope:.4f}", f"Intercept: {intercept:.4f}", sep="\n")

Slope: 467.5821
Intercept: 160.7286


## Scenario 2: Fit a Linear Regression Model
We use `statsmodels` to fit a regression model and obtain key metrics.

In [3]:
# Ordinary least squares fit of y on x through the statsmodels formula
# interface; the names used in the formula are looked up in the `data` mapping
regression_data = {'x': x, 'y': y}
model = smf.ols(formula='y ~ x', data=regression_data).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.984
Model:                            OLS   Adj. R-squared:                  0.982
Method:                 Least Squares   F-statistic:                     608.4
Date:                Sat, 14 Dec 2024   Prob (F-statistic):           2.74e-10
Time:                        15:59:48   Log-Likelihood:                -82.615
No. Observations:                  12   AIC:                             169.2
Df Residuals:                      10   BIC:                             170.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    160.7286    125.745      1.278      0.2

  return hypotest_fun_in(*args, **kwds)


## Scenario 3: Predicting Values
We predict values of y at new x values using the slope and intercept computed manually in Scenario 1.

In [4]:
# Evaluate the regression line y = intercept + slope * x at x = 1, 3, ..., 11
x_new = np.arange(1, 12, 2)
y_pred = slope * x_new + intercept
print("Predicted values:", y_pred, sep="\n")

Predicted values:
[ 628.31071429 1563.475      2498.63928571 3433.80357143 4368.96785714
 5304.13214286]


## Scenario 4: Calculating Residuals
Residuals are the differences between the observed values and the values predicted by the model.

In [5]:
# Fitted value at every observed x, then residual = observed - fitted
y_pred_full = slope * x + intercept
residuals = np.subtract(y, y_pred_full)
print("Residuals:", residuals, sep="\n")

Residuals:
[-144.72857143  -44.72857143   74.10714286 -254.89285714  255.94285714
  -19.05714286 -313.22142857  366.77857143  368.61428571   97.61428571
  -21.71428571 -364.71428571]


## Scenario 5: R-Squared Calculation
R-squared is a measure of how well the regression model fits the data.

In [6]:
# R-squared = 1 - SS_residual / SS_total
ss_total = ((y - mean_y) ** 2).sum()        # variation of y about its mean
ss_residual = np.square(residuals).sum()    # variation left unexplained by the line
r_squared = 1 - ss_residual / ss_total
print(f"R-Squared: {r_squared:.4f}")

R-Squared: 0.9838


## Scenario 6: Standard Error of the Regression Coefficients
The standard errors of the slope and intercept indicate the variability of these estimates.

In [7]:
# Standard errors of the two coefficient estimates
n = x.size
sum_sq_x_dev = ((x - mean_x) ** 2).sum()    # Sxx, shared by both formulas

# Residual variance estimate: n - 2 degrees of freedom because two
# coefficients (slope and intercept) were estimated from the data
s_squared = (residuals ** 2).sum() / (n - 2)

std_error_slope = np.sqrt(s_squared / sum_sq_x_dev)
intercept_variance = s_squared * (1/n + mean_x**2 / sum_sq_x_dev)
std_error_intercept = np.sqrt(intercept_variance)

print(
    f"Standard Error (Slope): {std_error_slope:.4f}",
    f"Standard Error (Intercept): {std_error_intercept:.4f}",
    sep="\n",
)

Standard Error (Slope): 18.9568
Standard Error (Intercept): 125.7452


## Scenario 7: Confidence Intervals for Coefficients
We calculate confidence intervals for the slope and intercept.

In [8]:
# 95% confidence intervals for the slope and intercept: estimate ± t* · SE,
# where t* is the upper 2.5% point of Student's t with n - 2 degrees of freedom
t_value = stats.t.ppf(1 - 0.025, df=n - 2)  # 95% confidence interval
conf_interval_slope = (
    slope - t_value * std_error_slope,
    slope + t_value * std_error_slope
)
conf_interval_intercept = (
    intercept - t_value * std_error_intercept,
    intercept + t_value * std_error_intercept
)

# Convert to built-in floats for display only. The interval bounds are NumPy
# scalars, and NumPy >= 2.0 renders those inside a tuple as `np.float64(...)`,
# so printing the tuples directly gives version-dependent output. The stored
# intervals themselves are left unchanged.
print(f"95% Confidence Interval (Slope): {tuple(map(float, conf_interval_slope))}")
print(f"95% Confidence Interval (Intercept): {tuple(map(float, conf_interval_intercept))}")

95% Confidence Interval (Slope): (425.3437421077541, 509.8205436065315)
95% Confidence Interval (Intercept): (-119.44928263220555, 440.90642548934903)
