In [5]:

"""
This notebook demonstrates linear regression using:
1. Manual matrix algebra calculations (matching LaTeX document)
2. scikit-learn implementation with train/test split
3. Comprehensive visualizations and model evaluation
"""

# ============================================================================
# Import Required Libraries
# ============================================================================

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import pandas as pd

# Plot appearance: seaborn grid theme plus the default figure and font sizes
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})

# Report that the imports succeeded and which library versions are in use
print(" All libraries imported successfully!")
for library_name, library_version in (
    ("NumPy", np.__version__),
    ("Matplotlib", plt.matplotlib.__version__),
):
    print(f"{library_name} version: {library_version}")


# ============================================================================
# Define the Dataset
# ============================================================================

# Section banner: title framed by two 70-character rules.
section_rule = "=" * 70
print(section_rule, "LINEAR REGRESSION: MATRIX ALGEBRA APPROACH", section_rule, sep="\n")

# Four (x, y) observations used throughout the worked example.
X_values = np.array([1, 2, 3, 4])
Y_values = np.array([10, 15, 20, 25])

print("\nOriginal Dataset:", f"X = {X_values}", f"Y = {Y_values}", sep="\n")

# Design matrix: a leading column of ones (the intercept term) followed by x.
intercept_column = np.ones(X_values.size)
X_matrix = np.column_stack((intercept_column, X_values))

print("\nFeature Matrix X (with intercept column):")
print(X_matrix)
print(f"Shape: {X_matrix.shape}")

# Y is shown as a column for readability; Y_values itself stays one-dimensional,
# which is the shape reported on the following line.
print("\nTarget Vector Y:")
print(Y_values[:, np.newaxis])
print(f"Shape: {Y_values.shape}")
# ============================================================================
# Manual Calculation - Step 1: Compute X^T X
# ============================================================================

print("\n" + "=" * 70)
print("STEP 1: COMPUTE X^T X")
print("=" * 70)

# Transpose of the design matrix.
X_transpose = X_matrix.T
print("\nX^T (Transpose of X):")
print(X_transpose)
print(f"Shape: {X_transpose.shape}")

# First ingredient of the normal equations: the product X^T X.
XTX = X_transpose @ X_matrix  # Matrix multiplication
print("\nX^T X:")
print(XTX)

# Show how the operand shapes combine. A shape tuple already prints with its
# own parentheses, so it is interpolated directly; wrapping it in another pair
# produced output such as "((2, 4))".
print(f"\nMatrix multiplication: {X_transpose.shape} × {X_matrix.shape} = {XTX.shape}")

# ============================================================================
# Manual Calculation - Step 2: Compute (X^T X)^(-1)
# ============================================================================

print("\n" + "=" * 70)
print("STEP 2: COMPUTE (X^T X)^(-1)")
print("=" * 70)

# Determinant of X^T X, printed for reference alongside the inverse.
det_XTX = np.linalg.det(XTX)
print(f"\nDeterminant of X^T X: {det_XTX}")

# Invert X^T X.
XTX_inv = np.linalg.inv(XTX)
print("\n(X^T X)^(-1):")
print(XTX_inv)

# Verify the inverse: XTX @ XTX_inv should reproduce the identity matrix.
identity_check = XTX @ XTX_inv
print("\nVerification: (X^T X) × (X^T X)^(-1) = I")
print(np.round(identity_check, 10))  # Round away floating-point noise for display
# The reference identity is sized from XTX itself, so the check keeps working
# if the design matrix gains or loses columns (a hard-coded 2x2 would not).
inverse_ok = np.allclose(identity_check, np.eye(XTX.shape[0]))
print(" Inverse verified!" if inverse_ok else "✗ Error in inverse")

# ============================================================================
# Manual Calculation - Step 3: Compute X^T Y
# ============================================================================

# Section banner for step 3.
step3_rule = "=" * 70
print(f"\n{step3_rule}", "STEP 3: COMPUTE X^T Y", step3_rule, sep="\n")

# Second ingredient of the normal equations: the product X^T Y.
XTY = np.matmul(X_transpose, Y_values)
print("\nX^T Y:", XTY, f"Shape: {XTY.shape}", sep="\n")

# ============================================================================
# Manual Calculation - Step 4: Compute β = (X^T X)^(-1) X^T Y
# ============================================================================

# Section banner for step 4.
step4_rule = "=" * 70
print(f"\n{step4_rule}", "STEP 4: COMPUTE β = (X^T X)^(-1) X^T Y", step4_rule, sep="\n")

# Coefficients: the inverse from step 2 applied to X^T Y from step 3.
beta = XTX_inv @ XTY
b0, b1 = beta[0], beta[1]  # intercept and slope, in design-matrix column order

print("\nCoefficient Vector β:")
print(beta)
print(f"\nβ₀ (Intercept) = {b0}")
print(f"β₁ (Slope) = {b1}")

print(f"\n Final Regression Equation: y = {b0} + {b1}x")


# ============================================================================
# Compute Predictions and Residuals
# ============================================================================

print("\n" + "=" * 70)
print("STEP 5: PREDICTIONS AND RESIDUALS")
print("=" * 70)

# Fitted values: apply the estimated coefficients to the design matrix.
Y_pred_manual = X_matrix @ beta
print("\nPredicted Values (ŷ = Xβ):")
print(Y_pred_manual)

# Residuals: observed minus fitted.
residuals = Y_values - Y_pred_manual
print("\nResiduals (ε = Y - ŷ):")
print(residuals)

# Error metrics computed directly from the residuals.
SSE = np.sum(residuals**2)
MSE = SSE / len(Y_values)  # averaged over all n observations
RMSE = np.sqrt(MSE)
MAE = np.mean(np.abs(residuals))

# R² = 1 - SSE/SST. When the target is constant (SST == 0) the ratio is
# undefined, and the score is reported as 1.
Y_mean = np.mean(Y_values)
SST = np.sum((Y_values - Y_mean)**2)
R2 = 1 - (SSE / SST) if SST != 0 else 1

print("\n Model Quality Metrics:")
print(f"Sum of Squared Errors (SSE): {SSE:.4f}")
print(f"Mean Squared Error (MSE): {MSE:.4f}")
print(f"Root Mean Squared Error (RMSE): {RMSE:.4f}")
print(f"Mean Absolute Error (MAE): {MAE:.4f}")
print(f"R² Score: {R2:.4f}")

# β comes from a numerically computed inverse, so R² can differ from 1 by
# rounding noise even for an exact fit. Compare with a tight absolute
# tolerance instead of exact float equality; rtol=0 keeps a merely very
# good fit (e.g. R² = 0.99999) from being labelled perfect.
PERFECT_FIT_TOL = 1e-12
if np.isclose(R2, 1.0, rtol=0.0, atol=PERFECT_FIT_TOL):
    print("\n PERFECT FIT! The model explains 100% of the variance.")




LINEAR REGRESSION: MATRIX ALGEBRA APPROACH

Original Dataset:
X = [1 2 3 4]
Y = [10 15 20 25]

Feature Matrix X (with intercept column):
[[1. 1.]
 [1. 2.]
 [1. 3.]
 [1. 4.]]
Shape: (4, 2)

Target Vector Y:
[[10]
 [15]
 [20]
 [25]]
Shape: (4,)

STEP 1: COMPUTE X^T X

X^T (Transpose of X):
[[1. 1. 1. 1.]
 [1. 2. 3. 4.]]
Shape: (2, 4)

X^T X:
[[ 4. 10.]
 [10. 30.]]

Matrix multiplication: ((2, 4)) × ((4, 2)) = ((2, 2))

STEP 2: COMPUTE (X^T X)^(-1)

Determinant of X^T X: 20.000000000000007

(X^T X)^(-1):
[[ 1.5 -0.5]
 [-0.5  0.2]]

Verification: (X^T X) × (X^T X)^(-1) = I
[[1. 0.]
 [0. 1.]]
 Inverse verified!

STEP 3: COMPUTE X^T Y

X^T Y:
[ 70. 200.]
Shape: (2,)

STEP 4: COMPUTE β = (X^T X)^(-1) X^T Y

Coefficient Vector β:
[5. 5.]

β₀ (Intercept) = 5.0
β₁ (Slope) = 5.0

 Final Regression Equation: y = 5.0 + 5.0x

STEP 5: PREDICTIONS AND RESIDUALS

Predicted Values (ŷ = Xβ):
[10. 15. 20. 25.]

Residuals (ε = Y - ŷ):
[0. 0. 0. 0.]

 Model Quality Metrics:
Sum of Squared Errors (SSE): 0.00