# Linear Regression — Effects of Common Data Transformations
This notebook demonstrates, with code, how coefficients, predictions, and R² behave under common transformations of X and Y.
We use a synthetic dataset and closed-form OLS.


In [1]:
import numpy as np, pandas as pd

# Single seeded generator shared by the notebook so Restart & Run All reproduces the same draws.
rng = np.random.default_rng(seed=42)

def make_data(n=200, p=2, noise=1.0, beta=None, intercept=0.7, random_state=None):
    """Generate a synthetic linear-regression dataset ``y = intercept + X @ beta + eps``.

    Parameters
    ----------
    n : int
        Number of observations.
    p : int
        Number of features (columns of ``X``).
    noise : float
        Standard deviation of the Gaussian noise added to ``y``.
    beta : array-like of length ``p``, optional
        True slopes. Defaults to ``[1.5, -2.0]``, which is only valid for ``p == 2``.
    intercept : float
        True intercept (default 0.7).
    random_state : None, int, or numpy.random.Generator, optional
        Source of randomness. ``None`` draws from the notebook-level ``rng``
        (so repeated calls advance that shared generator); anything else is
        passed through ``np.random.default_rng``.

    Returns
    -------
    X : ndarray of shape (n, p)
    y : ndarray of shape (n,)
    beta_true : ndarray of shape (p,)
    intercept : float

    Raises
    ------
    ValueError
        If the slope vector does not have exactly ``p`` entries.
    """
    if beta is None:
        beta_true = np.array([1.5, -2.0])
    else:
        beta_true = np.asarray(beta, dtype=float)
    if beta_true.shape != (p,):
        # Previously this surfaced as an opaque matmul shape error; fail early and say why.
        raise ValueError(
            f"beta must have shape ({p},) to match p={p}, got shape {beta_true.shape}; "
            "pass `beta=` explicitly when p != 2"
        )
    # The global generator is only looked up when no explicit source is given.
    gen = rng if random_state is None else np.random.default_rng(random_state)
    # Draw order (features first, then noise) is kept so seeded results are unchanged.
    X = gen.normal(size=(n, p))
    y = intercept + X @ beta_true + gen.normal(scale=noise, size=n)
    return X, y, beta_true, intercept

def ols_fit(X, y, fit_intercept=True):
    """Fit ordinary least squares in closed form via the pseudo-inverse of X'X.

    Parameters
    ----------
    X : array-like of shape (n, p) or (n,)
        Feature matrix; a 1-D input is treated as a single feature column.
    y : array-like of shape (n,)
        Response vector.
    fit_intercept : bool
        If True, a column of ones is prepended, so ``beta[0]`` is the intercept.

    Returns
    -------
    dict with keys
        ``beta``     : coefficient vector (intercept first when fitted),
        ``yhat``     : fitted values,
        ``resid``    : residuals ``y - yhat``,
        ``r2``       : ``1 - SS_res / SS_tot`` with SS_tot centered on ``mean(y)``
                       (also for no-intercept fits); NaN when ``y`` is constant,
        ``se``       : classical standard errors; NaN when there are no residual
                       degrees of freedom,
        ``X_design`` : the design matrix actually used.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.ndim == 1:
        # A bare vector is one feature; make it a column so X'X is a matrix.
        X = X[:, None]

    if fit_intercept:
        X1 = np.column_stack([np.ones(len(X)), X])
    else:
        X1 = X

    # Computed once and reused for both the coefficients and their standard errors.
    XtX_inv = np.linalg.pinv(X1.T @ X1)
    beta = XtX_inv @ (X1.T @ y)
    yhat = X1 @ beta
    resid = y - yhat

    ss_res = float((resid**2).sum())
    ss_tot = float(((y - y.mean())**2).sum())
    # R² is undefined for a constant response; report NaN instead of dividing by zero.
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")

    n_params = X1.shape[1]
    dof = len(y) - n_params
    if dof > 0:
        sigma2_hat = ss_res / dof
        se = np.sqrt(np.diag(XtX_inv) * sigma2_hat)
    else:
        # No residual degrees of freedom: the noise variance cannot be estimated.
        se = np.full(n_params, np.nan)

    return {"beta": beta, "yhat": yhat, "resid": resid, "r2": r2, "se": se, "X_design": X1}

# One shared synthetic dataset (arguments spelled out; they equal make_data's defaults).
X, y, beta_true, intercept_true = make_data(n=200, p=2, noise=1.0)
# Baseline fit that every transformation below is compared against.
m_base = ols_fit(X, y, fit_intercept=True)


## 1) Doubling all features X → slope divides by 2, predictions & R² unchanged

In [2]:
# Rescale every feature by the same factor and refit.
X1 = 2*X
m1 = ols_fit(X1, y, fit_intercept=True)
print("Base beta:", m_base["beta"])
print("New  beta:", m1["beta"], "(expect slopes ≈ base/2)")
print("L2 diff of predictions:", np.linalg.norm(m1["yhat"] - m_base["yhat"]))
print("R² base vs new:", m_base["r2"], m1["r2"])

# Enforce the claim so Run All fails loudly if it ever stops holding.
np.testing.assert_allclose(m1["beta"][0], m_base["beta"][0], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m1["beta"][1:], m_base["beta"][1:] / 2, rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m1["yhat"], m_base["yhat"], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m1["r2"], m_base["r2"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507]
New  beta: [ 0.63054225  0.74108027 -1.02665254] (expect slopes ≈ base/2)
L2 diff of predictions: 2.2879463045691018e-14
R² base vs new: 0.8385724376872568 0.8385724376872568


## 2) Add constant to X → slopes same, intercept shifts by -c·beta

In [3]:
# Shift each feature by its own constant and refit.
c = np.array([0.8, -1.2])
X2 = X + c
m2 = ols_fit(X2, y, fit_intercept=True)
print("Base beta:", m_base["beta"])
print("New  beta:", m2["beta"], "(expect slopes ≈ unchanged)")
print("L2 diff of predictions:", np.linalg.norm(m2["yhat"] - m_base["yhat"]))
print("R² base vs new:", m_base["r2"], m2["r2"])

# Enforce the claim: slopes unchanged, intercept absorbs the shift as -c·slopes.
np.testing.assert_allclose(m2["beta"][1:], m_base["beta"][1:], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(
    m2["beta"][0], m_base["beta"][0] - c @ m_base["beta"][1:], rtol=1e-8, atol=1e-8
)
np.testing.assert_allclose(m2["yhat"], m_base["yhat"], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m2["r2"], m_base["r2"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507]
New  beta: [-3.01915226  1.48216054 -2.05330507] (expect slopes ≈ unchanged)
L2 diff of predictions: 5.695112347139579e-14
R² base vs new: 0.8385724376872568 0.8385724376872568


## 3) Center X → slopes same, intercept ≈ mean(Y)

In [4]:
# Subtract each column's mean so every feature has mean zero, then refit.
X3 = X - X.mean(axis=0, keepdims=True)
m3 = ols_fit(X3, y, fit_intercept=True)
print("Base beta:", m_base["beta"])
print("New  beta:", m3["beta"])
print("mean(Y) ≈", y.mean(), "intercept after centering ≈", m3["beta"][0])
print("Predictions L2 diff:", np.linalg.norm(m3["yhat"] - m_base["yhat"]))

# Enforce the claim: slopes unchanged, intercept equals mean(Y), predictions identical.
np.testing.assert_allclose(m3["beta"][1:], m_base["beta"][1:], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m3["beta"][0], y.mean(), rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m3["yhat"], m_base["yhat"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507]
New  beta: [ 0.72977714  1.48216054 -2.05330507]
mean(Y) ≈ 0.7297771361811334 intercept after centering ≈ 0.729777136181134
Predictions L2 diff: 1.3114182033767694e-14


## 4) Standardize X (z-score) → standardized (per-SD) coefficients: slope_j = base slope_j × SD(X_j); intercept = mean(Y); R² unchanged

In [5]:
# z-score each column (population SD, ddof=0) and refit.
X4 = (X - X.mean(axis=0, keepdims=True)) / X.std(axis=0, ddof=0)
m4 = ols_fit(X4, y, fit_intercept=True)
print("Standardized beta:", m4["beta"])
print("R²:", m4["r2"])

# Enforce the relationship to the base fit: each slope is the base slope times
# that feature's SD, the intercept is mean(Y) because X4 is centered, and R² is unchanged.
np.testing.assert_allclose(
    m4["beta"][1:], m_base["beta"][1:] * X.std(axis=0, ddof=0), rtol=1e-8, atol=1e-8
)
np.testing.assert_allclose(m4["beta"][0], y.mean(), rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m4["r2"], m_base["r2"], rtol=1e-8, atol=1e-8)

Standardized beta: [ 0.72977714  1.4617762  -1.87652377]
R²: 0.8385724376872568


## 5) Multiply Y by k → all coefficients and SE scale by k, R² unchanged

In [6]:
# Rescale the response by k and refit on the original features.
k = 3.0
y5 = k*y
m5 = ols_fit(X, y5, fit_intercept=True)
print("Base beta:", m_base["beta"], "SE≈", m_base["se"])
print("New  beta:", m5["beta"], "SE≈", m5["se"])
print("R² base vs new:", m_base["r2"], m5["r2"])

# Enforce the claim: coefficients and standard errors scale by k, R² is unchanged.
np.testing.assert_allclose(m5["beta"], k * m_base["beta"], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m5["se"], abs(k) * m_base["se"], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m5["r2"], m_base["r2"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507] SE≈ [0.07221093 0.07328405 0.07908507]
New  beta: [ 1.89162676  4.44648162 -6.15991522] SE≈ [0.2166328  0.21985215 0.23725522]
R² base vs new: 0.8385724376872568 0.8385724376872568


## 6) Add constant to Y → intercept shifts by the same constant; slopes unchanged

In [7]:
# Shift the response by a constant and refit on the original features.
a = -2.5
y6 = y + a
m6 = ols_fit(X, y6, fit_intercept=True)
print("Base beta:", m_base["beta"])
print("New  beta:", m6["beta"], "(expect intercept shift by a)")
print("R² base vs new:", m_base["r2"], m6["r2"])

# Enforce the claim: only the intercept moves (by a); slopes and R² are unchanged.
np.testing.assert_allclose(m6["beta"][0], m_base["beta"][0] + a, rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m6["beta"][1:], m_base["beta"][1:], rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m6["r2"], m_base["r2"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507]
New  beta: [-1.86945775  1.48216054 -2.05330507] (expect intercept shift by a)
R² base vs new: 0.8385724376872568 0.8385724376872568


## 7) Scale only one feature X_j by c → its slope scales by 1/c; other coefficients and R² are unchanged (for full-rank X, even when features are correlated)

In [8]:
# Rescale a single feature column (on a copy, so X itself is untouched) and refit.
j = 0
c7 = -4.0
X7 = X.copy()
X7[:, j] = c7 * X7[:, j]
m7 = ols_fit(X7, y, fit_intercept=True)
print("Base beta:", m_base["beta"])
print("New  beta:", m7["beta"], "(expect beta[j] ≈ base[j]/c7 and sign flip if c7<0)")
print("R² base vs new:", m_base["r2"], m7["r2"])

# Enforce the claim. The intercept sits at index 0, so the slope of feature j
# lives at coefficient index j + 1; every other coefficient stays put.
expected7 = m_base["beta"].copy()
expected7[j + 1] = expected7[j + 1] / c7
np.testing.assert_allclose(m7["beta"], expected7, rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m7["r2"], m_base["r2"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507]
New  beta: [ 0.63054225 -0.37054013 -2.05330507] (expect beta[j] ≈ base[j]/c7 and sign flip if c7<0)
R² base vs new: 0.8385724376872568 0.8385724376872568


## 8) Flip sign of a feature → its slope flips sign; predictions unchanged after refit

In [9]:
# Negate the second feature column (on a copy) and refit.
X8 = X.copy()
X8[:, 1] = -X8[:, 1]
m8 = ols_fit(X8, y, fit_intercept=True)
print("Base beta:", m_base["beta"])
print("New  beta:", m8["beta"], "(expect beta for x2 flips sign)")
print("Predictions L2 diff:", np.linalg.norm(m8["yhat"] - m_base["yhat"]))

# Enforce the claim. Column 1 of X is x2, whose slope is at coefficient index 2
# because the intercept occupies index 0.
expected8 = m_base["beta"].copy()
expected8[2] = -expected8[2]
np.testing.assert_allclose(m8["beta"], expected8, rtol=1e-8, atol=1e-8)
np.testing.assert_allclose(m8["yhat"], m_base["yhat"], rtol=1e-8, atol=1e-8)

Base beta: [ 0.63054225  1.48216054 -2.05330507]
New  beta: [0.63054225 1.48216054 2.05330507] (expect beta for x2 flips sign)
Predictions L2 diff: 0.0


## 9) Remove intercept → coefficients change; model forced through origin; R² often worse

In [10]:
def ols_fit_no_intercept(X, y):
    """Fit OLS through the origin by delegating to ``ols_fit`` with the intercept disabled."""
    through_origin = ols_fit(X, y, fit_intercept=False)
    return through_origin

# Refit the same data with the regression line forced through the origin.
m9 = ols_fit_no_intercept(X, y)
print("No-intercept beta:", m9["beta"])
print("R² (no-intercept) vs base:", m9["r2"], "vs", m_base["r2"])

# Enforce the claim: one coefficient per feature (no intercept term), and an
# in-sample R² that cannot beat the intercept model, which nests this one.
assert m9["beta"].shape == (X.shape[1],)
assert m9["r2"] <= m_base["r2"] + 1e-12

No-intercept beta: [ 1.49785044 -2.07875667]
R² (no-intercept) vs base: 0.7760934592971206 vs 0.8385724376872568
