<a href="https://colab.research.google.com/github/IvaroEkel/Probabilistic-Machine-Learning_Lecture/blob/main/Linear_Regression_1_Frequentist_Bayesian_OLS_MLS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression: Frequentist - Bayesian | OLS - MLS

In this notebook, we explore linear regression from two perspectives:
- **Frequentist**: point estimates via maximum likelihood (equivalent to least squares under Gaussian noise).
- **Bayesian**: prior, posterior, and posterior predictive distributions.

and

- **Simple Linear Regression** (one predictor variable), fitted by ordinary least squares (OLS)
- **Multiple Linear Regression** (several predictor variables), abbreviated MLS in this notebook


In [None]:
# Linear Regression on the Boston Housing Dataset

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# `load_boston` was removed in scikit-learn 1.2 (importing it raises
# ImportError), so the same dataset is fetched from OpenML instead.
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
import pymc as pm
import arviz as az

# Load dataset
# `as_frame=True` returns pandas objects: `boston.data` holds the feature
# columns and `boston.target` the MEDV column.
boston = fetch_openml(name='boston', version=1, as_frame=True)
# OpenML types some columns (e.g. CHAS, RAD) as categorical; cast everything
# to float so statsmodels, StandardScaler and PyMC receive numeric input.
df = boston.data.astype(float)
df['MEDV'] = boston.target.astype(float)

# --- FREQUENTIST SINGLE REGRESSION: MEDV vs RM ---
# Target is MEDV; the design matrix is the RM column plus an explicit
# intercept column (statsmodels does not add one automatically).
y = df['MEDV']
X = sm.add_constant(df[['RM']])
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# Plot
# Scatter of MEDV against RM with the fitted regression line overlaid.
sns.regplot(data=df, x='RM', y='MEDV').set_title('Linear Regression: MEDV vs RM')
plt.show()

# --- FREQUENTIST MULTIPLE REGRESSION ---
# Regress MEDV on every other column of `df`, with an intercept column added.
y = df['MEDV']
X = sm.add_constant(df.drop(columns='MEDV'))
model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# --- BAYESIAN SINGLE REGRESSION: MEDV vs RM ---
# Plain 1-D NumPy arrays for the predictor (RM) and the response (MEDV).
y = df['MEDV'].to_numpy()
X = df['RM'].to_numpy()

# NOTE(review): RM enters unscaled here, whereas the multiple-regression
# model further down standardizes its predictors - confirm that the
# N(0, 1) prior on the slope is intended.
with pm.Model() as model_bayes_single:
    # Priors on intercept, slope and noise scale.
    alpha = pm.Normal('alpha', mu=0, sigma=10)
    beta = pm.Normal('beta', mu=0, sigma=1)
    sigma = pm.HalfNormal('sigma', sigma=5)

    # Linear predictor and Gaussian likelihood of the observed MEDV values.
    mu = alpha + beta * X
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)

    # 1000 tuning steps followed by 1000 kept draws, returned as InferenceData.
    trace = pm.sample(draws=1000, tune=1000, return_inferencedata=True)

az.plot_trace(trace)
plt.show()

# --- BAYESIAN MULTIPLE REGRESSION ---
# Predictors are every column except MEDV, standardized before modelling.
y = df['MEDV'].values
X = df.drop(columns='MEDV')
X_scaled = StandardScaler().fit_transform(X)

with pm.Model() as model_bayes_multi:
    # Priors: one intercept, one coefficient per predictor column, one noise scale.
    alpha = pm.Normal('alpha', mu=0, sigma=10)
    betas = pm.Normal('betas', mu=0, sigma=1, shape=X_scaled.shape[1])
    sigma = pm.HalfNormal('sigma', sigma=5)

    # Linear predictor on the standardized design matrix and Gaussian likelihood.
    mu = alpha + pm.math.dot(X_scaled, betas)
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)

    # 1000 tuning steps followed by 1000 kept draws, returned as InferenceData.
    trace_multi = pm.sample(draws=1000, tune=1000, return_inferencedata=True)

# Trace plots for the intercept and coefficients only (sigma is left out).
az.plot_trace(trace_multi, var_names=['alpha', 'betas'])
plt.show()

# Posterior predictive checks
# Draw replicated `y_obs` values from the fitted multiple-regression model.
with model_bayes_multi:
    ppc = pm.sample_posterior_predictive(trace_multi, var_names=['y_obs'])
# `az.from_pymc3` is the PyMC3-era converter and does not match the `pymc`
# package imported by this notebook. In `pymc`, `sample_posterior_predictive`
# already returns an InferenceData (posterior_predictive + observed_data
# groups), which `plot_ppc` accepts directly.
az.plot_ppc(ppc)
plt.show()
