In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, KFold, cross_val_score

# Shared finishing step called at the end of every plotting cell
def show():
    """Apply a tight layout to the current figure, then display it."""
    current_figure = plt.gcf()
    current_figure.tight_layout()
    plt.show()

# Synthetic data: a cubic signal sampled on [-3, 3] plus Gaussian noise
np.random.seed(7)  # fixed seed so the noise draw is repeatable
n_samples = 140
noise_scale = 2.5
X = np.linspace(-3, 3, n_samples)[:, np.newaxis]  # column vector, one feature
y_true = 0.5 * X**3 - X**2 + 2 * X  # noise-free target
noise = np.random.randn(*y_true.shape)
y = y_true + noise * noise_scale

In [None]:
# Scatter of the raw noisy samples
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X, y, alpha=0.7)
ax.set_title("Nonlinear dataset (cubic relationship + noise)")
ax.set_xlabel("x")
ax.set_ylabel("y")
show()

In [None]:
# Baseline: an ordinary straight-line fit on the full dataset
lin = LinearRegression()
lin.fit(X, y)
y_lin = lin.predict(X)

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X, y, alpha=0.5, label="data")
ax.plot(X, y_lin, linewidth=2, label="linear fit")
ax.set_title("Linear Regression: underfitting a nonlinear relationship")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
show()

## Polynomial regression (degree = 3) — Good fit

In [None]:
# Degree-3 polynomial regression: expand x into powers, then fit linearly
deg_good = 3
cubic_steps = [
    ("poly", PolynomialFeatures(degree=deg_good, include_bias=False)),
    ("lin", LinearRegression()),
]
poly3 = Pipeline(cubic_steps)
poly3.fit(X, y)

y_poly3 = poly3.predict(X)

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X, y, alpha=0.5, label="data")
ax.plot(X, y_poly3, linewidth=2, label=f"polynomial fit (degree={deg_good})")
ax.set_title("Polynomial Regression captures the curve")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
show()

## Underfitting vs Overfitting (Degrees 1, 3, 10)

In [None]:
# Compare three levels of model complexity on the same data.
# The highest degree is 10, matching the section heading and the high-degree
# model used in the Ridge section. Degree 30 on unscaled x in [-3, 3] would
# produce feature magnitudes up to 3**30, making the least-squares problem
# severely ill-conditioned and the plotted curve numerically unreliable.
plt.figure(figsize=(6,4))
plt.scatter(X, y, alpha=0.35, label="data")
for d in (1, 3, 10):
    # Same pipeline for every degree; only the polynomial expansion changes
    model = Pipeline([
        ("poly", PolynomialFeatures(degree=d, include_bias=False)),
        ("lin", LinearRegression())
    ]).fit(X, y)
    plt.plot(X, model.predict(X), linewidth=2, label=f"degree={d}")
plt.title("Model complexity: underfit vs just right vs overfit")
plt.xlabel("x"); plt.ylabel("y"); plt.legend()
show()

## Residuals for Degree 3

In [None]:
# Residuals of the degree-3 fit, plotted against x with a zero reference line
residuals = y - y_poly3

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X, residuals, alpha=0.6)
ax.axhline(0, linestyle="--", linewidth=1)
ax.set_title("Residuals for degree-3 polynomial fit")
ax.set_xlabel("x")
ax.set_ylabel("residual (y - ŷ)")
show()

## Bias–Variance Tradeoff: MSE vs Polynomial Degree

In [None]:
# Hold out 25% of the samples, then record train/test MSE for degrees 1..15
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
deg_range = range(1, 16)
train_mse = []
test_mse = []
for d in deg_range:
    model = Pipeline([
        ("poly", PolynomialFeatures(degree=d, include_bias=False)),
        ("lin", LinearRegression()),
    ])
    model.fit(X_train, y_train)
    fit_error = mean_squared_error(y_train, model.predict(X_train))
    holdout_error = mean_squared_error(y_test, model.predict(X_test))
    train_mse.append(fit_error)
    test_mse.append(holdout_error)

degrees = list(deg_range)
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(degrees, train_mse, marker="o", label="train MSE")
ax.plot(degrees, test_mse, marker="s", label="test MSE")
ax.set_title("Bias–Variance Tradeoff: MSE vs polynomial degree")
ax.set_xlabel("polynomial degree")
ax.set_ylabel("MSE")
ax.legend()
show()

## Regularization (Ridge) — Taming Overfitting

In [None]:
# Expand x to degree-10 features once, then fit an unregularized model and
# three Ridge models with increasing alpha on those same features
deg_high = 10
poly_high = PolynomialFeatures(degree=deg_high, include_bias=False)
Xh = poly_high.fit_transform(X)

lin_high = LinearRegression().fit(Xh, y)
ridge_01, ridge_1, ridge_10 = (
    Ridge(alpha=strength).fit(Xh, y) for strength in (0.1, 1.0, 10.0)
)

# Plot order matches fit order so each curve keeps its colour-cycle position
fitted_curves = [
    (lin_high, "deg=10 (no reg)"),
    (ridge_01, "Ridge α=0.1"),
    (ridge_1, "Ridge α=1"),
    (ridge_10, "Ridge α=10"),
]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X, y, alpha=0.35, label="data")
for estimator, curve_label in fitted_curves:
    ax.plot(X, estimator.predict(Xh), linewidth=2, label=curve_label)
ax.set_title("Regularization reduces overfitting (same high-degree features)")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
show()

## Ground Truth vs Learned Curve

In [None]:
# Overlay the noise-free generating curve and the degree-3 fit on the data
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(X, y, alpha=0.25, label="data")
ax.plot(X, y_true, linewidth=2, label="ground truth (noise-free)")
ax.plot(X, y_poly3, linewidth=2, linestyle="--", label="polynomial degree=3")
ax.set_title("Ground truth vs learned model")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
show()

## Interactive Polynomial Degree Slider

In [None]:
# Optional demo: any failure while setting it up (for example, ipywidgets not
# being importable) is reported with a message instead of raising.
try:
    from ipywidgets import interact, IntSlider

    def fit_and_plot_degree(d=3):
        """Fit a degree-`d` polynomial regression on (X, y) and plot it over the data."""
        steps = [
            ("poly", PolynomialFeatures(degree=d, include_bias=False)),
            ("lin", LinearRegression()),
        ]
        model = Pipeline(steps)
        model.fit(X, y)

        fig, ax = plt.subplots(figsize=(6, 4))
        ax.scatter(X, y, alpha=0.35, label="data")
        ax.plot(X, model.predict(X), linewidth=2, label=f"degree={d}")
        ax.set_title("Interactive: change polynomial degree")
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.legend()
        show()

    degree_slider = IntSlider(min=1, max=25, step=1, value=3)
    interact(fit_and_plot_degree, d=degree_slider)
except Exception as err:
    print("Interactive widget unavailable in this environment:", err)