# Bias/Variance Solutions

## Task 1: The Validation Curve

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# 1. Data
# Seed NumPy's global generator so every run draws the same samples.
np.random.seed(42)


def true_fun(X):
    """Return the noise-free target sin(2*pi*X), evaluated element-wise."""
    angle = 2 * np.pi * X
    return np.sin(angle)


def _sample_noisy_points(count, noise_std=0.1):
    """Draw `count` sorted uniform inputs and their noisy targets.

    Returns a tuple ``(inputs, targets)`` where ``inputs`` is a column
    of shape (count, 1) and ``targets`` is a 1-D array of length
    ``count`` equal to ``true_fun(inputs)`` plus Gaussian noise scaled
    by ``noise_std``.
    """
    # The inputs are drawn before the noise; this order fixes which
    # values of the seeded random stream each array receives.
    inputs = np.sort(np.random.rand(count))
    targets = true_fun(inputs) + noise_std * np.random.randn(count)
    return inputs.reshape(-1, 1), targets


n_samples = 30

# The training split is sampled first, then the 20-point test split.
X_train, y_train = _sample_noisy_points(n_samples)
X_test, y_test = _sample_noisy_points(20)

# 2. Loop
# Fit one polynomial regression per degree (1 through 14) and record the
# mean squared error on both the training and the test split.
degrees = range(1, 15)
train_scores, test_scores = [], []

for degree in degrees:
    steps = [
        ("poly", PolynomialFeatures(degree=degree, include_bias=False)),
        ("lr", LinearRegression()),
    ]
    # Pipeline.fit returns the fitted pipeline itself, so it can be chained.
    model = Pipeline(steps).fit(X_train, y_train)

    # Score the same fitted model on the data it was trained on and on
    # the held-out points.
    train_scores.append(mean_squared_error(y_train, model.predict(X_train)))
    test_scores.append(mean_squared_error(y_test, model.predict(X_test)))

# 3. Plot
# Draw both error curves against polynomial degree on the current axes.
ax = plt.gca()
for scores, label in ((train_scores, 'Train MSE'), (test_scores, 'Test MSE')):
    ax.plot(degrees, scores, label=label, marker='o')

ax.set_title("Bias-Variance Tradeoff Curve")
ax.set_xlabel("Degree (Complexity)")
ax.set_ylabel("MSE Error")
ax.set_ylim(0, 0.05)  # Zoom in: errors above 0.05 fall outside the view
ax.grid(True)
ax.legend()
plt.show()

## Task 2: Regularization

In [None]:
from sklearn.linear_model import Ridge

# Degree 15, but with Ridge Regularization
ridge_steps = [
    ("poly", PolynomialFeatures(degree=15, include_bias=False)),
    ("ridge", Ridge(alpha=0.1)),  # alpha controls regularization strength
]
pipeline = Pipeline(ridge_steps)
pipeline.fit(X_train, y_train)

# Evaluate the fitted model and the noise-free target on a 100-point grid
# spanning [0, 1], shaped as a single-feature column.
X_plot = np.linspace(0, 1, 100).reshape(-1, 1)
ridge_curve = pipeline.predict(X_plot)
true_curve = true_fun(X_plot)

# Plot order is kept (fit, target, data) so each artist gets the same
# default colour as before.
plt.title("Regularized Polynomial")
plt.plot(X_plot, ridge_curve, label="Ridge (d=15)")
plt.plot(X_plot, true_curve, label="True")
plt.scatter(X_train, y_train, label="Data")
plt.legend()
plt.show()