In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error

# Synthetic quadratic dataset (same as before): 20 points with x drawn
# uniformly from [0, 10) and Gaussian noise scaled by 10 added to y.
np.random.seed(42)  # fixed seed so every run draws the same points
x = 10 * np.random.rand(20, 1)
noise = 10 * np.random.randn(20, 1)
y = 2.5 * x**2 + 0.5 * x + noise

# Hold out 20% of the 20 points for testing; the fixed random_state keeps
# the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# ═══════════════════════════════════════════
# GOOD FIT: degree=2 (matches actual pattern!)
# ═══════════════════════════════════════════

# Expand the single input column into polynomial terms up to degree 2,
# the same degree as the quadratic used to generate y.
poly_good = PolynomialFeatures(degree=2)  # Just right!
# Fit the transformer on the training split only, then reuse it unchanged
# on the test split.
x_train_good = poly_good.fit_transform(x_train)
x_test_good = poly_good.transform(x_test)

print(f"Training samples: {len(x_train)}")
print(f"Number of features: {x_train_good.shape[1]}")
# Plain string literal: nothing is interpolated here, so no f-prefix.
print("Samples > Features = GOOD! ✓")
print()

# Fit a linear regression on the degree-2 polynomial feature matrix.
model_good = LinearRegression()
model_good.fit(x_train_good, y_train)

# Predictions on both splits so train and test quality can be compared.
y_train_pred = model_good.predict(x_train_good)
y_test_pred = model_good.predict(x_test_good)

# Score each split once and reuse the values below instead of calling
# r2_score again for the gap.
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("=" * 50)
print("GOOD FIT MODEL (degree=2)")
print("=" * 50)
print(f"Train R²: {train_r2:.4f}")
print(f"Test R²:  {test_r2:.4f}")
# Absolute difference between the two scores; a small gap means the model
# performs about as well on held-out data as on the data it was fit to.
print(f"Gap:      {abs(train_r2 - test_r2):.4f}")

Training samples: 16
Number of features: 3
Samples > Features = GOOD! ✓

GOOD FIT MODEL (degree=2)
Train R²: 0.9895
Test R²:  0.9798
Gap:      0.0097
