In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error

# ═══════════════════════════════════════════════════════════
# STEP 1: Create Dataset
# ═══════════════════════════════════════════════════════════
# Create a CUBIC relationship: y = 0.5x³ - 2x² + 3x + noise

np.random.seed(42)
X = np.random.rand(50, 1) * 10  # 30 samples

# TODO: Create y with cubic pattern + noise (multiply noise by 20)
y = 0.5 * X**3 - 2* X**2 + np.random.randn(50,1)*20

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# ═══════════════════════════════════════════════════════════
# STEP 2: Create UNDERFITTING Model
# ═══════════════════════════════════════════════════════════
# TODO: Use degree=1 (too simple for cubic data)

poly_under = PolynomialFeatures(degree=1)  # YOUR CODE
X_train_under = poly_under.fit_transform(X_train) # YOUR CODE
X_test_under = poly_under.transform(X_test)   # YOUR CODE

model_under = LinearRegression()
model_under.fit(X_train_under, y_train)  # YOUR CODE

train_r2_under = r2_score(y_train, model_under.predict(X_train_under))
test_r2_under = r2_score(y_test, model_under.predict(X_test_under))

print("UNDERFITTING MODEL:")
print(f"  Train R²: {train_r2_under:.4f}")
print(f"  Test R²:  {test_r2_under:.4f}")
print()


# ═══════════════════════════════════════════════════════════
# STEP 3: Create OVERFITTING Model
# ═══════════════════════════════════════════════════════════
# TODO: Use degree=15 (too complex)

poly_over = PolynomialFeatures(degree=15)  # YOUR CODE
X_train_over = poly_over.fit_transform(X_train)  # YOUR CODE
X_test_over = poly_over.transform(X_test)   # YOUR CODE

model_over = LinearRegression()
model_over.fit(X_train_over, y_train)  # YOUR CODE

train_r2_over = r2_score(y_train, model_over.predict(X_train_over))
test_r2_over = r2_score(y_test, model_over.predict(X_test_over))

print("OVERFITTING MODEL:")
print(f"  Train R²: {train_r2_over:.4f}")
print(f"  Test R²:  {test_r2_over:.4f}")
print()


# ═══════════════════════════════════════════════════════════
# STEP 4: Create GOOD FIT Model
# ═══════════════════════════════════════════════════════════
# TODO: Use the correct degree for cubic data

poly_good = PolynomialFeatures(degree=3)  # YOUR CODE
X_train_good = poly_good.fit_transform(X_train)  # YOUR CODE
X_test_good = poly_good.transform(X_test)   # YOUR CODE

model_good = LinearRegression()
model_good.fit(X_train_good, y_train)  # YOUR CODE

train_r2_good = r2_score(y_train, model_good.predict(X_train_good))
test_r2_good = r2_score(y_test, model_good.predict(X_test_good))

print("GOOD FIT MODEL:")
print(f"  Train R²: {train_r2_good:.4f}")
print(f"  Test R²:  {test_r2_good:.4f}")
print()


# ═══════════════════════════════════════════════════════════
# STEP 5: FIX Overfitting with Regularization
# ═══════════════════════════════════════════════════════════
# TODO: Use Ridge or Lasso with degree=15 to fix overfitting

# Use the same poly_over features (degree=15)
model_fixed = Lasso(alpha=1)  # YOUR CODE: Ridge or Lasso with appropriate alpha
model_fixed.fit(X_train_over , y_train)  # YOUR CODE

train_r2_fixed = r2_score(y_train, model_fixed.predict(X_train_over))
test_r2_fixed = r2_score(y_test, model_fixed.predict(X_test_over))

print("FIXED MODEL (Regularization):")
print(f"  Train R²: {train_r2_fixed:.4f}")
print(f"  Test R²:  {test_r2_fixed:.4f}")


# ═══════════════════════════════════════════════════════════
# STEP 6: Create Summary Table
# ═══════════════════════════════════════════════════════════
print("\n" + "=" * 60)
print("SUMMARY TABLE")
print("=" * 60)
print(f"{'Model':<25} {'Train R²':<12} {'Test R²':<12} {'Status'}")
print("-" * 60)
print(f"{'Underfitting (deg=1)':<25} {train_r2_under:<12.4f} {test_r2_under:<12.4f} {'_____'}")  # Fill status
print(f"{'Overfitting (deg=15)':<25} {train_r2_over:<12.4f} {test_r2_over:<12.4f} {'_____'}")   # Fill status
print(f"{'Good Fit (deg=3)':<25} {train_r2_good:<12.4f} {test_r2_good:<12.4f} {'_____'}")       # Fill status
print(f"{'Fixed (Regularized)':<25} {train_r2_fixed:<12.4f} {test_r2_fixed:<12.4f} {'_____'}") # Fill status

UNDERFITTING MODEL:
  Train R²: 0.7258
  Test R²:  -0.0221

OVERFITTING MODEL:
  Train R²: 0.9639
  Test R²:  0.7913

GOOD FIT MODEL:
  Train R²: 0.9555
  Test R²:  0.8466

FIXED MODEL (Regularization):
  Train R²: 0.9577
  Test R²:  0.8477

SUMMARY TABLE
Model                     Train R²     Test R²      Status
------------------------------------------------------------
Underfitting (deg=1)      0.7258       -0.0221      _____
Overfitting (deg=15)      0.9639       0.7913       _____
Good Fit (deg=3)          0.9555       0.8466       _____
Fixed (Regularized)       0.9577       0.8477       _____


  model = cd_fast.enet_coordinate_descent(
