# Polynomial Regression Analysis

This notebook explores polynomial features for modeling non-linear relationships, using cross-validated mean squared error to choose the polynomial degree.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, validation_curve
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Generate a noisy cubic dataset: y = 1.5x + 2x^2 - 3x^3 + N(0, 0.1^2) noise.
np.random.seed(789)
X = np.linspace(0, 1, 50).reshape(-1, 1)
x_flat = X.ravel()
y = 1.5 * x_flat + 2 * x_flat**2 - 3 * x_flat**3 + 0.1 * np.random.randn(50)

# Polynomial degrees to test
degrees = np.arange(1, 8)

# The degree set at construction is only a placeholder: validation_curve
# clones the pipeline and overrides 'poly__degree' for every candidate value.
poly_reg = Pipeline([
    ('poly', PolynomialFeatures()),
    ('linear', LinearRegression()),
])

# X is sorted, so the default (unshuffled) 5-fold split would validate each
# model on a contiguous interval it never saw during training, i.e. it would
# score extrapolation or gap-filling rather than interpolation. Shuffling
# with a fixed seed spreads every fold across [0, 1] and keeps the run
# reproducible.
cv_splitter = KFold(n_splits=5, shuffle=True, random_state=789)

# One call sweeps all degrees on identical folds; each returned array has
# shape (n_degrees, n_folds).
train_neg_mse, val_neg_mse = validation_curve(
    poly_reg, X, y,
    param_name='poly__degree',
    param_range=degrees,
    cv=cv_splitter,
    scoring='neg_mean_squared_error',
)

# The scorer returns negated MSE; flip the sign and average over folds.
# Kept as plain lists (one mean MSE per degree), as in the original code.
train_scores = (-train_neg_mse.mean(axis=1)).tolist()
val_scores = (-val_neg_mse.mean(axis=1)).tolist()

print(f"Optimal degree: {degrees[np.argmin(val_scores)]}")