In [None]:
# Install required packages

!pip install tensorflow==2.18.0
!pip install keras==3.7.0
!pip install torch==2.5.1
!pip install torchvision==0.20.1

!pip install numpy==2.0.2
!pip install scipy==1.14.1
!pip install pandas==2.2.3

!pip install scikit-learn==1.5.2

!pip install matplotlib==3.9.2

!pip install joblib==1.4.2
!pip install python-dateutil==2.9.0.post0

!pip install sympy==1.13.1
!pip install opt-einsum==3.4.0

!pip install tensorboard==2.18.0
!pip install protobuf==5.29.0
!pip install threadpoolctl==3.5.0
!pip install packaging==24.2


# 1. Import Necessary Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

* numpy: *Used for numerical operations and dataset creation*.
* matplotlib.pyplot: *For visualizing MSE and $R^2$ scores*.
* PolynomialFeatures: *Expands input features into polynomial terms for model complexity adjustment*.
* Ridge: *Implements Ridge regression with L2 regularization to reduce overfitting.*
* mean_squared_error and r2_score: *Metrics to evaluate regression performance.*
* train_test_split: *Splits the dataset into training and testing sets.*


# 2. Generate Synthetic Dataset

In [None]:
# Build a small, noisy 1-D regression problem: y = sin(2*pi*x) + Gaussian noise
np.random.seed(42)  # fixed seed so the two draws below are reproducible
n_samples = 30  # Dataset size

# Inputs: a single feature column drawn uniformly between -1 and 1
X = np.random.uniform(low=-1, high=1, size=(n_samples, 1))

# Targets: flattened sine signal plus standard-normal noise scaled by 0.3
sine_signal = np.sin(2 * np.pi * X).ravel()
gaussian_noise = 0.3 * np.random.normal(size=n_samples)
y = sine_signal + gaussian_noise

* **X**: Input features are *uniformly sampled in the range [-1, 1].*
* **y**: Output labels are based on a sine function, $y = \sin(2\pi x)$, with added Gaussian noise
  ($\sigma = 0.3$), simulating real-world noisy data.

# 3. Define Parameters

In [None]:
# L2 penalty strengths (passed to Ridge as `alpha`) that the sweep will compare
weight_decay_values = [0.0, 0.1, 0.5, 1.0, 5.0]

# Polynomial degrees 1 through 49 serve as the model-complexity axis
degrees = np.arange(1, 50)

# One record per weight decay value, each holding four independent, initially
# empty metric lists (train/test MSE and train/test R^2)
results = {}
for wd in weight_decay_values:
    results[wd] = dict(train_errors=[], test_errors=[], train_r2=[], test_r2=[])

* **weight_decay_values**: Specifies the *L2 regularization strength* (**alpha parameter** in Ridge regression). *Values range from no regularization (0.0) to strong regularization (5.0)*.
* **degrees**: *Represents polynomial degrees, controlling model complexity*.
* **results**: *Dictionary structure* to store MSE and $R^2$
  scores for training and testing datasets across different weight decay values.

# 4. Iterate Over Weight Decay Values and Compute Results

In [None]:
# Sweep every (weight decay, polynomial degree) pair, fit a Ridge model on the
# training split and record train/test MSE and R^2 into `results`.

# Split data once, before the sweep: `random_state` is fixed, so re-splitting
# inside the loop would produce the same partition on every iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# The polynomial expansion depends only on the degree, not on the weight decay,
# so build each train/test feature matrix once and reuse it for every alpha.
poly_features = {}
for degree in degrees:
    poly = PolynomialFeatures(degree=degree)
    # Fit the expansion on the training inputs, then apply it to the test inputs
    poly_features[degree] = (poly.fit_transform(X_train), poly.transform(X_test))

# Iterate over weight decay values
for weight_decay in weight_decay_values:
    train_errors = []
    test_errors = []
    train_r2_scores = []
    test_r2_scores = []

    for degree in degrees:
        X_train_poly, X_test_poly = poly_features[degree]

        # Fit polynomial regression model with weight decay (Ridge regression)
        model = Ridge(alpha=weight_decay)  # Alpha controls the weight decay strength
        model.fit(X_train_poly, y_train)

        # Calculate train and test errors
        y_train_pred = model.predict(X_train_poly)
        y_test_pred = model.predict(X_test_poly)

        train_errors.append(mean_squared_error(y_train, y_train_pred))
        test_errors.append(mean_squared_error(y_test, y_test_pred))
        train_r2_scores.append(r2_score(y_train, y_train_pred))
        test_r2_scores.append(r2_score(y_test, y_test_pred))

    # Store one value per degree, in the same order as `degrees`
    results[weight_decay]['train_errors'] = train_errors
    results[weight_decay]['test_errors'] = test_errors
    results[weight_decay]['train_r2'] = train_r2_scores
    results[weight_decay]['test_r2'] = test_r2_scores

1. **Outer loop**: *Iterates over all L2 regularization (weight decay) values*. The data is split into training (90%) and test (10%) sets with a fixed `random_state`, so every weight decay value is evaluated on the same split.
2. **Inner loop**: For each **polynomial degree**:
   1. **Feature expansion**:
      * *Expands features into polynomial terms* using **PolynomialFeatures**.
   2. **Model training**:
      * Ridge regression (*with the current weight decay value*) is used to fit the training data.
   3. **Performance metrics**:
      * mean_squared_error: *Quantifies the error in predictions*.
      * r2_score: *Measures how well the model explains variance in the data*.

Results are *appended to lists* and stored in the **results** dictionary for later visualization.

# 5. Visualize Mean Squared Error (MSE) Results

In [None]:
# Draw train/test MSE against polynomial degree, one pair of curves per
# weight decay value, on a log-scaled y axis.
fig, ax = plt.subplots(figsize=(12, 8))
for weight_decay in weight_decay_values:
    curves = results[weight_decay]
    # Test loss: solid line with circle markers
    ax.plot(degrees, curves['test_errors'], marker='o',
            label=f"Test Loss (Weight Decay={weight_decay})")
    # Train loss: dashed line, no markers
    ax.plot(degrees, curves['train_errors'], linestyle='--',
            label=f"Train Loss (Weight Decay={weight_decay})")

ax.set_xlabel("Model Complexity (Polynomial Degree)")
ax.set_ylabel("Mean Squared Error (Log Scale)")
ax.set_yscale("log")
ax.set_title("Effect of Weight Decay Regularization on Double Descent (MSE)")
ax.legend()
ax.grid()
plt.show()

**MSE Plot**
* **Purpose**: *Plots training and testing MSE against model complexity for different weight decay values*.
* **Logarithmic scaling**: *Helps visualize differences across orders of magnitude*.
* **Line Representation**:
   * Dashed lines represent *training loss*.
   * Solid lines represent *test loss*.

# 6. Visualize $R^2$ Score Results

In [None]:
# Draw train/test R^2 against polynomial degree, one pair of curves per
# weight decay value.
fig, ax = plt.subplots(figsize=(12, 8))
for weight_decay in weight_decay_values:
    scores = results[weight_decay]
    # Test score: solid line with circle markers
    ax.plot(degrees, scores['test_r2'], marker='o',
            label=f"Test R² (Weight Decay={weight_decay})")
    # Train score: dashed line, no markers
    ax.plot(degrees, scores['train_r2'], linestyle='--',
            label=f"Train R² (Weight Decay={weight_decay})")

ax.set_xlabel("Model Complexity (Polynomial Degree)")
ax.set_ylabel("R² Score")
ax.set_title("Effect of Weight Decay Regularization on Double Descent (R²)")
ax.legend()
ax.grid()
plt.show()

**$R^2$ Plot**
* **Purpose**: *Visualizes $R^2$
  scores for training and testing data*, indicating the proportion of variance explained by the model.
* **Highlights**: *Shows how weight decay influences generalization performance across different polynomial complexities*.

# Key Takeaways
* Effect of Weight Decay:
   * **Higher weight decay values** *smooth the test loss curve, mitigating overfitting but potentially underfitting simpler data*.
   * **Moderate weight decay** *balances overfitting and underfitting*, achieving stable test performance.