In [1]:
import numpy as np
from nptyping import NDArray, Shape, Int, Float, Bool
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error as mse
from rich import print

In [16]:
def make_1d_regression_data(n: int = 21, seed: int = 0) -> tuple[NDArray, NDArray, NDArray, NDArray]:
    """Generate noisy 1D regression data from a fixed quadratic target.

    The noise-free target is ``f(x) = -1.5 * x + (1 / 9) * x**2``. Training
    inputs are ``n`` evenly spaced points on ``[0, 20]`` (endpoints included);
    test inputs are ``np.arange(0.0, 20, 0.1)``. Independent Gaussian noise
    with standard deviation 2 (variance 4) is added to both target vectors.

    Random numbers come from a private ``np.random.RandomState(seed)``, so the
    call is deterministic but does not reseed or advance NumPy's global random
    state. With the default ``seed=0`` the noise is the same stream that
    ``np.random.seed(0)`` followed by ``np.random.normal`` would produce.

    Parameters
    ----------
    n : int, optional
        Number of data points in the training set, by default 21.
    seed : int, optional
        Seed for the private random number generator, by default 0.

    Returns
    -------
    train_data : numpy.ndarray
        Training data input (shape: (n,)).
    train_target : numpy.ndarray
        Training data target (shape: (n,)).
    test_data : numpy.ndarray
        Test data input (shape: (200,)).
    test_target : numpy.ndarray
        Test data target (shape: (200,)).

    Examples
    --------
    >>> train_data, train_target, test_data, test_target = make_1d_regression_data(n=21)
    """
    # Local generator: keeps the data reproducible without touching the
    # process-wide np.random state that other cells may be using.
    rng = np.random.RandomState(seed)

    train_data = np.linspace(0.0, 20, n)
    test_data = np.arange(0.0, 20, 0.1)
    noise_std = np.sqrt(4)  # noise variance is 4
    coef = np.array([-1.5, 1 / 9.0])  # linear and quadratic coefficients

    def target_function(x):
        """Noise-free quadratic target evaluated element-wise."""
        return coef[0] * x + coef[1] * np.square(x)

    # Draw order matters for reproducibility: training noise first, then test noise.
    train_target = target_function(train_data) + rng.normal(0, 1, train_data.shape) * noise_std
    test_target = target_function(test_data) + rng.normal(0, 1, test_data.shape) * noise_std
    return train_data, train_target, test_data, test_target


# Draw the noisy training set (21 points) together with the test set
train_data, train_target, test_data, test_target = make_1d_regression_data(n=21)

# Map inputs to (-1, 1). The scaler is fit on the training inputs only and the
# same mapping is then applied to the test inputs. scikit-learn expects 2D
# input, so each 1D vector is viewed as a single-column matrix.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled_train_data = scaler.fit_transform(train_data[:, np.newaxis])
scaled_test_data = scaler.transform(test_data[:, np.newaxis])

# Polynomial degree of the feature expansion
degree = 14
# Grid of L2 regularization strengths (Ridge alphas), log-spaced from 1e-10 to 10**1.3
l2_regularizers = np.logspace(-10, 1.3, 10)
n_regularizers = len(l2_regularizers)

# Mean squared error on the train and test sets, one entry per alpha
train_errors = np.empty(n_regularizers)
test_errors = np.empty(n_regularizers)

# Test-set predictions keyed by the alpha value that produced them
test_predictions = {}

# The polynomial expansion does not depend on alpha, so it is built once here
# instead of being refit and re-applied on every iteration of the sweep.
poly_features = PolynomialFeatures(degree=degree, include_bias=False)
train_data_poly = poly_features.fit_transform(scaled_train_data)
test_data_poly = poly_features.transform(scaled_test_data)

# NOTE(review): include_bias=False together with fit_intercept=False means the
# model has no constant term, so the fitted curve is forced through 0 at
# scaled input 0 -- confirm this is intended.
for i, alpha in enumerate(l2_regularizers):
    # Fit a Ridge model with the current penalty on the expanded training data
    ridge_model = Ridge(alpha=alpha, fit_intercept=False)
    ridge_model.fit(train_data_poly, train_target)

    # Predict on both splits
    train_prediction = ridge_model.predict(train_data_poly)
    test_prediction = ridge_model.predict(test_data_poly)

    # mean_squared_error takes (y_true, y_pred); the value is symmetric in its
    # arguments, but the conventional order keeps the call unambiguous.
    train_errors[i] = mse(train_target, train_prediction)
    test_errors[i] = mse(test_target, test_prediction)

    # Keep the test predictions so individual fits can be plotted later
    test_predictions[alpha] = test_prediction


# Plot train and test MSE against the L2 regularization strength (log-scaled x-axis)
fig = (
    go.Figure()
    .add_scatter(
        x=l2_regularizers,
        y=test_errors,
        mode='markers+lines',
        marker=dict(symbol='star', size=8),
        line=dict(color='red'),
        name='test',
    )
    .add_scatter(
        x=l2_regularizers,
        y=train_errors,
        mode='markers+lines',
        marker=dict(symbol='square', size=8),
        line=dict(color='blue'),
        name='train',
    )
    .update_xaxes(type='log', title='L2 regularizer')
    .update_yaxes(title='mse')
)
fig.show()

# Plot the fitted curve over the training points for three penalties taken
# from the alpha grid (indices 0, 5 and 8: the smallest, a mid-range and a large one).
selected_alphas = l2_regularizers[[0, 5, 8]]
for alpha in selected_alphas:
    # Use 3 significant digits: a fixed '.5f' would render the smallest alpha
    # (1e-10) as '0.00000', wrongly suggesting no regularization at all.
    alpha_label = f'L2 regularizer {alpha:.3g}'
    fig = px.scatter(x=train_data, y=train_target, labels={'x': 'Data', 'y': 'Target'})
    fig.add_scatter(x=test_data, y=test_predictions[alpha], mode='lines', line=dict(color='red'), name=alpha_label)
    fig.update_layout(title=alpha_label)
    fig.show()
