In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Import the OnlineConstrLS class from the previous implementation
# Assuming it's in a file named online_constrained_ls.py
from online_constrained_ls import OnlineConstrLS

def generate_data(n_samples, n_features, noise=0.1):
    """Draw a synthetic linear-regression problem whose true weights sum to one.

    All randomness comes from NumPy's global random state (``np.random``).

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns (and of weights).
        noise: Standard deviation of the zero-mean Gaussian noise added to
            the targets.

    Returns:
        A tuple ``(X, y, true_weights)``: ``X`` has shape
        ``(n_samples, n_features)`` with uniform entries in [0, 1),
        ``y`` has shape ``(n_samples,)`` and ``true_weights`` has shape
        ``(n_features,)``.
    """
    features = np.random.rand(n_samples, n_features)

    # Uniform draws rescaled so the ground-truth weights add up to 1
    raw_weights = np.random.rand(n_features)
    weights = raw_weights / raw_weights.sum()

    # Noiseless linear response plus Gaussian perturbation
    perturbation = np.random.normal(0, noise, n_samples)
    targets = features.dot(weights) + perturbation

    return features, targets, weights

def run_monte_carlo(n_trials, n_samples, n_features, noise=0.1,
                    train_fraction=0.8, learning_rate=0.01):
    """Compare OnlineConstrLS with ordinary least squares over repeated trials.

    Each trial draws a fresh dataset with ``generate_data``, splits it
    sequentially (first part train, remainder test), fits both models on the
    training part, and records the test-set MSE together with the mean
    absolute difference between the fitted ``coef_`` and the true weights.

    Args:
        n_trials: Number of independent trials to run.
        n_samples: Number of samples generated per trial (train + test).
        n_features: Number of features per sample.
        noise: Noise level forwarded to ``generate_data``.
        train_fraction: Fraction of each dataset used for training; the
            rest is held out for testing. Defaults to 0.8.
        learning_rate: Value passed as ``learning_rate`` to
            ``OnlineConstrLS``. Defaults to 0.01.

    Returns:
        A tuple of four lists, each with one entry per trial:
        ``(ocls_mses, ls_mses, weight_errors_ocls, weight_errors_ls)``.

    Raises:
        ValueError: If at least one trial is requested and the split would
            leave the training set or the test set empty.
    """
    # The split index depends only on the arguments, so compute it once
    train_size = int(train_fraction * n_samples)
    if n_trials > 0 and not 0 < train_size < n_samples:
        raise ValueError(
            f"train_fraction={train_fraction} with n_samples={n_samples} gives "
            f"{train_size} training sample(s); both the training and the test "
            "set must be non-empty"
        )

    ocls_mses = []
    ls_mses = []
    weight_errors_ocls = []
    weight_errors_ls = []

    for _ in range(n_trials):
        X, y, true_weights = generate_data(n_samples, n_features, noise)

        # Sequential split: leading rows train, trailing rows test
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        # Train and evaluate OnlineConstrLS
        ocls = OnlineConstrLS(unit_interval=True, learning_rate=learning_rate)
        ocls.fit(X_train, y_train)
        ocls_pred = ocls.predict(X_test)
        ocls_mses.append(mean_squared_error(y_test, ocls_pred))
        weight_errors_ocls.append(np.mean(np.abs(ocls.coef_ - true_weights)))

        # Train and evaluate standard least squares as the baseline
        ls = LinearRegression()
        ls.fit(X_train, y_train)
        ls_pred = ls.predict(X_test)
        ls_mses.append(mean_squared_error(y_test, ls_pred))
        weight_errors_ls.append(np.mean(np.abs(ls.coef_ - true_weights)))

    return ocls_mses, ls_mses, weight_errors_ocls, weight_errors_ls

def plot_results(ocls_mses, ls_mses, weight_errors_ocls, weight_errors_ls):
    """Show side-by-side box plots comparing the two models.

    The left panel compares test MSEs, the right panel compares mean
    absolute weight errors. The figure is displayed with ``plt.show()``;
    nothing is returned.

    Args:
        ocls_mses: Per-trial MSE values for OnlineConstrLS.
        ls_mses: Per-trial MSE values for LinearRegression.
        weight_errors_ocls: Per-trial weight errors for OnlineConstrLS.
        weight_errors_ls: Per-trial weight errors for LinearRegression.
    """
    model_names = ['OnlineConstrLS', 'LinearRegression']
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Tick labels are set with set_xticklabels instead of boxplot's
    # ``labels=`` keyword: Matplotlib 3.9 deprecated ``labels`` in favour of
    # ``tick_labels``, while set_xticklabels works on old and new versions.
    ax1.boxplot([ocls_mses, ls_mses])
    ax1.set_xticklabels(model_names)
    ax1.set_ylabel('Mean Squared Error')
    ax1.set_title('MSE Comparison')

    ax2.boxplot([weight_errors_ocls, weight_errors_ls])
    ax2.set_xticklabels(model_names)
    ax2.set_ylabel('Mean Absolute Weight Error')
    ax2.set_title('Weight Error Comparison')

    plt.tight_layout()
    plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Simulation configuration
SEED = 42
n_trials = 100
n_samples = 1000
n_features = 5
noise = 0.1

# Seed NumPy's global RNG so that Restart & Run All reproduces the same
# random draws made through np.random
np.random.seed(SEED)

# Run the simulation
ocls_mses, ls_mses, weight_errors_ocls, weight_errors_ls = run_monte_carlo(n_trials, n_samples, n_features, noise)

# Plot the results
plot_results(ocls_mses, ls_mses, weight_errors_ocls, weight_errors_ls)

# Print summary statistics (averages over all trials)
print("OnlineConstrLS - Mean MSE:", np.mean(ocls_mses))
print("LinearRegression - Mean MSE:", np.mean(ls_mses))
print("OnlineConstrLS - Mean Weight Error:", np.mean(weight_errors_ocls))
print("LinearRegression - Mean Weight Error:", np.mean(weight_errors_ls))

In [2]:
# Report which Python interpreter this kernel is running on
import sys

print(f"{sys.version}")


3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:41:52) [Clang 15.0.7 ]
