In [1]:
import numpy as np
import matplotlib.pyplot as plt

from dml_simulation.ML.simulation_ml import (
    monte_carlo,
    plot_mse_distribution,
    plot_learner_comparison
)

In [2]:
# Simulation configuration
# ------------------------
# These four scalars are forwarded verbatim to `monte_carlo` as keyword
# arguments of the same name.
# NOTE(review): presumably n = sample size, p = number of features,
# sigma = noise scale, n_rep = Monte Carlo replications -- confirm against
# `dml_simulation.ML.simulation_ml.monte_carlo`.
n, p = 200, 200
sigma = 1.0
n_rep = 50

# Sparse coefficient vector of length p: the first 50 entries keep their
# standard-normal draws, every entry from index 50 onward is exactly zero.
# A full-length draw is taken first so the generator state matches a plain
# `rng.normal(0, 1, size=p)` call.
rng = np.random.default_rng(123)
full_draw = rng.normal(0, 1, size=p)
support = np.arange(p) < 50
beta = np.where(support, full_draw, 0.0)

# Learner identifiers, each passed to `monte_carlo` via `learner=`.
learners = [
    "ols",
    "lasso",
    "elasticnet",
    "rf",
    "gboost",
]

In [3]:
# Monte Carlo sweep over every configured learner.
# `results` maps learner name -> whatever `monte_carlo` returns for it; that
# return value must expose `.mean()` and `.std()` (used for the summary lines).
# NOTE(review): presumably a NumPy array of per-replication MSEs -- confirm.
results = {}

# Arguments that are identical for every learner; only `learner` varies.
shared_args = dict(n=n, p=p, beta=beta, sigma=sigma, n_rep=n_rep)

for learner in learners:
    print(f"Running learner: {learner}")

    mses = monte_carlo(learner=learner, **shared_args)
    results[learner] = mses

    # Two-line summary per learner, four decimals each.
    print(f" Avg MSE: {mses.mean():.4f}")
    print(f" Std MSE: {mses.std():.4f}")

Running learner: ols
 Avg MSE: 7.7805
 Std MSE: 1.6296
Running learner: lasso
 Avg MSE: 2.6126
 Std MSE: 0.3404
Running learner: elasticnet
 Avg MSE: 2.6465
 Std MSE: 0.3580
Running learner: rf
 Avg MSE: 9.7162
 Std MSE: 0.7689
Running learner: gboost
 Avg MSE: 7.3949
 Std MSE: 0.6352


In [4]:
# One figure per learner, in the insertion order of `results`.
# NOTE(review): `plot_mse_distribution` presumably draws onto the current
# matplotlib figure; `plt.show()` is called after each one so they render
# separately -- confirm against the helper's implementation.
for learner in results:
    mses = results[learner]
    plot_mse_distribution(mses, learner)
    plt.show()

In [5]:
# Pass the complete `results` dict (learner name -> `monte_carlo` output) to
# the project helper in a single call, then render the resulting figure.
# NOTE(review): presumably a side-by-side comparison of all learners -- the
# helper's behavior is not visible here; confirm in simulation_ml.
plot_learner_comparison(results)
plt.show()