### Bayesian Regression

1. ARDRegression
2. BayesianRidge


### ARDRegression

In [None]:
# Import the sklearn.linear_model module, which provides ARDRegression
import sklearn.linear_model

# Definition:
"""
ARDRegression:
Automatic Relevance Determination (ARD) Regression is a probabilistic model for linear regression
with automatic relevance determination for feature selection. It assumes a Gaussian likelihood
and a zero-mean Gaussian prior on each weight, with a separate precision (lambda_i) per weight.
Weights whose estimated precision becomes very large are pruned, so this model is useful for
sparse regression problems where only a small subset of features are important.

The hyperparameters control the model’s regularization and convergence criteria.
"""

# Code Example:
# Build an ARDRegression estimator with every constructor argument spelled out
# at its default value.
# NOTE: the iteration cap is passed as `max_iter`; the older `n_iter` keyword was
# deprecated in scikit-learn 1.3 and removed in 1.5. On scikit-learn < 1.3 use
# `n_iter=300` instead.
ardregression_model = sklearn.linear_model.ARDRegression(
    max_iter=300,                # Maximum number of iterations (default=300)
    tol=0.001,                   # Convergence tolerance (default=1e-3)
    alpha_1=0.000001,            # Shape parameter for the Gamma distribution prior over alpha (default=1e-6)
    alpha_2=0.000001,            # Inverse scale parameter for the Gamma distribution prior over alpha (default=1e-6)
    lambda_1=0.000001,           # Shape parameter for the Gamma distribution prior over lambda (default=1e-6)
    lambda_2=0.000001,           # Inverse scale parameter for the Gamma distribution prior over lambda (default=1e-6)
    compute_score=False,         # Whether to compute the objective function at each step (default=False)
    threshold_lambda=10000,      # Threshold for removing (pruning) weights with high precision (default=10000)
    fit_intercept=True,          # Whether to fit an intercept (default=True)
    copy_X=True,                 # Whether to copy X or not (default=True)
    verbose=False,               # Verbosity level (default=False)
)

# Hyperparameters:
# Candidate values for each ARDRegression constructor argument, keyed by the
# argument name (a dict of lists, the shape a scikit-learn `param_grid` takes).
# NOTE: the key is `max_iter` because `n_iter` was deprecated in scikit-learn 1.3
# and removed in 1.5; on scikit-learn < 1.3 rename the key back to "n_iter".
ardregression_hyperparameters = {
    "max_iter": [100, 300, 500],              # Maximum number of iterations (default=300)
    "tol": [1e-4, 1e-3, 1e-2],                # Convergence tolerance (default=1e-3); three distinct values
    "alpha_1": [1e-6, 1e-5, 1e-4],            # Shape parameter for Gamma distribution prior over alpha (default=1e-6)
    "alpha_2": [1e-6, 1e-5, 1e-4],            # Inverse scale parameter for Gamma distribution prior over alpha (default=1e-6)
    "lambda_1": [1e-6, 1e-5, 1e-4],           # Shape parameter for Gamma distribution prior over lambda (default=1e-6)
    "lambda_2": [1e-6, 1e-5, 1e-4],           # Inverse scale parameter for Gamma distribution prior over lambda (default=1e-6)
    "compute_score": [True, False],           # Whether to compute the objective function (default=False)
    "threshold_lambda": [1000, 10000, 100000],  # Threshold for pruning weights with high precision (default=10000)
    "fit_intercept": [True, False],           # Whether to fit an intercept (default=True)
    "copy_X": [True, False],                  # Whether to copy X (default=True)
    "verbose": [0, 1, 2],                     # Verbosity level (default=False)
}


### BayesianRidge

In [None]:
import sklearn.linear_model

# Definition:
"""
BayesianRidge:
Bayesian Ridge Regression is a probabilistic model for linear regression where the prior distribution 
on the coefficients is a Gaussian distribution with a mean of zero. It uses a conjugate prior and 
estimates the posterior distribution of the coefficients given the observed data. This model is 
regularized by adding a prior on the regression coefficients and noise variance.

The hyperparameters control the prior distributions for the model coefficients and the precision 
of the noise.
"""

# Code Example:
# Build a BayesianRidge estimator with every constructor argument spelled out
# at its default value.
# NOTE: the iteration cap is passed as `max_iter`; the older `n_iter` keyword was
# deprecated in scikit-learn 1.3 and removed in 1.5. On scikit-learn < 1.3 use
# `n_iter=300` instead.
bayesianridge_model = sklearn.linear_model.BayesianRidge(
    max_iter=300,                 # Maximum number of iterations (default=300)
    tol=0.001,                    # Convergence tolerance (default=1e-3)
    alpha_1=0.000001,             # Shape parameter for the Gamma distribution prior over alpha (default=1e-6)
    alpha_2=0.000001,             # Inverse scale parameter for the Gamma distribution prior over alpha (default=1e-6)
    lambda_1=0.000001,            # Shape parameter for the Gamma distribution prior over lambda (default=1e-6)
    lambda_2=0.000001,            # Inverse scale parameter for the Gamma distribution prior over lambda (default=1e-6)
    alpha_init=None,              # Initial value for alpha (precision of the noise). Default: 1/Var(y)
    lambda_init=None,             # Initial value for lambda (precision of the weights). Default: 1
    compute_score=False,          # If True, compute the log marginal likelihood at each iteration (default=False)
    fit_intercept=True,           # Whether to calculate the intercept (default=True)
    copy_X=True,                  # Whether to copy X (default=True)
    verbose=False,                # Verbosity level (default=False)
)

# Hyperparameters:
# Candidate values for each BayesianRidge constructor argument, keyed by the
# argument name (a dict of lists, the shape a scikit-learn `param_grid` takes).
# NOTE: the key is `max_iter` because `n_iter` was deprecated in scikit-learn 1.3
# and removed in 1.5; on scikit-learn < 1.3 rename the key back to "n_iter".
bayesianridge_hyperparameters = {
    "max_iter": [100, 300, 500],         # Maximum number of iterations (default=300)
    "tol": [1e-4, 1e-3, 1e-2],           # Convergence tolerance (default=1e-3); three distinct values
    "alpha_1": [1e-6, 1e-5, 1e-4],       # Shape parameter for Gamma distribution prior over alpha (default=1e-6)
    "alpha_2": [1e-6, 1e-5, 1e-4],       # Inverse scale parameter for Gamma distribution prior over alpha (default=1e-6)
    "lambda_1": [1e-6, 1e-5, 1e-4],      # Shape parameter for Gamma distribution prior over lambda (default=1e-6)
    "lambda_2": [1e-6, 1e-5, 1e-4],      # Inverse scale parameter for Gamma distribution prior over lambda (default=1e-6)
    "alpha_init": [None, 0.1, 1],        # Initial value for alpha (precision of the noise); None means 1/Var(y)
    "lambda_init": [None, 0.1, 1],       # Initial value for lambda (precision of the weights); None means 1
    "compute_score": [True, False],      # Whether to compute the log marginal likelihood (default=False)
    "fit_intercept": [True, False],      # Whether to fit an intercept (default=True)
    "copy_X": [True, False],             # Whether to copy X (default=True)
    "verbose": [0, 1, 2],                # Verbosity level (default=False)
}
