# Examples

`rlassopy` includes three estimators `Rlasso`, `RlassoEffects` and `RlassoIV`. To get started we will generate a sparse design matrix.

In [6]:
# imports
from rlassopy import Rlasso
import pandas as pd
import numpy as np

In [7]:
# Generate data some high dim data,
# Example following cvxpy doc, see: 
# https://www.cvxpy.org/examples/machine_learning/lasso_regression.html
def generate_data(m=100, n=20, sigma=5, density=0.2):
    "Generates data matrix X and observations Y."
    np.random.seed(1)
    beta_star = np.random.randn(n)
    idxs = np.random.choice(range(n), int((1-density)*n), replace=False)
    for idx in idxs:
        beta_star[idx] = 0
    X = np.random.randn(m,n)
    y = X.dot(beta_star) + np.random.normal(0, sigma, size=m)
    return X, y, beta_star

m = 100
n = 20
sigma = 5
density = 0.2

X, y, beta_star = generate_data(m, n, sigma)

We can instantiate the model with the parameters we want and pass the data through the `fit` method. For example:

In [8]:
rlasso = Rlasso(post=True, fit_intercept=False, cov_type="robust").fit(X,y)

We can now access the trained values from the model

In [9]:
print(f"lambda: {rlasso.lambd_}")
print(f"Estimated betas: \n: {rlasso.coef_}")
print(f"Truth: \n {beta_star}")

lambda: 71.88088299949396
Estimated betas: 
: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.         -2.08769227
  0.          0.          0.          0.          0.          0.
  0.          0.        ]
Truth: 
 [ 0.          0.          0.          0.          0.86540763  0.
  0.          0.          0.          0.          0.         -2.06014071
 -0.3224172   0.          0.          0.          0.         -0.87785842
  0.          0.        ]


and `predict` to pass predict some new unsean data:

In [10]:
X_new = np.random.randn(m,n)
e = y - rlasso.predict(X_new)
print(f"MSE: {np.mean(e**2)}")

MSE: 37.15980982826132


An alternative to `fit`, is `fit_formula` which adopts the formula language from `R`.

In [20]:
# create pandas dataframe
cols = ["y"] + [f"x{i}" for i in range(1,X.shape[1]+1)]
data = pd.DataFrame(np.hstack([y.reshape(-1,1), X]), columns = cols)

# define formula
formula = "y ~ x1 + x2 + x12 + x15 -1"

rlasso = Rlasso(sqrt=True, post=True, fit_intercept=False, zero_tol=1e-8,
                cov_type="nonrobust", x_dependent=False).fit_formula(formula, data)

print(f"lambda: {rlasso.lambd_}")
print(f"coefs: \n: {rlasso.coef_}")
print(f"iterations: \n: {rlasso.n_iter_}")

lambda: 30.584739993575592
coefs: 
: [ 0.          0.         -2.08769227  0.        ]
iterations: 
: 0
