<a href="https://colab.research.google.com/github/LoosonWu/geek_homework/blob/main/lasso.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
import jax 
from jax import lax
from jax import random
import jax.numpy as jnp
import numpy as np
from sklearn.linear_model import Ridge,RidgeCV,Lasso,LassoCV
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot as plt

In [38]:
def ini_data(n,dim):
    # '''
    # 初始化数据
    # :param n: 数据总量
    # :param dim: 参数维度
    # :return: x，y，b（系数）
    # '''
    key = random.PRNGKey(0)
    x_key, beta_key, beta_test_key = random.split(key,3)
    x = random.normal(x_key, (n, dim))
    b = np.random.rand(dim)
    a = np.random.rand(n)
    y = np.matmul(x,b)+a
    return x,y,b

In [39]:
n = 500  # data length
dims = 3
x,y,b = ini_data(n,dims)

In [42]:
print(b)

[0.25086886 0.50064072 0.50290927]


In [54]:
def reg_model_Ridge(x,y,alphas,dim):
    # '''
    # ；岭回归估计
    # :param x:
    # :param y:
    # :param alphas: 随机生成多个模型参数Lambda
    # :param dim:维度
    # :return: ridge_B 最优模型的系数
    # '''
    model_coff=[]
    for alpha in alphas:
        ridge = Ridge(alpha=alpha,normalize=True)
        ridge.fit(x,y)
        model_coff.append(ridge.coef_)
    # if dim<=10:
    # plt.plot(alphas, model_coff, 'Log Alpha', 'Cofficients', 'alpha系数与岭回归系数的关系 ,dim='+str(dim))
    # 交叉验证，找到模型最优的Lambda值
    ridge_cv= RidgeCV(alphas=alphas,normalize=True,scoring="neg_mean_absolute_error", cv=5)
    ridge_cv.fit(x,y)
    ridge_best_lambda = ridge_cv.alpha_
    # 建立最优模型
    ridge = Ridge(alpha=ridge_best_lambda,normalize=True)
    ridge.fit(x,y)
    # 得到最优模型的系数
    ridge_B = ridge.coef_
    return ridge_B

In [12]:
def reg_model_LASSO(x,y,alphas,dim):
    # '''
    # ；LASSO 回归
    # :param x:
    # :param y:
    # :param alphas: 随机生成多个模型参数Lambda
    # :param dim:维度
    # :return: lasso_B 最优模型的系数
    # '''
    model_coff=[]
    for alpha in alphas:
        lasso = Lasso(alpha=alpha,normalize=True)
        lasso.fit(x,y)
        model_coff.append(lasso.coef_)
    # if dim <= 10:
    #plot_data(alphas, model_coff, 'Log Alpha', 'Cofficients', 'alpha系数与LASSO系数的关系 ,dim='+str(dim))
    # 交叉验证，找到模型最优的Lambda值
    lasso_cv= LassoCV(alphas=alphas,normalize=True,max_iter=1000, cv=5)
    lasso_cv.fit(x,y)
    ridge_best_lambda = lasso_cv.alpha_
    # 建立最优模型
    lasso = Lasso(alpha=ridge_best_lambda,normalize=True)
    lasso.fit(x,y)
    # 得到最优模型的系数
    lasso_B = lasso.coef_
    return lasso_B

In [56]:
def run_fun():
    n =500  # data length
    dims = [10,50,100,200]
    for dim in dims:
        # alphas = 10 ** np.random.uniform(-5,5,dim)
        alphas = 10 ** jnp.linspace(-5, 5, dim)
        x, y, b=ini_data(n,dim)
        ridge_B=reg_model_Ridge(x,y,alphas,dim)
        RMSE = jnp.sqrt(mean_squared_error(ridge_B, b))
        print("----------维度：", dim, ",---------")
        print("岭回归 MSE :",RMSE)
        lasso_B=reg_model_LASSO(x,y,alphas,dim)
        LMSE = jnp.sqrt(mean_squared_error(lasso_B, b))
        print("LASSO MSE :", LMSE)
run_fun()

----------维度： 10 ,---------
岭回归 MSE : 0.013459446
LASSO MSE : 0.01351553
----------维度： 50 ,---------
岭回归 MSE : 0.014237818
LASSO MSE : 0.014073374
----------维度： 100 ,---------
岭回归 MSE : 0.014001459
LASSO MSE : 0.013400429
----------维度： 1000 ,---------
岭回归 MSE : 0.3868605


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  po

LASSO MSE : 0.50412494
