In [None]:
# Lasso: trace how each coefficient changes as the L1 penalty (alpha) grows.
from sklearn.linear_model import Lasso

# We need smaller values of alpha in the grid
alphas = np.linspace(0.01, 1, num=100)

# Per-alpha accumulators, filled in the same order as `alphas`
ws = []          # fitted coefficient vectors
mses_train = []  # mean squared error on the training split
mses_test = []   # mean squared error on the test split

# Column-wise preprocessing, addressed by positional column index
categorical_cols = [0]
numeric_cols = [1, 2, 3, 4, 5, 6, 7, 8, 9]
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_cols),  # one-hot encode the categorical feature
        ('num', StandardScaler(), numeric_cols),     # standardize the numeric features
    ],
    remainder='passthrough',  # keep any column not listed above unchanged
)

for a in alphas:
    # Preprocessing followed by a Lasso with the current penalty strength
    m = make_pipeline(preprocessor, Lasso(alpha=a))
    m.fit(X_train_DX, y_train_DX)

    # make_pipeline names the final step 'lasso'; keep a copy of its weights
    w_temp = np.copy(m.named_steps['lasso'].coef_)
    ws.append(w_temp)

    pred_train = m.predict(X_train_DX)
    pred_test = m.predict(X_test_DX)
    mses_train.append(mean_squared_error(y_train_DX, pred_train))
    mses_test.append(mean_squared_error(y_test_DX, pred_test))

# Column names after preprocessing. This reads the shared `preprocessor`, which
# was fitted by the pipelines in the loop above.
feature_names = preprocessor.get_feature_names_out()

# Wide table (one row per alpha, one column per feature) reshaped to long form for plotting
coef_table = pd.DataFrame(ws, columns=feature_names)
coef_table['alpha'] = alphas
sol_path = coef_table.melt(id_vars='alpha')

# Solution path: one line per feature, coefficient value against alpha
plt.figure(figsize=(10, 6))
ax = sns.lineplot(data=sol_path, x='alpha', y='value', hue='variable')
ax.set(title="Lasso Coefficients")
plt.show()

In [None]:
# Candidate penalty strengths for the search.
# NOTE: this rebinds the notebook-level name `alphas`.
alphas = np.linspace(0.001, 1, num=100)

# Preprocessing + Lasso as a single estimator; make_pipeline names the last
# step 'lasso', which is what the 'lasso__alpha' key below addresses.
m = make_pipeline(preprocessor, Lasso())

# 5-fold cross-validation with shuffled, seeded splits
cv = KFold(n_splits=5, shuffle=True, random_state=random_seed)

# Exhaustive search over alpha, scored by negated mean squared error
gs_lasso = GridSearchCV(
    estimator=m,
    param_grid={'lasso__alpha': alphas},
    scoring="neg_mean_squared_error",
    cv=cv,
)
gs_lasso.fit(X_train_DX, y_train_DX)

# Best alpha and its mean CV MSE (sign flipped back to a positive error)
best_cv_mse = -gs_lasso.best_score_
print(gs_lasso.best_params_)
print(best_cv_mse)

# `model_fit` is defined elsewhere in the notebook; presumably it reports and
# plots test-set performance of the refitted best pipeline (confirm there).
model_fit(gs_lasso.best_estimator_, X_test_DX, y_test_DX, plot=True)

In [None]:
# Cross-validated MSE of the Lasso grid search, as a function of alpha.
cv_results = pd.DataFrame(gs_lasso.cv_results_)

# Keep the split#_test_score and mean_test_score columns. The search was scored
# with "neg_mean_squared_error", so negate to get positive MSEs. Negating a new
# frame (instead of DataFrame.update in place) keeps this cell idempotent.
cv_mse = -cv_results.filter(regex='(split[0-9]+|mean)_test_score')

# Read alpha from the search results rather than from the global `alphas`:
# that name is assigned in more than one cell, so after out-of-order execution
# it may no longer be the grid that `gs_lasso` actually evaluated.
cv_mse = cv_mse.assign(alpha=cv_results['param_lasso__alpha'].astype(float))

# Mean CV MSE against alpha
plt.figure(figsize=(10, 6))
ax = sns.lineplot(x='alpha', y='mean_test_score', data=cv_mse)
ax.set_ylabel('CV MSE')
ax.set_title('Lasso: mean cross-validated MSE')
plt.show()

# Long form: one row per (alpha, fold) with that fold's validation MSE
d = cv_mse.melt(
    id_vars=['alpha', 'mean_test_score'],
    var_name='fold',
    value_name='MSE'
)

# Validation MSE per fold, with the across-fold mean drawn underneath in black
plt.figure(figsize=(10, 7))
sns.lineplot(x='alpha', y='MSE', color='black', errorbar=None, data=d)  # mean over folds at each alpha
ax_folds = sns.lineplot(x='alpha', y='MSE', hue='fold', data=d)  # one colored curve per fold
ax_folds.set_ylabel('CV MSE')
ax_folds.set_title('Lasso: cross-validated MSE by fold')
plt.show()