In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Ridge regression of y on the predictors x1 and x2, with the penalty
# strength picked by RidgeCV from a log-spaced grid of candidates.
df = pd.read_csv("ridge_correlated_150.csv")
X = df.loc[:, ["x1", "x2"]]
y = df.loc[:, "y"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 50 candidate penalties spaced evenly on a log10 scale from 1e-3 to 1e3.
# No cv argument is passed, so RidgeCV's default validation scheme applies.
alphas = np.logspace(-3, 3, num=50)
ridge = RidgeCV(alphas=alphas).fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = ridge.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report the chosen penalty (rounded to 3 decimals), the fitted parameters,
# and the test-set metrics, one per line.
for metric_name, metric_value in (
    ("Best alpha:", ridge.alpha_.round(3)),
    ("Coefficients:", ridge.coef_),
    ("Intercept:", ridge.intercept_),
    ("R² (test):", r2),
    ("RMSE (test):", rmse),
):
    print(metric_name, metric_value)


Best alpha: 0.007
Coefficients: [ 4.41273599 -3.41175106]
Intercept: 0.012383721735237843
R² (test): 0.6538727524919414
RMSE (test): 0.5705925764636008


In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# Lasso regression of y on every other column of the file, with the penalty
# strength picked by 5-fold cross-validation inside LassoCV.
df = pd.read_csv("lasso_sparse_150.csv")
X = df.drop(columns="y")
y = df.loc[:, "y"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# max_iter is raised to 10000 iterations for the solver.
lasso = LassoCV(cv=5, random_state=42, max_iter=10_000).fit(X_train, y_train)

# Score the fitted model on the held-out rows only.
y_pred = lasso.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# A feature counts as "selected" when its fitted coefficient is not exactly 0.
nonzero_mask = lasso.coef_ != 0
selected_features = X.columns[nonzero_mask]

# Report the chosen penalty, the surviving feature names, and the test-set
# metrics, one per line.
for metric_name, metric_value in (
    ("Best alpha:", lasso.alpha_),
    ("Selected features:", selected_features.tolist()),
    ("R² (test):", r2),
    ("RMSE (test):", rmse),
):
    print(metric_name, metric_value)


Best alpha: 0.06662744526920758
Selected features: ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x11', 'x12', 'x13', 'x16', 'x18', 'x21', 'x23', 'x24', 'x26', 'x28', 'x29']
R² (test): 0.9620885670252837
RMSE (test): 1.1860751609124554
