In [1]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

In [2]:
from sklearn.datasets import make_regression
import numpy as np

# Build a small synthetic regression problem: 20 samples and 10 features,
# of which 5 are informative (see the make_regression arguments below).
X, y = make_regression(n_samples=20, n_features=10, n_informative=5, noise=0.1, random_state=42)

# Perturb the features with Gaussian noise (mean 0, std 0.1).
# The noise comes from a seeded Generator so that re-running the notebook
# yields the same X; make_regression's random_state does not cover this step.
X += np.random.default_rng(42).normal(0, 0.1, X.shape)

# Convert X (features) to a DataFrame; column labels are derived from the
# actual number of feature columns so they stay correct if n_features changes.
df = pd.DataFrame(X, columns=[f'Feature_{i}' for i in range(1, X.shape[1] + 1)])

# Convert y (target) to a column of the same DataFrame for a combined view
df['Target'] = y
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Target
0,1.580067,-0.36517,0.059897,-0.536622,-0.348504,-0.484733,-1.497837,0.385945,-1.323467,-0.14584,-3.051826
1,-1.548337,-1.29081,1.707571,0.784185,0.250977,-1.012427,0.612986,-0.23416,-0.736184,-0.804028,-204.949795
2,-0.572347,-1.396833,-0.592278,-0.964234,-0.580284,-1.834111,-1.794062,0.375529,-1.055606,0.223601,-254.76822
3,-0.477061,-1.205711,0.365134,0.158661,0.843377,-0.059086,-1.150812,0.13716,-0.852791,0.155398,-42.063748
4,-0.176383,0.52932,-0.63026,-0.625064,0.34437,-0.795004,-0.52632,0.319223,0.918143,1.474464,44.852701
5,0.2143,0.467487,-0.651192,-0.966052,0.37369,0.151813,0.30093,0.517011,1.938119,-0.821382,27.008127
6,-0.658784,-0.009354,-1.122774,-1.328369,1.870026,0.960017,-1.004787,-2.084901,0.192157,-0.296464,38.771033
7,-0.364654,-0.650782,1.452456,0.372944,-0.183675,0.857889,-1.056812,1.070698,-0.071479,-0.987405,-187.586072
8,0.416568,-1.554198,0.641936,-0.715564,-0.715015,-0.405405,0.539933,0.083288,0.427926,-1.080011,-57.563286
9,0.827323,-0.391321,1.999669,0.290159,-0.802519,0.66104,-1.494336,-0.578043,-0.98979,1.564106,22.319346


In [4]:
# Hold out 20% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the Ridge model; alpha is the regularization strength (λ)
ridge = Ridge(alpha=0.1).fit(X_train, y_train)

# Training split: predictions and their mean squared error
y_pred_train_ridge = ridge.predict(X_train)
train_mse_ridge = mean_squared_error(y_train, y_pred_train_ridge)

# Held-out split: predictions and their mean squared error
y_pred_test_ridge = ridge.predict(X_test)
test_mse_ridge = mean_squared_error(y_test, y_pred_test_ridge)

# Report both errors, then the fitted weights to inspect their magnitude
print(f"Training MSE (Ridge): {train_mse_ridge}")
print(f"Test MSE (Ridge): {test_mse_ridge}")
print(f"Coefficients (Ridge): {ridge.coef_}")

Training MSE (Ridge): 67.10826396941609
Test MSE (Ridge): 1740.3258309498574
Coefficients (Ridge): [76.63587598 -9.41251106 -0.79615394 -6.51520854 71.70336556 32.57098485
 69.8788155  -6.0507689   7.98745529 71.39354302]
