In [8]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

In [9]:
from sklearn.datasets import make_regression
import numpy as np

# Build a small synthetic regression problem: 20 samples and 10 features,
# of which only 5 are informative for the target.
X, y = make_regression(n_samples=20, n_features=10, n_informative=5, noise=0.1, random_state=42)

# Jitter the features with Gaussian noise (mean 0, std 0.1). Note that y was
# already computed above, so it is not affected by this perturbation.
# A dedicated seeded generator keeps the jitter identical across
# Restart & Run All; the unseeded global np.random state would not.
noise_rng = np.random.default_rng(42)
X += noise_rng.normal(0, 0.1, X.shape)

# Wrap the features in a DataFrame for display. Column labels are derived from
# the width of X so they stay in sync if n_features is changed above.
df = pd.DataFrame(X, columns=[f'Feature_{i}' for i in range(1, X.shape[1] + 1)])

# Attach the target as an additional column and display the frame
df['Target'] = y
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Target
0,1.423267,-0.352573,0.166128,-0.543769,-0.256477,-0.662934,-1.255968,0.257309,-1.377619,0.000837,-3.051826
1,-1.492692,-1.087106,1.528572,0.757318,0.252383,-0.907608,0.510863,-0.374431,-0.791272,-1.065993,-204.949795
2,-0.328911,-1.449173,-0.722453,-0.934405,-0.508455,-1.605603,-2.00509,0.42936,-1.154398,0.318296,-254.76822
3,-0.346919,-1.073167,0.408644,-0.146411,0.772457,0.072071,-1.272339,0.039754,-0.853717,0.273774,-42.063748
4,-0.411129,0.626315,-0.533617,-0.458881,0.293092,-0.799542,-0.623161,0.097769,1.082682,1.606828,44.852701
5,0.130032,0.769164,-0.784593,-1.054978,0.383183,0.421537,0.246377,0.393153,1.930365,-0.64331,27.008127
6,-0.676504,0.219155,-1.229386,-1.16583,1.757647,0.850676,-1.159159,-1.955269,0.094174,0.010162,38.771033
7,-0.508536,-0.679392,1.504258,0.340378,-0.193415,0.851875,-1.287364,0.911492,-0.001339,-1.236602,-187.586072
8,0.711955,-1.440009,0.481205,-0.905679,-0.880285,-0.466945,0.379216,0.009997,0.409677,-1.110818,-57.563286
9,0.793987,-0.547144,2.102826,0.082765,-0.820576,0.567452,-1.383677,-0.467218,-1.109718,1.280065,22.319346


In [10]:
# Hold out 20% of the generated samples for evaluation (fixed seed for a stable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an L1-regularised linear model; alpha is the regularization strength (λ)
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# Predict on both partitions
y_pred_train, y_pred_test = (lasso.predict(split) for split in (X_train, X_test))

# Mean squared error on the training and held-out data
train_mse = mean_squared_error(y_true=y_train, y_pred=y_pred_train)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_test)

# Report the errors and the fitted coefficients (zeros mark features Lasso dropped)
print(
    f"Training MSE: {train_mse}",
    f"Test MSE: {test_mse}",
    f"Coefficients: {lasso.coef_}",
    sep="\n",
)

Training MSE: 119.3271117852268
Test MSE: 591.3288341834407
Coefficients: [86.83474432  0.         -4.66732714 -0.         66.42336118 44.46804136
 73.24630663 -1.90963799 -3.25422063 71.36836774]
