In [2]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Tunable settings for this experiment, named so they are easy to find and change.
TEST_SIZE = 0.2      # fraction of rows held out for evaluation
RANDOM_STATE = 42    # fixed seed so the train/test split is reproducible
LASSO_ALPHA = 0.1    # The regularization strength is controlled by alpha

# Load dataset (California Housing -- that is what fetch_california_housing
# provides; it is not the Boston Housing data).
housing = fetch_california_housing()
boston = housing  # legacy alias: kept so existing references to `boston` still work
X = pd.DataFrame(housing.data, columns=housing.feature_names)
y = housing.target

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Standardize the data. The scaler is fitted on the training split only and then
# reused on the test split, so test-set statistics do not influence the fit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply Lasso Regression for feature selection
lasso = Lasso(alpha=LASSO_ALPHA)
lasso.fit(X_train_scaled, y_train)

# Get the coefficients of the features, labelled with the feature names
lasso_coefficients = pd.Series(lasso.coef_, index=X.columns)

# Print non-zero coefficients (selected features)
selected_features = lasso_coefficients[lasso_coefficients != 0]
print(f"Selected Features by Lasso: \n{selected_features}")

# Evaluate the model performance on the test set.
# The score comes from the Lasso model fitted above on all scaled columns;
# features whose coefficient is zero simply contribute nothing to predictions.
lasso_test_score = lasso.score(X_test_scaled, y_test)
print(f"Model performance with selected features (R^2 score): {lasso_test_score}")

Selected Features by Lasso: 
MedInc      0.710598
HouseAge    0.106453
Latitude   -0.011469
dtype: float64
Model performance with selected features (R^2 score): 0.48136113250290735
