In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score, mean_squared_error

data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

model = Lasso(alpha=0.1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

coef = pd.Series(model.coef_, index=X.columns)
non_zero_features = coef[coef != 0]

print("R2 Score:", r2)
print("MSE:", mse)
print("\nSelected Features (Non-zero Coefficients):")
print(non_zero_features)
print(f"\nTotal Selected Features: {len(non_zero_features)} out of {len(X.columns)}")

R2 Score: 0.4814202815313765
MSE: 0.6795515190149223

Selected Features (Non-zero Coefficients):
MedInc      0.709186
HouseAge    0.106475
Latitude   -0.011479
dtype: float64

Total Selected Features: 3 out of 8
