In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
df = pd.read_csv('house_data.csv')  

imputer = SimpleImputer(strategy='mean')  
df_imputed = pd.DataFrame(imputer.fit_transform(df.select_dtypes(include=[np.number])))

categorical_cols = df.select_dtypes(include=['object']).columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', SimpleImputer(strategy='mean'), df.select_dtypes(include=[np.number]).columns),
        ('cat', OneHotEncoder(), categorical_cols)
    ])

X = df.drop(columns='Price')  
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train_scaled, y_train)
coefficients = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': lasso_model.coef_
})
selected_features = coefficients[coefficients['Coefficient'] != 0]
y_pred = lasso_model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print("Selected Features (non-zero coefficients):")
print(selected_features)
print(f"R²: {r2}")
print(f"Mean Squared Error (MSE): {mse}")



Selected Features (non-zero coefficients):
                                  Feature    Coefficient
0                                      id -146881.982119
1                                    Date     802.883974
2                      number of bedrooms  -35867.997779
3                     number of bathrooms   20111.153554
4                             living area  197152.184264
5                                lot area   -9176.016375
6                        number of floors  -14042.726633
7                      waterfront present   47163.412979
8                         number of views   25045.315336
9                  condition of the house    7513.509564
10                     grade of the house   60759.322270
11  Area of the house(excluding basement)  -34388.850203
12                   Area of the basement  -42131.204906
13                             Built Year  -46962.509092
14                        Renovation Year    5946.783442
15                            Postal Code   2

  model = cd_fast.enet_coordinate_descent(
