In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
# Purpose: fit a Lasso regression on the housing data, list the features whose
# coefficients are non-zero, and report R² / MSE on a held-out test split.

# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
DATA_PATH = 'D:/AmesHousing.csv'
df = pd.read_csv(DATA_PATH)

# Split off the target first so it can never end up among the predictor columns
# handed to the preprocessor.
X = df.drop(columns='SalePrice')
y = df['SalePrice']

# Column groups are derived from X (not df): numeric columns get mean-imputed and
# standardized, everything else is treated as categorical and one-hot encoded.
numeric_cols = X.select_dtypes(include=[np.number]).columns
categorical_cols = X.select_dtypes(exclude=[np.number]).columns

numeric_steps = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='mean')),
    ('scale', StandardScaler()),
])
categorical_steps = Pipeline(steps=[
    # Missing categories become their own explicit level instead of NaN.
    ('impute', SimpleImputer(strategy='constant', fill_value='Missing')),
    # Categories that only occur in the test split are encoded as all zeros
    # rather than raising at transform time.
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_steps, numeric_cols),
        ('cat', categorical_steps, categorical_cols),
    ],
    sparse_threshold=0,               # always return a dense array
    verbose_feature_names_out=False,  # keep the original column names for numeric features
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit imputation / scaling / encoding statistics on the training split only, then
# reuse them for the test split, so no test-set information leaks into training.
X_train_scaled = preprocessor.fit_transform(X_train)
X_test_scaled = preprocessor.transform(X_test)

lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train_scaled, y_train)

# Feature names are taken from the fitted preprocessor so they stay aligned with
# coef_ even when one-hot encoding expands the column set.
# (get_feature_names_out on these steps needs scikit-learn >= 1.1.)
coefficients = pd.DataFrame({
    'Feature': preprocessor.get_feature_names_out(),
    'Coefficient': lasso_model.coef_
})
selected_features = coefficients[coefficients['Coefficient'] != 0]

y_pred = lasso_model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Selected Features (non-zero coefficients):")
print(selected_features)
print(f"R²: {r2}")
print(f"Mean Squared Error (MSE): {mse}")



Selected Features (non-zero coefficients):
         Feature   Coefficient
0       Lot Area   9065.537873
1   Overall Qual  45236.293527
2   Overall Cond   -432.739517
3    Gr Liv Area  32613.657070
4  TotRms AbvGrd  -8138.733269
5        Yr Sold  -1423.605266
R²: 0.7451562001489547
Mean Squared Error (MSE): 2043222380.2004194
