In [1]:
# Question 3: Advanced Model Evaluation with Feature Selection for House Prices

# Step 1: Load a house prices dataset from CSV (assumes house_prices.csv is in the working directory).
# Step 2: Apply feature selection and create a train-test split.
# Step 3: Train a Lasso Regression model.
# Step 4: Perform model evaluation and hyperparameter tuning using GridSearchCV.
import os
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectFromModel

# Names a reader is likely to change, kept in one place.
DATA_PATH = 'house_prices.csv'
TARGET = 'SalePrice'


def load_numeric_features(csv_path, target=TARGET):
    """Read a CSV and return its numeric columns, ready for modelling.

    Only rows whose target value is missing are removed, because a row
    without a label cannot be used for training or scoring. Missing values
    in feature columns are kept so that the imputation step of the pipeline
    can fill them in. Columns that contain no values at all are removed.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the CSV file.
    target : str
        Name of the numeric column to predict.

    Returns
    -------
    pandas.DataFrame
        Numeric columns only, including ``target``, with the original row
        labels of the retained rows.

    Raises
    ------
    KeyError
        If ``target`` is not one of the numeric columns of the file.
    ValueError
        If no row has a value for ``target``.
    """
    numeric = pd.read_csv(csv_path).select_dtypes(include=['number'])
    if target not in numeric.columns:
        raise KeyError(f"Numeric target column '{target}' not found in {csv_path!r}.")

    labelled = numeric.dropna(subset=[target])
    if labelled.empty:
        raise ValueError(f"No rows with a value for '{target}' in {csv_path!r}.")

    # A column that is empty in every retained row carries no information.
    return labelled.dropna(axis=1, how='all')


# os.path.isfile avoids listing the whole directory and does not match a
# directory that happens to share the file's name.
if os.path.isfile(DATA_PATH):
    df = load_numeric_features(DATA_PATH)

    X = df.drop(columns=TARGET)
    y = df[TARGET]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Every preprocessing step lives inside the pipeline so that it is
    # re-fitted on the training part of each cross-validation fold.
    pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('feature_select', SelectFromModel(Lasso(alpha=0.1))),
        ('model', Lasso())
    ])

    # Only the final model's regularisation strength is tuned; the selector
    # keeps its fixed alpha.
    param_grid = {
        'model__alpha': [0.001, 0.01, 0.1, 1, 10]
    }

    grid = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid.fit(X_train, y_train)

    y_pred = grid.predict(X_test)
    # Take the root explicitly: the `squared=False` keyword is not accepted
    # by recent scikit-learn releases.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    print(f"Best alpha: {grid.best_params_['model__alpha']}")
    print(f"Test RMSE: {rmse:.2f}")
else:
    print(f"File '{DATA_PATH}' not found in current directory.")


File 'house_prices.csv' not found in current directory.
