In [1]:
# Question 3: Advanced Model Evaluation with Feature Selection for House Prices

# Step 1: Load a house prices dataset from CSV (Assume you have a house_prices.csv ).
# Step 2: Apply feature selection and create a train-test split.
# Step 3: Train a Lasso Regression model.
# Step 4: Perform model evaluation and hyperparameter tuning using GridSearchCV.
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

df = sns.load_dataset('diamonds')
df = df[['price', 'carat', 'depth', 'table', 'x', 'y', 'z']]

X = df.drop(columns='price')
y = df['price']

selector = SelectKBest(f_regression, k='all')
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('lasso', Lasso())
])

param_grid = {
    'lasso__alpha': [0.01, 0.1, 1, 10]
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

print(f"Best parameters: {grid_search.best_params_}")

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


Best parameters: {'lasso__alpha': 1}
Mean Squared Error: 2242719.23
