# In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Build a synthetic house-prices table: a 'price' column followed by twelve
# 'featureN' columns, each holding 1000 independent uniform draws.
# The seed is fixed so every run generates the same values; the columns are
# drawn strictly in order (price first, then feature1..feature12).
np.random.seed(0)
data = pd.DataFrame({
    'price': np.random.uniform(low=100000, high=500000, size=1000),
    **{
        f'feature{i}': np.random.uniform(low=0, high=100, size=1000)
        for i in range(1, 13)
    },
})

# Persist the table without the index column, then read it straight back so
# the remainder of the script works from the on-disk CSV.
data.to_csv('house_prices.csv', index=False)
data = pd.read_csv('house_prices.csv')

# Separate the target (y) from the predictor columns (X).
y = data['price']
X = data.drop(columns=['price'])

# Create the train-test split BEFORE any fitting. Running SelectKBest on the
# full dataset first would let the held-out rows' targets influence which
# features are kept (data leakage), biasing the reported errors downward.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Chain feature selection and the regressor in one estimator so the selector
# is re-fit on the training portion of every cross-validation fold and never
# sees that fold's validation rows.
pipeline = Pipeline([
    ('selector', SelectKBest(score_func=f_regression, k=5)),
    ('lasso', Lasso(random_state=42)),
])

# Define hyperparameter tuning space for Lasso Regression. The 'lasso__'
# prefix routes each value to the pipeline step named 'lasso'.
param_grid = {'lasso__alpha': [0.01, 0.1, 1, 10]}

# Perform grid search with 5-fold cross-validation over the whole pipeline
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and score. The scorer reports negated MSE
# (higher is better), so negate it again to show a plain MSE.
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", -grid_search.best_score_)

# The best estimator is the full pipeline (selector + Lasso) refit on the
# entire training split with the winning alpha.
best_model = grid_search.best_estimator_

# Keep the names this script previously exposed: the fitted selector (now
# fit on training rows only) and the reduced feature matrix for all rows.
selector = best_model.named_steps['selector']
X_selected = selector.transform(X)

# Evaluate the best model on the test set; the pipeline applies the
# training-fitted feature selection to X_test before predicting.
y_pred = best_model.predict(X_test)

# Print the predictions and evaluation metrics
print("Predictions:", y_pred)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))