In [1]:
# Importing necessary libraries
from pathlib import Path

import joblib
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

# Tune a Lasso regressor with a 5-fold cross-validated grid search, report its
# error on the held-out test split, and persist the winning estimator.

# Load the preprocessed train/test split.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts produced by a trusted step of this pipeline.
X_train, X_test, y_train, y_test = joblib.load('data/split_data.pkl')

# Define the Lasso Regression model.
# random_state fixes the coefficient update order used when
# selection='random', so the grid-search scores are reproducible across runs.
model = Lasso(random_state=42)

# Define the grid of hyperparameters to search
param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100],  # Regularization strength
    'fit_intercept': [True, False],  # Whether to calculate the intercept for this model
    'max_iter': [1000, 5000, 10000],  # Maximum number of iterations
    'selection': ['cyclic', 'random'],  # 'random' updates a random coefficient every iteration
}

# Use GridSearchCV to search for the best hyperparameters: every combination
# in param_grid is scored by R² over 5 cross-validation folds.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='r2')

# Train the model with GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best model from GridSearchCV
best_model = grid_search.best_estimator_

# Evaluate the best model on the test set, which took no part in the search
y_pred = best_model.predict(X_test)
mse_best = mean_squared_error(y_test, y_pred)
r2_best = r2_score(y_test, y_pred)

print(f"Best Lasso Regressor - Mean Squared Error: {mse_best}")
print(f"Best Lasso Regressor - R² Score: {r2_best}")
print(f"Best Parameters: {grid_search.best_params_}")

# Save the best model.
# Create the output directory first so a missing 'models/' folder does not
# throw away the finished search with a FileNotFoundError.
model_path = 'models/lasso_model.pkl'
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(best_model, model_path)

Best Lasso Regressor - Mean Squared Error: 2.533030635113719
Best Lasso Regressor - R² Score: 0.878189367723184
Best Parameters: {'alpha': 0.01, 'fit_intercept': True, 'max_iter': 1000, 'selection': 'cyclic'}


['models/lasso_model.pkl']