In [1]:
%reload_ext autoreload
%autoreload 2

import ast
import mlflow
import missingno as msno
import numpy as np
import pandas as pd
import ppscore as pps
import plotly.express as px
import pendulum
import seaborn as sns
import sys
import xgboost as xgb

from loguru import logger
from matplotlib import pyplot as plt
from pathlib import Path
from pycaret import regression
from scipy.stats import uniform, randint, yeojohnson
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector, TransformedTargetRegressor
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score, max_error
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder, quantile_transform
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from typing import Union, Dict
from ydata_profiling import ProfileReport
from yellowbrick.regressor import ResidualsPlot, PredictionError

sys.path.append(str(Path.cwd().parent))
from settings.params import *

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)

In [2]:
data = pd.read_csv(CLEANED_DATA)
TARGET_NAME = MODEL_PARAMS['TARGET_NAME']

In [3]:
mlflow.set_tracking_uri(uri="http://localhost:8080")

# Modeling


## Train/Test Split


In [4]:
x_train, x_test, y_train, y_test = train_test_split(data, data[TARGET_NAME], test_size=MODEL_PARAMS["TEST_SIZE"], random_state=SEED)

logger.info(f"\nX train: {x_train.shape}\nY train: {y_train.shape}\n"
            f"X test: {x_test.shape}\nY test: {y_test.shape}")

[32m2024-08-05 21:11:09.023[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1m
X train: (5205, 8)
Y train: (5205,)
X test: (1302, 8)
Y test: (1302,)[0m


## Training


In [5]:
df = x_train.copy()
df[TARGET_NAME] = np.log(y_train)

In [6]:
exp_reg = regression.setup(df, target=TARGET_NAME, max_encoding_ohe=200, log_experiment=True, experiment_name="building-energy-prediction-training", train_size=0.8)
regression.set_config('seed', SEED)
regression.remove_metric('MAPE')
regression.remove_metric('MSE')
regression.remove_metric('RMSLE')

Unnamed: 0,Description,Value
0,Session id,7707
1,Target,SiteEnergyUse(kBtu)
2,Target type,Regression
3,Original data shape,"(5205, 8)"
4,Transformed data shape,"(5205, 69)"
5,Transformed train set shape,"(4164, 69)"
6,Transformed test set shape,"(1041, 69)"
7,Numeric features,5
8,Categorical features,2
9,Preprocess,True


In [7]:
best_threes_model = regression.compare_models(n_select=3)

Unnamed: 0,Model,MAE,RMSE,R2,TT (Sec)
et,Extra Trees Regressor,0.2443,0.4394,0.8334,0.352
rf,Random Forest Regressor,0.2818,0.4456,0.8294,0.446
xgboost,Extreme Gradient Boosting,0.3138,0.4607,0.8178,0.502
lightgbm,Light Gradient Boosting Machine,0.3437,0.4897,0.7944,0.392
gbr,Gradient Boosting Regressor,0.3663,0.5073,0.7795,0.218
dt,Decision Tree Regressor,0.2992,0.5547,0.7363,0.04
ada,AdaBoost Regressor,0.4938,0.6503,0.638,0.111
knn,K Neighbors Regressor,0.5036,0.6758,0.6092,0.033
ridge,Ridge Regression,0.5095,0.6778,0.6071,0.038
br,Bayesian Ridge,0.5096,0.6779,0.607,0.042




2024/08/05 21:11:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run Extra Trees Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/7b54e4d8061541bdad03215e15e2139c.


2024/08/05 21:11:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/0021aa0e320f43ca8227880383ac49a1.


2024/08/05 21:11:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run Extreme Gradient Boosting at: http://localhost:8080/#/experiments/584040955558151400/runs/867e4c29086f48bc8626b4bad4ae8f75.


2024/08/05 21:11:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run Light Gradient Boosting Machine at: http://localhost:8080/#/experiments/584040955558151400/runs/50b92d255a8c4f80a0bc91d3ecb2385e.


2024/08/05 21:11:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run Gradient Boosting Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/3dddf7ac3fc947cca273565559181aec.


2024/08/05 21:11:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run Decision Tree Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/f8963488a7154f5388f7f06a0e50ce55.


2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run AdaBoost Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/e35b59f6e5924b5d9576e6bd40278ce3.


2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run K Neighbors Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/cd78311a91ba4c6b8e3306695a87723e.


2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run Ridge Regression at: http://localhost:8080/#/experiments/584040955558151400/runs/13e6ff894ddc478191a3ee47a2819bf8.


2024/08/05 21:11:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:51 INFO mlflow.tracking._tracking_service.client: 🏃 View run Bayesian Ridge at: http://localhost:8080/#/experiments/584040955558151400/runs/3ae780e94e454defa2d772d35d0d16fe.


2024/08/05 21:11:51 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:51 INFO mlflow.tracking._tracking_service.client: 🏃 View run Linear Regression at: http://localhost:8080/#/experiments/584040955558151400/runs/291106ffa69f41df96206b8d263eb397.


2024/08/05 21:11:51 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:51 INFO mlflow.tracking._tracking_service.client: 🏃 View run Orthogonal Matching Pursuit at: http://localhost:8080/#/experiments/584040955558151400/runs/a00fc2169bb345aaad6e01b696b1e304.


2024/08/05 21:11:51 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Lasso Regression at: http://localhost:8080/#/experiments/584040955558151400/runs/637bf0381ddd43119237a88082b1948f.


2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Elastic Net at: http://localhost:8080/#/experiments/584040955558151400/runs/03c6f1c6683a4c468be0b59aa6d37b16.


2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Lasso Least Angle Regression at: http://localhost:8080/#/experiments/584040955558151400/runs/97e7a91e75314befb2bb0a0022453e64.


2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Dummy Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/2b62366885994bf7b7c24d05087b0c34.


2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run Huber Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/b4bb8799352c4922af4d8fec36885102.


2024/08/05 21:11:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run Passive Aggressive Regressor at: http://localhost:8080/#/experiments/584040955558151400/runs/0c1bafaef92842cea047b700de2133e2.


2024/08/05 21:11:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.




2024/08/05 21:11:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run Least Angle Regression at: http://localhost:8080/#/experiments/584040955558151400/runs/0074b2f218b5420cbe85aa766d7b161e.


2024/08/05 21:11:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.


In [8]:
mlflow.end_run()

2024/08/05 21:11:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run Session Initialized a050 at: http://localhost:8080/#/experiments/584040955558151400/runs/4590340b834a4b0f81e3395083080ef1.


2024/08/05 21:11:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/584040955558151400.


L'entrainement sur plusieurs types de modèles avec PyCaret montre que pour ce problème, les modèles: ExtraTreesRegressor, XGBRegressor et RandomForestRegressor sont les plus adaptés. Toutefois, nous pensons que les performances obtenus lors de ce premier entrainement peuvent être nettement améliorées. Nous allons attendre de faire le réglage de paramètres et d'obtenir les modèles finaux avant de faire un choix.


## Fine-Tuning


In [9]:
def define_pipeline(numerical_transformer: list,
                    categorical_transformer: list,
                    estimator: Pipeline,
                    target_transformer: bool = False,
                    **kwargs: dict) -> Pipeline:
    """ Define pipeline for modeling

    Args:
        **kwargs:

    Returns:
        Pipeline: sklearn pipeline
    """
    numerical_transformer = make_pipeline(*numerical_transformer)

    categorical_transformer = make_pipeline(*categorical_transformer)

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numerical_transformer, make_column_selector(dtype_include=["number"])),
            ("cat", categorical_transformer, make_column_selector(dtype_include=["object", "bool"])),
        ],
        remainder="drop",  # non-specified columns are dropped
        verbose_feature_names_out=False,  # will not prefix any feature names with the name of the transformer
    )
    # Append regressor to preprocessing pipelineregreregffdffdffd.
    # Now we have a full prediction pipeline.
    if target_transformer:
        model_pipe1 = Pipeline(steps=[("preprocessor", preprocessor),
                                     ("estimator", estimator)])
        model_pipe = TransformedTargetRegressor(regressor=model_pipe1,
                                                func=np.log,
                                                inverse_func=np.exp)
    else:
        model_pipe = Pipeline(steps=[("preprocessor", preprocessor), ("estimator", estimator)])
        
    # logger.info(f"{model_pipe}")
    return model_pipe

In [10]:
ESTIMATOR_PARAMS = {
    ExtraTreesRegressor.__name__: {
        "estimator": ExtraTreesRegressor(),
        "params": {
            'regressor__estimator__n_estimators': np.arange(10, 200, 5),
        }
    },
    RandomForestRegressor.__name__: {
        "estimator": RandomForestRegressor(),
        "params": {
            'regressor__estimator__n_estimators': np.arange(10, 200, 5),
        }
    },
    xgb.XGBRegressor.__name__: {
        "estimator": xgb.XGBRegressor(),
        "params": {
            'regressor__estimator__n_estimators': np.arange(10, 200, 5),
        }
    }
}

In [11]:
CURRENT_DATE = pendulum.now()

search_cvs = {}

def rmse(actual, predicted):
    return np.sqrt(mean_squared_error(actual, predicted))

def mlflow_log_search(search):
    best_params = search.best_params_

    # Log the best parameters
    mlflow.log_params(best_params)

    # Log the best metric
    mlflow.log_metric("R2", search.best_score_)

    # Log the model
    mlflow.sklearn.log_model(search.best_estimator_, "model")

scoring = {'r2': make_scorer(r2_score),
          'rmse': make_scorer(rmse, greater_is_better=False),
          'mae': make_scorer(mean_absolute_error, greater_is_better=False)}

# Create an experiment if not exists
exp_name = "building-energy-prediction-tuning-sklearn"
experiment = mlflow.get_experiment_by_name(exp_name)
if not experiment:
    experiment_id = mlflow.create_experiment(exp_name)
else:
    experiment_id = experiment.experiment_id

with mlflow.start_run(run_name=f"Session-{CURRENT_DATE.strftime('%Y%m%d_%H%m%S')}", experiment_id=experiment_id) as parent_run:
    for estimator_name, settings in ESTIMATOR_PARAMS.items():
        with mlflow.start_run(run_name=estimator_name, nested=True, experiment_id=experiment_id):  
            estimator = settings["estimator"]
            param_grid = settings["params"]
            pipeline = define_pipeline(numerical_transformer=[SimpleImputer(strategy="median"),RobustScaler()],
                            categorical_transformer=[SimpleImputer(strategy="constant", fill_value="undefined"), OneHotEncoder(drop="if_binary", handle_unknown="ignore")],
                            target_transformer=True,
                            estimator=estimator
                        ) 
            grid_search = GridSearchCV(
                estimator=pipeline,  # Instantiate the estimator
                param_grid=param_grid,
                scoring=scoring,
                refit='r2',
                cv=5,  # Adjust the number of cross-validation folds as needed
                n_jobs=-1  # Use all available cores
            )
            grid_search.fit(x_train, y_train)
            search_cvs[estimator_name] = grid_search

            mlflow.log_param("Estimator", estimator_name)
            mlflow_log_search(grid_search)
mlflow.end_run()



2024/08/05 21:17:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run ExtraTreesRegressor at: http://localhost:8080/#/experiments/558378607717202508/runs/5d478fdfa56f44beb3839ef802ea8474.


2024/08/05 21:17:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.




2024/08/05 21:22:39 INFO mlflow.tracking._tracking_service.client: 🏃 View run RandomForestRegressor at: http://localhost:8080/#/experiments/558378607717202508/runs/e330de05a95d4d4dbcea58bddc5b3d82.


2024/08/05 21:22:39 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.




2024/08/05 21:23:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGBRegressor at: http://localhost:8080/#/experiments/558378607717202508/runs/44879d5f173c45cd810453fba48cb570.


2024/08/05 21:23:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.


2024/08/05 21:23:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run Session-20240805_210853 at: http://localhost:8080/#/experiments/558378607717202508/runs/67d74d81408d4655ac5ec47f7bfb07df.


2024/08/05 21:23:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.


In [12]:
CURRENT_DATE = pendulum.now()

search_cvs = {}

def rmse(actual, predicted):
    return np.sqrt(mean_squared_error(actual, predicted))

def mlflow_log_search(search):
    best_params = search.best_params_

    # Log the best parameters
    mlflow.log_params(best_params)

    # Log the best metric
    mlflow.log_metric("R2", search.best_score_)

    # Log the model
    mlflow.sklearn.log_model(search.best_estimator_, "model")

scoring = {'r2': make_scorer(r2_score),
          'rmse': make_scorer(rmse, greater_is_better=False),
          'mae': make_scorer(mean_absolute_error, greater_is_better=False)}

# Create an experiment if not exists
exp_name = "building-energy-prediction-tuning-sklearn"
experiment = mlflow.get_experiment_by_name(exp_name)
if not experiment:
    experiment_id = mlflow.create_experiment(exp_name)
else:
    experiment_id = experiment.experiment_id

with mlflow.start_run(run_name=f"Session-{CURRENT_DATE.strftime('%Y%m%d_%H%m%S')}", experiment_id=experiment_id) as parent_run:
    for estimator_name, settings in ESTIMATOR_PARAMS.items():
        with mlflow.start_run(run_name=estimator_name, nested=True, experiment_id=experiment_id):  
            estimator = settings["estimator"]
            param_grid = settings["params"]
            pipeline = define_pipeline(numerical_transformer=[SimpleImputer(strategy="median"),RobustScaler()],
                            categorical_transformer=[SimpleImputer(strategy="constant", fill_value="undefined"), OneHotEncoder(drop="if_binary", handle_unknown="ignore")],
                            target_transformer=True,
                            estimator=estimator
                        ) 
            grid_search = GridSearchCV(
                estimator=pipeline,  # Instantiate the estimator
                param_grid=param_grid,
                scoring=scoring,
                refit='r2',
                cv=5,  # Adjust the number of cross-validation folds as needed
                n_jobs=-1  # Use all available cores
            )
            grid_search.fit(x_train, y_train)
            search_cvs[estimator_name] = grid_search

            mlflow.log_param("Estimator", estimator_name)
            mlflow_log_search(grid_search)
mlflow.end_run()



2024/08/05 21:28:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run ExtraTreesRegressor at: http://localhost:8080/#/experiments/558378607717202508/runs/1a3271ea04bc46aeb00c4fd21c83742d.


2024/08/05 21:28:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.


2024/08/05 21:29:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run RandomForestRegressor at: http://localhost:8080/#/experiments/558378607717202508/runs/b57acd5c2431432db0ee8eee29ca0bef.


2024/08/05 21:29:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.


2024/08/05 21:29:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run Session-20240805_210817 at: http://localhost:8080/#/experiments/558378607717202508/runs/3d056654576845cd8255624a65e7e0b0.


2024/08/05 21:29:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:8080/#/experiments/558378607717202508.


KeyboardInterrupt: 

## Model Evaluation on Test Data


We are going to evaluate the fine-tuned models to see which one we are going to pick as the final model.


In [None]:
def eval_metrics(y_actual: Union[pd.DataFrame, pd.Series, np.ndarray],
                 y_pred: Union[pd.DataFrame, pd.Series, np.ndarray]
                 ) -> Dict[str, float]:
    """Compute evaluation metrics.

    Args:
        y_actual: Ground truth (correct) target values
        y_pred: Estimated target values.

    Returns:
        Dict[str, float]: dictionary of evaluation metrics.
            Expected keys are: "rmse", "mae", "r2", "max_error"

    """
    metrics = dict()
    # Calculate Root mean squared error, named rmse
    metrics['rmse'] = mean_squared_error(y_actual, y_pred) ** 0.5
    # Calculate mean absolute error, named mae
    metrics['mae'] = mean_absolute_error(y_actual, y_pred)
    # Calculate R-squared: coefficient of determination, named r2
    metrics['r2'] = r2_score(y_actual, y_pred)
    # Calculate max error: maximum value of absolute error (y_actual - y_pred), named maxerror
    metrics['maxerror'] = max_error(y_actual, y_pred)
    # Return a dictionary
    return metrics

In [None]:
# Obtaining best_models after fine-tuning
models = { f"{estimator_name}": search_cv.best_estimator_ for estimator_name, search_cv in search_cvs.items()}

In [None]:
exp_name = "building-energy-prediction-evaluation"
experiment = mlflow.get_experiment_by_name(exp_name)
if not experiment:
    experiment_id = mlflow.create_experiment(exp_name)
else:
    experiment_id = experiment.experiment_id

def evaluate_models(estimators, x_train, x_test, y_train, y_test):
    # Dict of R2 scores for the estimators
    r2_scores = {}
    with mlflow.start_run(run_name=f"Session-{CURRENT_DATE.strftime('%Y%m%d_%H%m%S')}", experiment_id=experiment_id):
        for estimator_name, estimator in estimators.items():
            with mlflow.start_run(run_name=estimator_name, nested=True, experiment_id=experiment_id): 
                y_train_pred = estimator.predict(x_train)
                y_test_pred = estimator.predict(x_test)

                train_metrics = eval_metrics(y_train, y_train_pred)
                test_metrics = eval_metrics(y_test, y_test_pred)

                # Add the R2 score of the model to the global dict
                r2_scores[estimator_name] = test_metrics['r2']

                # Log the regressor parameters
                mlflow.log_params(estimator.regressor.steps[-1][1].get_params())

                # Log the best metric
                mlflow.log_metrics(test_metrics)

                # Log the model
                mlflow.sklearn.log_model(estimator.best_estimator_, "model")

                logger.info(f"""{estimator_name} performance \n{pd.DataFrame({'train': train_metrics, 'test': test_metrics}).T}""")
    return max(r2_scores.items(), key=lambda item: item[1])

mlflow.end_run()

best_estimator, score = evaluate_models(models, x_train, x_test, y_train, y_test)

logger.info(f"""{best_estimator} is the best estimator found for this problem with an R2 score of {score}""")

### Prediction Error Plot


In [None]:
def prediction_error_plot(estimators, x_train, x_test, y_train, y_test):
    for estimator_name, estimator in estimators.items():
        visualizer = PredictionError(estimator, is_fitted="auto", identity=True, bestfit=True)
        visualizer.fit(x_train, y_train)
        visualizer.score(x_test, y_test)
        print(f"Prediction plot for estimator {estimator_name}")
        visualizer.show()

In [None]:
prediction_error_plot(models, x_train, x_test, y_train, y_test)

### Residual Plot


In [None]:
def residual_plot(estimators, x_train, x_test, y_train, y_test):
    for estimator_name, estimator in estimators.items():
        visualizer = ResidualsPlot(estimator, is_fitted="auto")
        visualizer.fit(x_train, y_train)
        visualizer.score(x_test, y_test)
        print(f"Residual plot for estimator {estimator_name}")
        visualizer.show()

In [None]:
residual_plot(models, x_train, x_test, y_train, y_test)