# Modeling

## Phase 3 â€” Model Training and Evaluation

**Objective:**
Train and evaluate machine learning models using a clean and
leakage-free preprocessing pipeline.

The modeling process follows a progressive strategy:
1. Establish a simple baseline
2. Train more expressive models
3. Compare performance using a consistent metric


In [16]:
import sys
import os

# Add project root to Python path
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

In [18]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline


In [19]:
from src.preprocessing import build_preprocessor
from src.temporal import add_temporal_features

train_df = pd.read_csv("../data/raw/train.csv")
train_df = add_temporal_features(train_df)

X = train_df.drop(columns=["SalePrice"])
y = train_df["SalePrice"]

preprocessor = build_preprocessor()

In [20]:
model = Ridge(alpha=1.0)

pipeline = Pipeline(steps=[
    ("preprocessing", preprocessor),
    ("model", model)
])

In [21]:
rmse_scores = -cross_val_score(
    pipeline,
    X,
    y,
    scoring="neg_root_mean_squared_error",
    cv=5
)

rmse_scores.mean(), rmse_scores.std()

ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
4 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 833, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 613, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 547, in _fit
    X, fitted_transformer = fit_transform_one_cached(
                            ~~~~~~~~~~~~~~~~~~~~~~~~^
        cloned_transformer,
        ^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
        params=step_params,
        ^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\joblib\memory.py", line 326, in __call__
    return self.func(*args, **kwargs)
           ~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 1484, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\compose\_column_transformer.py", line 999, in fit_transform
    result = self._call_func_on_transformers(
        X,
    ...<3 lines>...
        routed_params=routed_params,
    )
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\compose\_column_transformer.py", line 901, in _call_func_on_transformers
    return Parallel(n_jobs=self.n_jobs)(jobs)
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\parallel.py", line 91, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\parallel.py", line 184, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 1484, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 689, in fit_transform
    return last_step.fit_transform(
           ~~~~~~~~~~~~~~~~~~~~~~~^
        Xt, y, **last_step_params["fit_transform"]
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 910, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
           ~~~~~~~~^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\preprocessing\_encoders.py", line 1516, in fit
    fit_results = self._fit(
        X,
    ...<2 lines>...
        return_and_ignore_missing_for_infrequent=True,
    )
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\preprocessing\_encoders.py", line 167, in _fit
    raise ValueError(msg)
ValueError: Found unknown categories [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] in column 0 during fit

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 833, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 613, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 547, in _fit
    X, fitted_transformer = fit_transform_one_cached(
                            ~~~~~~~~~~~~~~~~~~~~~~~~^
        cloned_transformer,
        ^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
        params=step_params,
        ^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\joblib\memory.py", line 326, in __call__
    return self.func(*args, **kwargs)
           ~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 1484, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\compose\_column_transformer.py", line 999, in fit_transform
    result = self._call_func_on_transformers(
        X,
    ...<3 lines>...
        routed_params=routed_params,
    )
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\compose\_column_transformer.py", line 901, in _call_func_on_transformers
    return Parallel(n_jobs=self.n_jobs)(jobs)
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\parallel.py", line 91, in __call__
    return super().__call__(iterable_with_config_and_warning_filters)
           ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\joblib\parallel.py", line 1986, in __call__
    return output if self.return_generator else list(output)
                                                ~~~~^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\joblib\parallel.py", line 1914, in _get_sequential_output
    res = func(*args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\parallel.py", line 184, in __call__
    return self.function(*args, **kwargs)
           ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 1484, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\pipeline.py", line 689, in fit_transform
    return last_step.fit_transform(
           ~~~~~~~~~~~~~~~~~~~~~~~^
        Xt, y, **last_step_params["fit_transform"]
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 910, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
           ~~~~~~~~^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\base.py", line 1336, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\preprocessing\_encoders.py", line 1516, in fit
    fit_results = self._fit(
        X,
    ...<2 lines>...
        return_and_ignore_missing_for_infrequent=True,
    )
  File "C:\Users\joaop\OneDrive\Documents\house-price-ml\.venv\Lib\site-packages\sklearn\preprocessing\_encoders.py", line 167, in _fit
    raise ValueError(msg)
ValueError: Found unknown categories [2, 3, 4, 5, 6, 7, 8, 9, 10] in column 0 during fit
