In [1]:
import pandas as pd
import datetime as dp
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from category_encoders import HashingEncoder, TargetEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier   
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, accuracy_score

In [2]:
dataset_path = 'E:/Datasets/titanic/wrangled dataset'

In [3]:
train_w = pd.read_csv(f'{dataset_path}/train.csv')
test_w = pd.read_csv(f'{dataset_path}/test.csv')

In [4]:
train_w.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked', 'Family', 'Title',
       'Deck', 'TicketPrefix'],
      dtype='object')

In [5]:
X = train_w.drop(['Name', 'Ticket', 'Cabin', 'Survived'], axis=1)
#X_test = test_w.drop(['Name', 'Ticket', 'Cabin'], axis=1)
y = train_w['Survived']

In [6]:
hash_cols   = ['Title', 'Deck']
#target_cols = ["col_tgt_1", "col_tgt_2"]
cat_cols    = ['Embarked']
hashing_tf = HashingEncoder(n_components=4, cols=hash_cols)
#target_tf  = TargetEncoder(cols=target_cols)
ohe_tf     = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[
        ("hash",   hashing_tf, hash_cols),
#        ("target", target_tf,  target_cols),
        ("ohe",    ohe_tf,     cat_cols),
        # ("num", StandardScaler(), num_cols),  #With no necessity of scaler due to Random Forest good handling of features
    ],
    remainder="drop"   # o "passthrough"
)

pipe = Pipeline(steps=[
    ("preproc", preprocessor),
    ("model",   RandomForestClassifier(n_estimators=100, random_state=107))
])

In [7]:
param_grid = {
    # Parámetros del HashingEncoder
    "preproc__hash__n_components": [4, 8, 16],
    # Parámetros del TargetEncoder (si quisieras regularización)
    #"preproc__target__smoothing": [0.1, 1.0, 10.0],
    # Parámetros del OneHotEncoder (aunque no muchos para probar)
    # "preproc__ohe__...": [...],

    # Parámetros del modelo RandomForest
    "model__n_estimators": [100, 200],
    "model__max_depth": [None, 5, 10],
    "model__min_samples_split": [2, 5],
    "model__min_samples_leaf": [1, 2]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=107)

grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=cv,
    scoring="accuracy",        # métrica, p.ej. 'accuracy', 'roc_auc', etc.
    n_jobs=-1,                # usa todos los cores disponibles
    verbose=2
)

grid_search.fit(X, y)

print("Mejores parámetros:\n", grid_search.best_params_)
print("Mejor AUC en CV: {:.3f}".format(grid_search.best_score_))

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


ValueError: 
All the 360 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 588, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\joblib\memory.py", line 312, in __call__
    return self.func(*args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 1551, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\utils\_set_output.py", line 319, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\compose\_column_transformer.py", line 1001, in fit_transform
    result = self._call_func_on_transformers(
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\compose\_column_transformer.py", line 901, in _call_func_on_transformers
    transformer=clone(trans) if not fitted else trans,
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 94, in clone
    return estimator.__sklearn_clone__()
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 300, in __sklearn_clone__
    return _clone_parametrized(self)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 142, in _clone_parametrized
    raise RuntimeError(
RuntimeError: Cannot clone object HashingEncoder(cols=['Title', 'Deck'], max_process=1, n_components=4,
               process_creation_method='spawn'), as the constructor either does not set or modifies parameter process_creation_method

--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 588, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\joblib\memory.py", line 312, in __call__
    return self.func(*args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 1551, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\utils\_set_output.py", line 319, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\compose\_column_transformer.py", line 1001, in fit_transform
    result = self._call_func_on_transformers(
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\compose\_column_transformer.py", line 901, in _call_func_on_transformers
    transformer=clone(trans) if not fitted else trans,
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 94, in clone
    return estimator.__sklearn_clone__()
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 300, in __sklearn_clone__
    return _clone_parametrized(self)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 142, in _clone_parametrized
    raise RuntimeError(
RuntimeError: Cannot clone object HashingEncoder(cols=['Title', 'Deck'], max_process=1,
               process_creation_method='spawn'), as the constructor either does not set or modifies parameter process_creation_method

--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 588, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\joblib\memory.py", line 312, in __call__
    return self.func(*args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\pipeline.py", line 1551, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\utils\_set_output.py", line 319, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\compose\_column_transformer.py", line 1001, in fit_transform
    result = self._call_func_on_transformers(
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\compose\_column_transformer.py", line 901, in _call_func_on_transformers
    transformer=clone(trans) if not fitted else trans,
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 94, in clone
    return estimator.__sklearn_clone__()
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 300, in __sklearn_clone__
    return _clone_parametrized(self)
  File "E:\Felpipe\Trabajo\Ciencias de datos en general\KaggleChallenges\venv\lib\site-packages\sklearn\base.py", line 142, in _clone_parametrized
    raise RuntimeError(
RuntimeError: Cannot clone object HashingEncoder(cols=['Title', 'Deck'], max_process=1, n_components=16,
               process_creation_method='spawn'), as the constructor either does not set or modifies parameter process_creation_method


In [None]:
pipe.fit(X_train, y_test)
y_pred = pipe.predict(X_train)

In [None]:
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy en test: {acc:.4f}")

In [None]:
scores = cross_val_score(
    estimator=pipe,         # tu pipeline (sin grid search)
    X=X, 
    y=y,
    cv=5,                   # número de pliegues
    scoring='accuracy',     # métrica
    n_jobs=-1
)

print("Accuracy CV por pliegue:", scores)
print("Media de accuracy CV: {:.4f} ± {:.4f}".format(scores.mean(), scores.std()))

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=107,
                                                    stratify=y)