# Modelado de Datos — Clasificación Binaria (Pipeline + CV)

**Objetivo:** construir un *pipeline* (escalado + regresión logística) para clasificación binaria y evaluarlo con validación cruzada estratificada.

**Dataset:** Breast Cancer Wisconsin (Diagnostic), cargado con `sklearn.datasets.load_breast_cancer`.


In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np

# Load the dataset as pandas objects (as_frame=True) and split it into the
# feature table and the target column.
data = load_breast_cancer(as_frame=True)
X, y = data.data, data.target

# Chain scaling and the classifier into a single estimator so the scaler is
# refitted together with the model on every cross-validation training split.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression(max_iter=1000)),
    ]
)

# Five shuffled, stratified folds; the fixed seed makes the split repeatable.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(estimator=pipe, X=X, y=y, scoring='accuracy', cv=cv)

# Report the per-fold accuracies, then their mean and standard deviation.
mean_accuracy = np.mean(scores)
std_accuracy = np.std(scores)
print('Accuracy CV:', scores)
print('Media:', mean_accuracy, '±', std_accuracy)
