#### Práctica con ejemplos simples y controlados

In [12]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [13]:
# Load the iris data directly as a (features, labels) pair
X, y = load_iris(return_X_y=True)
# No explicit split sizes are given, so the library defaults apply; the fixed
# seed makes the partition reproducible between runs
iris_split = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = iris_split

In [15]:
# Basic pipeline: standardize the features, then fit a linear SVM classifier
basic_steps = [
    ('scaler', StandardScaler()),
    ('model', LinearSVC()),
]
pipeline = Pipeline(steps=basic_steps)
pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split and show the result
basic_test_score = pipeline.score(X_test, y_test)
print(basic_test_score)

1.0


#### Práctica con preprocesamiento más completo

In [25]:
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [26]:
# Artificial dataset: 'age' and 'income' each contain one missing value (NaN)
df = pd.DataFrame(
    data={
        'age': [20, 30, np.nan, 25],
        'income': [40000, 50000, 60000, np.nan],
        'target': [1, 0, 1, 0],
    },
)

# Feature matrix (two columns) and label vector (single column)
X = df.loc[:, ['age', 'income']]
y = df.loc[:, 'target']
# Split features and labels into train/test parts with the default sizes;
# the fixed seed keeps the partition reproducible
toy_split = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = toy_split

In [27]:
# Full pipeline: fill missing values with the column mean, standardize the
# features, then fit a logistic regression classifier
pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('model', LogisticRegression()),
])

# fit() returns the fitted pipeline itself, so scoring can be chained onto it
print(pipeline.fit(X_train, y_train).score(X_test, y_test))


1.0


#### ColumnTransformer para datos mixtos

In [26]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [52]:
# Artificial dataset with no missing values
df = pd.DataFrame.from_dict({
    'age': [20, 30, 23, 25],
    'income': [40000, 50000, 60000, 5000],
    'target': [1, 0, 1, 0],
})

# Column groups used by the following cells.
# NOTE(review): 'target' is the label column, yet it is listed under
# categorical_features -- confirm this grouping is intended.
categorical_features = ['target']
numeric_features = ['age', 'income']


In [65]:
# Feature matrix: the numeric columns only.
X = df[numeric_features]
# Selecting with a list yields a one-column DataFrame (2-D). scikit-learn
# classifiers expect a 1-D label vector and emit a DataConversionWarning for
# column vectors, so squeeze the single column down to a Series. A selection
# with more than one column is left unchanged by squeeze.
y = df[categorical_features].squeeze(axis="columns")
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [66]:
# Define the preprocessing applied to each kind of column.
# Only columns that actually exist in the feature matrix X can be transformed:
# a name listed in categorical_features but absent from X (for example the
# label column) would make fitting raise, and one-hot encoding the label as an
# input would leak it into the features. An empty selection simply leaves the
# 'cat' transformer unused.
encoded_columns = [col for col in categorical_features if col in X.columns]
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(), encoded_columns),
])

In [69]:
# Chain the column preprocessing and the classifier into a single estimator
mixed_steps = [
    ('preprocessor', preprocessor),
    ('model', LogisticRegression()),
]
pipeline = Pipeline(steps=mixed_steps)

