### Pipelines de operaciones

Agrupar múltiples operaciones en un mismo objeto

* Imputar nulos
* Codificación de categóricos
* Escalado de datos
* Modelado

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Load seaborn's example 'penguins' dataset into a DataFrame.
df = sns.load_dataset('penguins')
# Count the missing values per column (shown as the cell output).
df.isna().sum()

species               0
island                0
bill_length_mm        2
bill_depth_mm         2
flipper_length_mm     2
body_mass_g           2
sex                  11
dtype: int64

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
from sklearn.impute import SimpleImputer

# Numeric columns: fill missing values with the column median, then apply
# min-max scaling.
pipeline_numeric = Pipeline([
    ('impute_median', SimpleImputer(strategy='median')),
    ('scaler', MinMaxScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode (dropping the first level of each feature).
# NOTE: `sparse_output` replaces the `sparse` argument, which was renamed in
# scikit-learn 1.2 and removed in 1.4.
pipeline_categorical = Pipeline([
    ('impute_mode', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(drop='first', sparse_output=False)),
])

# Prepare X (features) and y (target).
X = df.drop('body_mass_g', axis=1)
y = df['body_mass_g']
# NOTE(review): the null counts printed for `df` report missing values in
# body_mass_g, so `y` contains NaN; those rows need to be dropped (from both
# X and y) before fitting a regressor on this target.

# Map each sub-pipeline to the columns it should transform.
numeric_col_names = X.select_dtypes(include=np.number).columns.to_list()
categorical_col_names = X.select_dtypes(include='object').columns.to_list()

prepocessor = ColumnTransformer([
    ('numerical', pipeline_numeric, numeric_col_names),
    ('categorical', pipeline_categorical, categorical_col_names),
])

# Pipeline steps must be (name, estimator) tuples; bare estimators are
# rejected when the steps are validated.
pipeline = Pipeline([
    ('preprocessor', prepocessor),
    ('regressor', LinearRegression()),
])

