In [27]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np  

# Load the raw data; the file is semicolon-delimited.
df = pd.read_csv('bank-additional-full.csv', sep=';')

# Drop rows that contain null values. The copy makes df_clean an independent
# frame rather than something derived from `df`.
df_clean = df.dropna().copy()

# Separate the feature columns from the target column 'y'.
X = df_clean.drop('y', axis=1)
y = df_clean['y']

# Numeric columns get standardized; object-dtype columns get one-hot encoded.
num_features = X.select_dtypes(include=[np.number]).columns
cat_features = X.select_dtypes(include=['object']).columns

# Preprocessing: standardization for numeric features, one-hot encoding for
# categorical ones.
# OneHotEncoder produces sparse output by default, and ColumnTransformer
# returns a SciPy sparse matrix whenever the stacked result's density falls
# below `sparse_threshold` (default 0.3). The plain pd.DataFrame constructor
# used below does not handle a sparse matrix correctly, so force a dense
# ndarray with sparse_threshold=0 instead of relying on the data's density.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', OneHotEncoder(), cat_features),
    ],
    sparse_threshold=0,
)

# Fit the transformers and apply them to the feature matrix.
# NOTE(review): the scaler/encoder are fitted on the entire dataset here; if a
# train/test split happens downstream, consider fitting on the training part
# only to avoid leaking test statistics.
X_preprocessed = preprocessor.fit_transform(X)

# Encode the target as 0/1. Series.map silently turns any label missing from
# the mapping into NaN, so fail loudly instead of writing NaN targets to disk.
label_map = {'no': 0, 'yes': 1}
unexpected_labels = set(y.unique()) - label_map.keys()
if unexpected_labels:
    raise ValueError(
        "Unexpected values in target column 'y': "
        f"{sorted(map(str, unexpected_labels))}"
    )
y_encoded = y.map(label_map)

# Persist the preprocessed features and encoded target for the modelling step.
pd.DataFrame(X_preprocessed).to_csv('X_preprocessed.csv', index=False)
y_encoded.to_csv('y_encoded.csv', index=False)
