In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# 1. Load data
df = pd.read_csv("../01_generacion_datos/clientes_sinteticos.csv")

# 2. Features and target
# The feature matrix excludes the row identifier, the target itself, and
# "monto_ventas_futuras".
# NOTE(review): presumably that last column is a future-looking outcome that
# would leak the target — confirm against the data-generation notebook.
X = df.drop(columns=["cliente_id", "contrató_servicio", "monto_ventas_futuras"])
y = df["contrató_servicio"]

# Text-like columns are the ones to one-hot encode. Matching only "object"
# misses columns stored with pandas' dedicated string dtype (or as
# "category"), which would then be passed through to the classifier as raw
# text, so all three dtypes are selected here.
cat_cols = X.select_dtypes(include=["object", "string", "category"]).columns.tolist()
# Derive the remaining columns as the complement (in original column order) so
# every feature lands in exactly one of the two lists.
num_cols = [col for col in X.columns if col not in cat_cols]

# 3. Preprocessing
# One-hot encode the categorical columns; handle_unknown="ignore" keeps
# transform from raising on categories that were not present during fit.
# Every column not listed in cat_cols is forwarded unchanged.
codificador = OneHotEncoder(handle_unknown="ignore")
preprocesador = ColumnTransformer(
    transformers=[("onehot", codificador, cat_cols)],
    remainder="passthrough",
)

# 4. Model
# Preprocessing and classifier live in one pipeline so the encoder is fitted
# only on whatever data the pipeline itself is fitted on.
clasificador = RandomForestClassifier(n_estimators=100, random_state=42)
modelo_rf = Pipeline(
    steps=[
        ("pre", preprocesador),
        ("clf", clasificador),
    ]
)

# 5. Train/test split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 6. Train
# fit() returns the fitted pipeline, so predicting on the held-out rows can be
# chained directly onto it.
y_pred = modelo_rf.fit(X_train, y_train).predict(X_test)

# 7. Evaluate on the held-out split
# NOTE(review): the recorded run shows ~0.99 accuracy; scores this close to
# perfect can indicate a feature that leaks the target — worth confirming.
exactitud = accuracy_score(y_test, y_pred)
print("Accuracy:", exactitud)

matriz = confusion_matrix(y_test, y_pred)
print("Matriz de Confusión:\n", matriz)

reporte = classification_report(y_test, y_pred)
print("Reporte:\n", reporte)

# 8. Cross-validation
# 5-fold cross-validation of the whole pipeline over the full dataset,
# summarised as mean ± standard deviation of the fold scores.
scores = cross_val_score(modelo_rf, X, y, cv=5)
media = scores.mean()
desviacion = scores.std()
print(f"Validación cruzada: {media:.4f} ± {desviacion:.4f}")

# 9. Save model
# Persist the fitted pipeline (preprocessing + classifier) as a single file.
# joblib.dump is left as the last expression so the cell displays the list of
# written file names it returns.
ruta_modelo = "../03_modelado_clasificacion/modelo_clasificado_random_forest.pkl"
joblib.dump(modelo_rf, ruta_modelo)

Accuracy: 0.995
Matriz de Confusión:
 [[114   0]
 [  1  85]]
Reporte:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00       114
           1       1.00      0.99      0.99        86

    accuracy                           0.99       200
   macro avg       1.00      0.99      0.99       200
weighted avg       1.00      0.99      0.99       200

Validación cruzada: 0.9930 ± 0.0068


['../03_modelado_clasificacion/modelo_clasificado_random_forest.pkl']