Pipeline con validación cruzada que prueba BIC, K2 y BDeu (con varios equivalent_sample_size) usando pgmpy.

Incluye:

Hill Climbing con múltiples reinicios

K-fold CV (por defecto k=5)

Evaluación en holdout con el mismo score del entrenamiento (consistente) y tabla resumen

Suavizado de parámetros al ajustar CPDs (BayesianEstimator) para evitar ceros

Opcional: listas blanca/negra de arcos (si quieres imponer causalidad obvia)

### Setup inicial

In [None]:
# Librerías básicas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

# Librerías BN
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import HillClimbSearch, BayesianEstimator
from pgmpy.estimators import HillClimbSearch, BIC, K2, BDeu
from pgmpy.estimators import BayesianEstimator, MaximumLikelihoodEstimator
from pgmpy.readwrite import BIFWriter

# Global settings: show every DataFrame column and set the default figure size.
pd.options.display.max_columns = None
plt.rc("figure", figsize=(8, 6))

print("✅ Librerías cargadas correctamente")


### Cargar y preparar datos

In [None]:
# Read the dataset and cast every column to the categorical dtype in one call
# (the original note marks this conversion as important for the BN steps).
data = pd.read_csv("../eleven_nodes/tv_bn_dataset.csv").astype("category")

print("✅ Datos cargados y convertidos a categóricos")
display(data.head())


### Crear DAG inicial

In [None]:
# Hand-written base edges that encode the assumed causal relations.
base_edges = [
    ("EdadUsuario", "GeneroPrograma"),
    ("DiaSemana", "Hora"),
    ("Hora", "GeneroPrograma"),
    ("GeneroPrograma", "Satisfaccion"),
    ("Satisfaccion", "Recomendado"),
]

start_model = DiscreteBayesianNetwork(base_edges)

# Dataset columns that no base edge mentions are added as isolated nodes so
# the starting graph covers every column of the data.
unlinked_columns = [c for c in data.columns if c not in start_model.nodes()]
for col in unlinked_columns:
    start_model.add_node(col)

print("✅ Relaciones base definidas")
for edge in base_edges:
    print(" -", edge)


In [None]:
def learn_and_plot(data, method_name, scoring_method, start_dag=None, white_list=None, ess=None):
    """Learn a network structure with hill climbing, fit its CPDs and plot it.

    Args:
        data: Dataset handed to ``HillClimbSearch`` and to ``model.fit``.
        method_name: Label used in the plot title and in the summary line.
        scoring_method: Score passed through to ``HillClimbSearch.estimate``.
        start_dag: Optional graph the search starts from. The search receives
            a copy, so the caller's object is never modified by this function.
        white_list: Accepted for interface compatibility; this function does
            not currently use it.
        ess: Optional value appended to the plot title as ``(ESS=...)``. It is
            only used for labelling.

    Returns:
        The structure object returned by ``HillClimbSearch.estimate``.
    """
    # Defensive copy: the search is expected to apply its edge operations to
    # the graph object it receives (NOTE(review): confirm against the installed
    # pgmpy version). Without the copy, calls that share one start_dag would
    # each begin from the previous call's result instead of the same graph.
    initial_dag = start_dag.copy() if start_dag is not None else None

    hc = HillClimbSearch(data)
    model_struct = hc.estimate(scoring_method=scoring_method, start_dag=initial_dag)

    # Materialise the edge view once; it is reused for the model, the plot
    # and the summary line.
    edges = list(model_struct.edges())

    model = DiscreteBayesianNetwork(edges)
    # Building the model from edges alone drops variables that ended up with
    # no arcs; add every node of the learned structure back so they are
    # fitted and drawn as well.
    model.add_nodes_from(model_struct.nodes())
    model.fit(data, estimator=BayesianEstimator)

    # Draw through a plain networkx graph.
    G = nx.DiGraph()
    G.add_nodes_from(model.nodes())
    G.add_edges_from(edges)

    plt.figure(figsize=(10, 8))
    pos = nx.spring_layout(G, seed=42)  # fixed seed keeps the layout reproducible
    nx.draw(G, pos, with_labels=True, node_size=1000, node_color="lightblue",
            font_size=10, font_weight="bold", arrowsize=20)
    title = f"{method_name}"
    # Compare against None so that an explicit ess=0 is still shown.
    if ess is not None:
        title += f" (ESS={ess})"
    plt.title(title)
    plt.show()

    print(f"✅ {method_name}: {len(edges)} arcos aprendidos")
    return model_struct


### Baseline

In [None]:
# Baseline: structure learning without a starting graph, one run per score.

# 1. BIC
bic_model = learn_and_plot(data, "BIC", scoring_method=BIC(data))

# 2. K2
k2_model = learn_and_plot(data, "K2", scoring_method=K2(data))

# 3. BDeu, once per equivalent sample size; bdeu_model keeps the last result.
ess_list = [1, 5, 10, 50, 100, 500]
for ess in ess_list:
    print(f"--- equivalent_sample_size = {ess} ---")
    bdeu_score = BDeu(data, equivalent_sample_size=ess)
    bdeu_model = learn_and_plot(data, "BDeu", bdeu_score, ess=ess)


### Start_DAG

In [None]:
# Structure learning starting from the hand-written base graph.
#
# Every run receives its own copy of start_model. If the same object were
# shared, a search that edits its starting graph in place would make each
# later run begin from the previous result, and all returned structures would
# refer to one graph (NOTE(review): confirm in-place behaviour for the
# installed pgmpy version; the copy is harmless either way).

# 1. BIC
bic_start = learn_and_plot(data, "BIC (start_dag)", BIC(data), start_dag=start_model.copy())

# 2. K2
k2_start = learn_and_plot(data, "K2 (start_dag)", K2(data), start_dag=start_model.copy())

# 3. BDeu, once per equivalent sample size; bdeu_start keeps the last result.
for ess in ess_list:
    print(f"--- equivalent_sample_size = {ess} ---")
    bdeu_start = learn_and_plot(
        data,
        "BDeu (start_dag)",
        BDeu(data, equivalent_sample_size=ess),
        start_dag=start_model.copy(),
        ess=ess,
    )
