In [None]:
import os
import random
import networkx as nx
from pgmpy.readwrite import BIFWriter
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD

In [None]:
def generate_connected_dag(n, p, max_parents=3):
    """
    Generate a random, weakly connected Directed Acyclic Graph (DAG).

    The graph is built in two stages: first every node X1..X{n-1} receives one
    parent chosen uniformly among the lower-numbered nodes (a spanning tree),
    then every ordered pair of distinct nodes is considered once and, with
    probability `p`, an extra edge is added if it violates no constraint.

    Parameters:
        n (int): Number of nodes in the graph; nodes are named "X0" .. "X{n-1}".
        p (float): Probability of trying to add an edge for each ordered pair
            of distinct nodes.
        max_parents (int): Maximum number of parents allowed for each node (default is 3).

    Additional constraint:
        The number of nodes with exactly `max_parents` parents should not exceed
        5% of the total number of nodes (rounded down).

    Returns:
        networkx.DiGraph: A connected DAG satisfying the specified constraints.

    Raises:
        AssertionError: If the finished graph is not a DAG, is not weakly
            connected, or breaks one of the parent-count constraints.
    """
    # Upper bound on how many nodes may end up with exactly `max_parents` parents
    max_full_nodes = int(0.05 * n)
    full_nodes = 0

    names = [f"X{i}" for i in range(n)]

    # Initialize a directed graph with n nodes
    tree = nx.DiGraph()
    tree.add_nodes_from(names)

    def can_take_parent(node):
        """Return True if `node` may receive one more incoming edge."""
        degree = tree.in_degree(node)
        if degree >= max_parents:
            return False
        # An edge that would bring the node to exactly `max_parents` parents is
        # only allowed while the 5% budget is not used up.
        return degree != max_parents - 1 or full_nodes < max_full_nodes

    # Construct a spanning tree to ensure weak connectivity. Only the child's
    # in-degree changes when an edge parent -> child is added, so the parent
    # constraints are checked on the child; any earlier node can be the parent.
    for i in range(1, n):
        child = names[i]
        if not can_take_parent(child):
            continue
        parent = random.choice(names[:i])
        tree.add_edge(parent, child)
        if tree.in_degree(child) == max_parents:
            full_nodes += 1

    # Add additional edges based on probability p, while maintaining DAG structure and constraints
    for source in names:
        for target in names:
            if source == target:
                continue

            # One random draw per ordered pair, taken before any constraint check
            if random.random() >= p:
                continue

            if not can_take_parent(target):
                continue

            # source -> target closes a cycle exactly when target already
            # reaches source; this avoids copying the graph for every candidate.
            if nx.has_path(tree, target, source):
                continue

            tree.add_edge(source, target)
            if tree.in_degree(target) == max_parents:
                full_nodes += 1

    # Final validation checks
    assert nx.is_directed_acyclic_graph(tree), "The generated graph is not a DAG."
    assert nx.is_weakly_connected(tree), "The graph is not weakly connected."
    assert all(d <= max_parents for _, d in tree.in_degree()), \
        f"Some nodes have more than {max_parents} parents."

    # Verify the number of nodes with exactly `max_parents` parents does not exceed the threshold
    nodes_at_limit = sum(1 for _, d in tree.in_degree() if d == max_parents)
    assert nodes_at_limit <= max_full_nodes, \
        f"The number of nodes with {max_parents} parents exceeds 5% of total nodes."

    return tree


def generate_state_names(n):
    """
    Build the state-label mapping for nodes "X0" .. "X{n-1}".

    Parameters:
        n (int): Number of nodes.

    Returns:
        dict: Maps each node name to its own two-element list ["A", "B"].
    """
    labels_by_node = {}
    for index in range(n):
        # A fresh list per node, so the entries never share one list object
        labels_by_node[f"X{index}"] = ["A", "B"]
    return labels_by_node

def get_random_cpds_with_labels(model, state_names, inplace=False, seed=None):
    """
    Create one random TabularCPD per node of `model`, labelled with `state_names`.

    Parameters:
        model: Network object; its nodes() and predecessors() give each CPD's
            variable and evidence.
        state_names (dict): Maps node name to its list of state labels. The
            number of labels determines the node's cardinality; nodes without
            an entry default to 2 states.
        inplace (bool): If True, add the CPDs to `model` and return None;
            otherwise return them without touching `model`.
        seed: Passed unchanged to TabularCPD.get_random for every node.

    Returns:
        list or None: The generated CPDs when `inplace` is False, else None.
    """
    # Derive cardinalities from the labels so that they cannot disagree with
    # `state_names`; built once because it is the same for every node.
    labels = state_names or {}
    cardinality = {
        var: len(labels[var]) if var in labels else 2
        for var in model.nodes()
    }

    # NOTE(review): the same `seed` is forwarded to every call; presumably
    # CPDs of equal shape then receive identical values when a seed is given —
    # confirm whether per-node seeds are wanted.
    cpds = [
        TabularCPD.get_random(
            variable=node,
            evidence=list(model.predecessors(node)),
            cardinality=cardinality,
            state_names=state_names,
            seed=seed,
        )
        for node in model.nodes()
    ]

    if inplace:
        model.add_cpds(*cpds)
        return None
    return cpds

In [None]:
# Grid of network sizes and extra-edge probabilities to generate networks for
n_values = [20, 40, 60, 80, 100]
p_values = [0.1, 0.05, 0.01, 0.005]
# Number of networks written for every (n, p) combination
num_networks = 10


os.makedirs("network", exist_ok=True)

# Output layout: network/n_<n>/p_<p>/n_<n>_p_<p>_<i>.bif
for n in n_values:

    n_dir = os.path.join("network", f"n_{n}")
    os.makedirs(n_dir, exist_ok=True)

    for p in p_values:

        p_dir = os.path.join(n_dir, f"p_{p}")
        os.makedirs(p_dir, exist_ok=True)

        # Use the configured count instead of a hard-coded range(1, 11)
        for i in range(1, num_networks + 1):
            nx_G = generate_connected_dag(n, p)

            # Copy the random structure into a pgmpy network
            BN = BayesianNetwork()
            BN.add_nodes_from(nx_G.nodes())
            BN.add_edges_from(nx_G.edges())

            # Attach random CPDs labelled with the "A"/"B" states
            state_names = generate_state_names(n)
            get_random_cpds_with_labels(BN, state_names, inplace=True)

            filename = os.path.join(p_dir, f"n_{n}_p_{p}_{i}.bif")
            writer = BIFWriter(BN)
            writer.write_bif(filename=filename)

            print(f"Generated and saved: {filename}")

print("All networks have been successfully generated!")