In [4]:
import metrics
import os
import hypernetx as hnx
import subprocess
import pickle
import hypernetx as hnx

# Root directory that is scanned for dataset entries
base_path = "../../datasets"

# Metrics evaluated for every dataset; each class is instantiated once,
# in the order in which the results are reported.
validation_metrics = [
    metric_cls()
    for metric_cls in (
        metrics.NodeNumDiff,
        metrics.NodeDegreeDistrWasserstein,
        metrics.EdgeSizeDistrWasserstein,
        metrics.Spectral,
        metrics.Uniqueness,
        metrics.Novelty,
        metrics.CentralityCloseness,
        metrics.CentralityBetweenness,
        metrics.CentralityHarmonic,
    )
]

def generate_hypergraph(dataset_name, output_dir='output'):
    """Build a hypergraph from the generated output files of one sample.

    Every file in ``output_dir`` whose name starts with ``dataset_name`` is
    read as text. Each non-blank line is split on whitespace into integer
    node ids and contributes one hyperedge.

    Args:
        dataset_name: File-name prefix identifying the sample, e.g.
            ``"hypergraphTree_3"``.
        output_dir: Directory that holds the generated files.

    Returns:
        An ``hnx.Hypergraph`` built from the collected hyperedges.

    Raises:
        FileNotFoundError: If ``output_dir`` does not exist.
        ValueError: If a token on a line cannot be parsed as an integer.
    """
    prefix_len = len(dataset_name)
    hyperedges = []
    # os.listdir returns entries in arbitrary order; sort so the hyperedge
    # order is reproducible between runs.
    for filename in sorted(os.listdir(output_dir)):
        if not filename.startswith(dataset_name):
            continue
        # A prefix ending in an index ("..._1") must not also pick up the
        # files of "..._10", "..._11", ...: reject names where the prefix is
        # immediately followed by another digit.
        if filename[prefix_len:prefix_len + 1].isdigit():
            continue
        filepath = os.path.join(output_dir, filename)
        with open(filepath, 'r') as file:
            for line in file:
                tokens = line.split()
                if not tokens:
                    # Blank lines would otherwise become empty hyperedges.
                    continue
                hyperedges.append(tuple(map(int, tokens)))
    return hnx.Hypergraph(hyperedges)

# For every entry under base_path: load the matching pickle, generate one
# hypergraph per test sample with the external script, then print each metric
# computed over the whole list of generated hypergraphs.
# Sorted so the report order does not depend on os.listdir's arbitrary order.
for dataset_name in sorted(os.listdir(base_path)):
    # The last four characters of the entry name are dropped to obtain the
    # dataset stem (presumably a ".xyz" extension -- TODO confirm).
    dataset_stem = dataset_name[:-4]

    # Start from the shared metrics and add the validity check that matches
    # the dataset family named in the entry.
    current_metrics = validation_metrics.copy()

    if "hypergraphEgo" in dataset_name:
        current_metrics.append(metrics.ValidEgo())

    if "hypergraphSBM" in dataset_name:
        current_metrics.append(metrics.ValidSBM())

    if "hypergraphTree" in dataset_name:
        current_metrics.append(metrics.ValidHypertree())

    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only load dataset files from a trusted source.
    with open('../../data/' + dataset_stem + '.pkl', 'rb') as file:
        dataset = pickle.load(file)

    # One generated hypergraph per test sample, in test-set order
    all_hypergraphs = []

    for i, test_sample in enumerate(dataset['test']):
        sample_name = f"{dataset_stem}_{i}"
        command = [
            'python', 'hyper_preferential_attachment.py',
            f'--name={sample_name}',
            f'--file_name={sample_name}',
            f'--num_nodes={len(test_sample.nodes)}',
            '--simplex_per_node_directory=simplex per node',
            '--size_distribution_directory=size distribution',
            '--output_directory=output',
        ]
        # check=True: stop on a failed generation instead of silently
        # scoring whatever (possibly stale) files are in the output folder.
        subprocess.run(command, check=True)

        # Read back the files the script wrote for this sample
        hypergraph = generate_hypergraph(sample_name)
        all_hypergraphs.append(hypergraph)

    # Each metric is called with (test set, generated hypergraphs, train set)
    print(f"Metrics for dataset {dataset_name}:")
    for metric in current_metrics:
        result = metric(dataset['test'], all_hypergraphs, dataset['train'])
        print(f"{metric}: {result}")
    print("\n" + "="*50 + "\n")

done with hypergraphTree_0
done with hypergraphTree_1
done with hypergraphTree_2
done with hypergraphTree_3
done with hypergraphTree_4
done with hypergraphTree_5
done with hypergraphTree_6
done with hypergraphTree_7
done with hypergraphTree_8
done with hypergraphTree_9
done with hypergraphTree_10
done with hypergraphTree_11
done with hypergraphTree_12
done with hypergraphTree_13
done with hypergraphTree_14
done with hypergraphTree_15
done with hypergraphTree_16
done with hypergraphTree_17
done with hypergraphTree_18
done with hypergraphTree_19
done with hypergraphTree_20
done with hypergraphTree_21
done with hypergraphTree_22
done with hypergraphTree_23
done with hypergraphTree_24
done with hypergraphTree_25
done with hypergraphTree_26
done with hypergraphTree_27
done with hypergraphTree_28
done with hypergraphTree_29
done with hypergraphTree_30
done with hypergraphTree_31
done with hypergraphTree_32
done with hypergraphTree_33
done with hypergraphTree_34
done with hypergraphTree_35
do