# In [3]:
import random
import os
from pathlib import Path

# ---- Generator configuration -------------------------------------------------
# Number of logical graphs to emit; also used as the name of the output folder.
num_graphs = 64
# NOTE: num_vertices_per_graph / num_edges_per_graph are only referenced by
# commented-out code in the part of the file visible here.
num_vertices_per_graph = 5 #(int) (256 / num_graphs)
num_edges_per_graph = num_vertices_per_graph - 1
# Starting depth handed to generate_child_vertices() by main().
graph_depth = 2
# Integer floor division: stays an exact int without a float round-trip.
num_child_vertices_per_graph = 64 // num_graphs # n = 64 / num_graphs = 2^6 / num_graphs
num_grandchild_vertices_per_graph = 1 # m = 2^(a - log2(n)) - 1

#vertex_type_count = 2 
#edge_type_count = 1
#graph_type_count = 1

# ---- Output location ---------------------------------------------------------
#csv_graph_folder = "csv_" + f"{num_graphs}" + "_graphs" 
csv_graph_folder = str(num_graphs)
graphs_csv_file = 'graphs.csv'
vertices_csv_file = 'vertices.csv'
edges_csv_file = 'edges.csv'
metadata_csv_file = 'metadata.csv'

# Create graph folder (no error if it already exists)
path = Path(csv_graph_folder)
path.mkdir(parents=True, exist_ok=True)

# vertex_types = [f"v{chr(ord('A') + i)}" for i in range(vertex_type_count)]  # Generate vertex types
# edge_types = [f"e{i}" for i in range(edge_type_count)]     # Generate edge types
#graph_types = [f"g{i}" for i in range(graph_type_count)]   # Generate graph types

# ---- Entity labels -----------------------------------------------------------
# These labels are the keys of the per-type property configs and appear in the
# label column of the generated CSV rows.
vertex_types = ["parent", "child", "grandchild"]
edge_types = ["childOf"]
graph_types = ["graph"]  
# NOTE: not referenced by the code visible in this part of the file.
timestamp_properties = ["transaction time", "valid time"]

def generate_entity_properties_config(entity_types, num_properties, use_random_properties):
    """Map every entity type to the number of properties it should carry.

    Args:
        entity_types: Iterable of type labels used as dictionary keys.
        num_properties: Property count assigned to every type when
            ``use_random_properties`` is falsy.
        use_random_properties: When truthy, ``num_properties`` is ignored and
            each type gets an independent ``random.randint(1, 4)`` count.

    Returns:
        dict mapping each label to its property count.
    """
    if not use_random_properties:
        # Fixed layout: every type shares the same count.
        return dict.fromkeys(entity_types, num_properties)
    return {type_name: random.randint(1, 4) for type_name in entity_types}

# Property layout. With use_random_properties set to True each entity type gets
# a random property count; with False every type gets the fixed count below.
use_random_properties = False
num_properties_graphs = num_properties_vertices = num_properties_edges = 0

graphs_properties_config = generate_entity_properties_config(
    entity_types=graph_types,
    num_properties=num_properties_graphs,
    use_random_properties=use_random_properties,
)
vertices_properties_config = generate_entity_properties_config(
    entity_types=vertex_types,
    num_properties=num_properties_vertices,
    use_random_properties=use_random_properties,
)
edges_properties_config = generate_entity_properties_config(
    entity_types=edge_types,
    num_properties=num_properties_edges,
    use_random_properties=use_random_properties,
)


# CSV contents are accumulated in these module-level strings and written to
# disk at the end of main().
graphs_csv = vertices_csv = edges_csv = metadata_csv = ""

# Initialize the starting ID for parent vertices and edges
#parent_vertex_start_id = num_vertices_per_graph
#edge_start_id = num_edges_per_graph - 1

# get_id() increments before formatting, so the first vertex/edge ID is
# num_graphs; the values 0 .. num_graphs - 1 are used as graph IDs in main().
entity_index = num_graphs - 1

def get_id():
    """Advance the module-level ``entity_index`` and return it as an ID string.

    Returns:
        The new counter value as lowercase hexadecimal, zero-padded to
        24 characters.
    """
    global entity_index
    entity_index = entity_index + 1
    return format(entity_index, "024x")

# Monotonic counter used for deterministic timestamps.
timestamp_counter = 0

def generate_timestamps(transactionAndValidIdentical):
    """Produce two ``(from, to)`` timestamp pairs for one entity.

    Args:
        transactionAndValidIdentical: When truthy, both pairs are
            ``(c, c)`` where ``c`` is the current ``timestamp_counter``, and
            the counter is then advanced by one. When falsy, each pair is
            drawn at random from ``0..999999999`` with ``from <= to`` and the
            counter is left untouched.

    Returns:
        A list holding exactly two ``(from, to)`` tuples.
    """
    global timestamp_counter

    if not transactionAndValidIdentical:
        intervals = []
        while len(intervals) < 2:
            lower = random.randint(0, 999999999)
            upper = random.randint(lower, 999999999)  # never earlier than lower
            intervals.append((lower, upper))
        return intervals

    tick = timestamp_counter
    timestamp_counter = tick + 1
    return [(tick, tick), (tick, tick)]

def generate_child_vertices(graph_id, parent_vertex_id, depth, label, num_vertices):
    """Append one level of vertices below a parent and recurse for deeper levels.

    For every created vertex, one row is appended to the module-level
    ``vertices_csv`` and one ``childOf`` edge row (new vertex -> parent) is
    appended to ``edges_csv``. While levels remain, each created vertex becomes
    the parent of a further level, which is always labelled ``"grandchild"``.

    Args:
        graph_id: Graph identifier written into the ``[...]`` membership column.
        parent_vertex_id: ID of the vertex the new vertices point to.
        depth: Number of levels still to generate, counting this one.
        label: Vertex label for this level; must be a key of
            ``vertices_properties_config``.
        num_vertices: How many vertices to create at this level.
    """
    global vertices_csv
    global edges_csv

    remaining_depth = depth - 1
    edge_label = "childOf"
    membership = f"[{graph_id}]"
    last_index = num_vertices - 1

    # Connect child vertices to the parent with edges
    for i in range(num_vertices):
        child_vertex_id = get_id()  # Unique ID for each child vertex
        child_vertex_properties = "|".join(
            str(random.randint(1, 100)) for _ in range(vertices_properties_config[label])
        )
        child_vertex_timestamps = generate_timestamps(transactionAndValidIdentical)
        child_vertex_timestamp_str = ",".join(
            f"({tx_from},{tx_to})" for (tx_from, tx_to) in child_vertex_timestamps
        )
        vertices_csv += (
            f"{child_vertex_id};{membership};{label};"
            f"{child_vertex_properties};{child_vertex_timestamp_str}\n"
        )

        # Create edge between child (source) and parent (target).
        edge_id = get_id()  # Unique ID for each edge
        edge_properties = "|".join(
            str(random.randint(1, 10)) for _ in range(edges_properties_config[edge_label])
        )
        # The edge deliberately reuses the timestamps of its source vertex.
        edges_csv += (
            f"{edge_id};{membership};{child_vertex_id};{parent_vertex_id};"
            f"{edge_label};{edge_properties};{child_vertex_timestamp_str}\n"
        )

        if remaining_depth > 0:
            # The last vertex of a level gets one descendant fewer than its
            # siblings. Compare by value (==): identity comparison of ints only
            # happens to work inside CPython's small-integer cache.
            descendants = num_grandchild_vertices_per_graph
            if i == last_index:
                descendants -= 1
            generate_child_vertices(
                graph_id, child_vertex_id, remaining_depth, "grandchild", descendants
            )


# When True, generate_timestamps() hands out counter-based timestamps instead of
# random ones.
transactionAndValidIdentical = True
def main():
    """Generate all graphs and write the four CSV files into ``csv_graph_folder``.

    For each of the ``num_graphs`` graphs this appends one graph row and one
    ``parent`` vertex row, then delegates the lower levels to
    ``generate_child_vertices``. Afterwards a metadata file listing the
    property schema of every graph, vertex and edge label is built, and the
    graph, vertex, edge and metadata contents are written to disk.
    """
    global graphs_csv
    global vertices_csv

    # Generate graph data
    for i in range(num_graphs):
        graph_id = f"{i:024x}"
        graph_label = random.choice(graph_types)
        graph_properties = "|".join(
            str(random.randint(1, 5)) for _ in range(graphs_properties_config[graph_label])
        )
        # The graph row reads the current timestamp counter without advancing it.
        graph_timestamps = [(timestamp_counter, timestamp_counter), (timestamp_counter, timestamp_counter)]
        graph_timestamps_str = ",".join(f"({tx_from},{tx_to})" for (tx_from, tx_to) in graph_timestamps)

        graphs_csv += f"{graph_id};{graph_label};{graph_properties};{graph_timestamps_str}\n"

        # Generate parent vertex for each graph
        parent_vertex_id = get_id()  # Unique ID for each parent vertex
        parent_vertex_graphs = f"[{graph_id}]"
        parent_vertex_label = "parent"
        parent_vertex_properties = "|".join(
            str(random.randint(1, 100)) for _ in range(vertices_properties_config[parent_vertex_label])
        )
        parent_vertex_timestamps = generate_timestamps(transactionAndValidIdentical)
        parent_vertex_timestamp_str = ",".join(
            f"({tx_from},{tx_to})" for (tx_from, tx_to) in parent_vertex_timestamps
        )
        vertices_csv += (
            f"{parent_vertex_id};{parent_vertex_graphs};{parent_vertex_label};"
            f"{parent_vertex_properties};{parent_vertex_timestamp_str}\n"
        )

        # Children (and their descendants) plus the connecting edges.
        generate_child_vertices(graph_id, parent_vertex_id, graph_depth, "child", num_child_vertices_per_graph)

    def generate_metadata(entity_types, entity_labels, entity_properties_config):
        """Return one ``<kind>;<label>;<prop>:int,...`` line per (kind, label) pair."""
        rows = []
        for entity_type, label in zip(entity_types, entity_labels):
            entity_properties = ",".join(
                f"{label}_prop_{j}:int" for j in range(entity_properties_config[label])
            )
            rows.append(f"{entity_type};{label};{entity_properties}\n")
        return "".join(rows)

    # One metadata line per label. The kind list is sized by the number of
    # labels (not by num_graphs) so zip() can never drop a graph label.
    metadata_csv = generate_metadata(["g"] * len(graph_types), graph_types, graphs_properties_config)
    metadata_csv += generate_metadata(["v"] * len(vertex_types), vertex_types, vertices_properties_config)
    metadata_csv += generate_metadata(["e"] * len(edge_types), edge_types, edges_properties_config)

    # Save the generated data to separate files
    outputs = (
        (graphs_csv_file, graphs_csv),
        (vertices_csv_file, vertices_csv),
        (edges_csv_file, edges_csv),
        (metadata_csv_file, metadata_csv),
    )
    for file_name, content in outputs:
        with open(os.path.join(csv_graph_folder, file_name), "w", encoding="utf-8") as f:
            f.write(content)

main()