In [None]:
from pyrdf2vec.graphs import KG
from pyrdf2vec.walkers import RandomWalker
import pandas as pd
import time

In [None]:
# Build the knowledge graph (KG) object from the RDF data file.
kg = KG(
    "../AM/rdf_am-data.ttl",  # Path to the RDF data file (relative to the notebook)
    # No predicates are skipped. `set()` is used on purpose: in Python `{}` is an
    # empty dict literal, not an empty set.
    skip_predicates=set(),
)


In [None]:
# Load the entity / pathNum table from CSV into a pandas DataFrame.
df = pd.read_csv("entity_path_numbers.csv")
# Keep the distinct `entity` values of the rows whose `pathNum` is below
# 1,000,000, as a plain Python list (order of first appearance is preserved).
entities = df.loc[df["pathNum"].lt(1_000_000), "entity"].unique().tolist()

In [None]:
# Benchmark walk extraction on growing prefixes of `entities`.
# paths_nums[k] and run_times[k] describe the same prefix size, so the two
# lists can be compared element by element afterwards.
paths_nums = []
run_times = []

# Prefix sizes are 10, 20, 30, ...  `range` stops *before* len(entities), so the
# complete entity list is never timed, and 10 or fewer entities yields no
# iterations at all.
for i in range(10, len(entities), 10):

    # A fresh RandomWalker (maximum depth 4) is created for every measurement.
    walker = RandomWalker(max_depth=4)

    # Time only the extraction call. perf_counter() is a monotonic,
    # high-resolution clock intended for measuring elapsed durations; unlike
    # time.time() it is not affected by system clock adjustments.
    start_time = time.perf_counter()
    walks = walker.extract(kg, entities[:i])
    end_time = time.perf_counter()

    # Elapsed seconds, rounded to 3 decimal places
    runtime = round(end_time - start_time, 3)
    run_times.append(runtime)

    # Total number of paths: sum of the sizes of the items returned by
    # extract() (presumably one collection of walks per entity - TODO confirm).
    paths_nums.append(sum(len(sublist) for sublist in walks))

    # Progress output: the current prefix size with its runtime, followed by
    # every runtime collected so far.
    print(i, ':', runtime)
    print(run_times)