# Example Notebook for Graph Building

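This notebook demonstrates how to use the `process_graph` function from the `graphrag_tagger.build_graph` module (`build_graph.py`) to process JSON files and build/prune a graph. It then loads the saved connected-components map and prints a few example chunks that share a component.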

In [None]:
# Switch the working directory to the parent folder so the relative paths
# used in the following cells resolve (presumably from the repository root — confirm).
%cd ..

In [None]:
import os
from graphrag_tagger.build_graph import process_graph

# Locations used throughout the notebook; adjust both to match your setup.
# Folder containing the input JSON files.
input_folder = "notebook/example/results"
# Folder where the results are saved.
output_folder = "notebook/example/results/graph_outputs"

# Ensure the destination exists (no error if it is already there).
os.makedirs(output_folder, exist_ok=True)

# Keyword options forwarded to process_graph: the threshold percentile
# (e.g., 97.5) and the content type to keep.
graph_options = {
    "threshold_percentile": 97.5,
    "content_type_filter": "paragraph",
}
graph = process_graph(input_folder, output_folder, **graph_options)

# `graph` holds the value returned by process_graph; the connected components
# map is read back from output_folder in a later cell.
print("Graph processing completed.")

In [None]:
import json
import os

# Load the connected-components map stored in the output folder.
# The context manager closes the file handle as soon as parsing finishes;
# a bare open() call would leave the handle open until garbage collection.
components_path = os.path.join(output_folder, "connected_components.json")
with open(components_path) as components_file:
    raw: dict = json.load(components_file)

# Number of entries in the map.
len(raw)

In [None]:
# Peek at the value stored under the key "0" to see what a single entry looks like.
raw["0"]

In [None]:
len({*raw.values()})  # how many distinct values (unique tags) the map contains

In [None]:
# Invert the map: every distinct value of `raw` becomes a key whose list
# collects the entries that share that value.
connected_chunks = {}
for key, component in raw.items():
    # Keys are numeric strings; the stored number is the key shifted up by one
    # (presumably to line up with 1-based chunk file names — confirm).
    connected_chunks.setdefault(component, []).append(int(key) + 1)

# Number of distinct groups.
len(connected_chunks)

In [None]:
# Print every group with more than one member and remember the largest one.
examples = []

for component, members in connected_chunks.items():
    if len(members) <= 1:
        # Single-member groups are neither printed nor considered.
        continue
    print(component, members)
    # max() returns its first argument on ties, so an earlier group of the
    # same size is kept rather than replaced.
    examples = max(examples, members, key=len)

In [None]:
def _load_chunk(chunk_number):
    """Return the parsed JSON content of ``chunk_<chunk_number>.json`` in ``input_folder``."""
    chunk_path = os.path.join(input_folder, f"chunk_{chunk_number}.json")
    # The context manager closes the file promptly instead of leaking the handle.
    with open(chunk_path) as chunk_file:
        return json.load(chunk_file)


# Load the chunk files for the first three numbers listed in `examples`.
# Raises IndexError when `examples` holds fewer than three entries.
example1 = _load_chunk(examples[0])
example2 = _load_chunk(examples[1])
example3 = _load_chunk(examples[2])

In [None]:
# Show the value stored under the "chunk" key of the first loaded example.
print(example1["chunk"])

In [None]:
# Show the value stored under the "chunk" key of the second loaded example.
print(example2["chunk"])

In [None]:
# Show the value stored under the "chunk" key of the third loaded example.
print(example3["chunk"])