In [None]:
import sys
from pathlib import Path
import logging
import networkx as nx
import matplotlib.pyplot as plt
import json

# --- Setup Logging and Paths ---
# Log records go to 'notebook.log'; filemode='w' starts the file afresh on
# every run of this cell.
logging.basicConfig(
    level=logging.INFO,
    filename='notebook.log',
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# The project-local packages imported below live under
# <project_root>/layered-context-graph/src, so that directory is put at the
# front of sys.path (once) before those imports run.
project_root = Path('.').resolve()
src_path = project_root.joinpath('layered-context-graph', 'src')
src_entry = str(src_path)
if src_entry not in sys.path:
    sys.path.insert(0, src_entry)
logger.info(f"Project root set to: {project_root}")

from partitioning.partition_manager import PartitionManager
from models.qwq_model import QwQModel
from models.baai_model import BAAIModel

In [None]:
# --- Cell 2: Load Data and Define Rules ---
# Location of the text document that the pipeline will partition.
source_text_path = Path('./Layer_Context_Window_Graphs')
# is_file() (rather than exists()) also rejects a directory with this name,
# which would otherwise only fail later, while opening it, with a less
# helpful error.
if not source_text_path.is_file():
    raise FileNotFoundError(f"Source text not found. Please create a file at {source_text_path}")
# Decode explicitly as UTF-8 so the loaded text does not depend on the
# platform's default locale encoding.
source_text = source_text_path.read_text(encoding='utf-8')

# Instruction handed to the model for the final reassembly step.
synthesis_prompt = "Create a concise, easy-to-read summary of the key findings from the provided text."

# K-Rules for segmentation, listed in the order they are meant to be applied.
segmentation_rules = [
    # 1. major sections
    "Split the document into its major sections like Introduction, Main Body, and Conclusion.",
    # 2. paragraphs
    "Break down each section into paragraphs.",
    # 3. code blocks and formulas
    "Isolate code blocks and mathematical formulas.",
    # 4. sentences
    "Divide long paragraphs into individual sentences.",
]

print("Data and rules loaded.")

In [None]:
# --- Cell 3: Run the Full Pipeline ---
# Runs the five phases back to back: model/manager setup, rule-driven
# partitioning, semantic edges, node classification and reassembly.
print("--- Phase 1: Initializing Models and Manager ---")
segmenter_model = QwQModel(qwq_model_path='./QwQ_LCoT_7B_Instruct/')
embedding_model = BAAIModel(model_path='./bge-en-icl')
manager = PartitionManager()

print("\n--- Phase 2: Hierarchical Partitioning ---")
manager.initialize_graph(source_text)

# One pass per K-Rule over whatever leaves the manager currently reports.
for i, rule in enumerate(segmentation_rules):
    print(f"Applying K-Rule {i+1}: '{rule}'")
    pending_leaves = manager.get_leaves_to_process()

    if pending_leaves:
        for leaf in pending_leaves:
            if leaf.has_code or leaf.has_math:
                # Segments flagged as containing code or math are not handed
                # to the segmenter; their content is re-added unchanged as a
                # single child.
                child_pieces = [leaf.content]
            else:
                child_pieces = segmenter_model.segment(rule, leaf.content)
            manager.add_child_segments(leaf.id, child_pieces)
    else:
        print("No more leaves to process. Stopping partitioning.")
        break

print(f"Partitioning complete. {len(manager.segments)} segments created.")

print("\n--- Phase 3: Adding Semantic Edges ---")
manager.add_semantic_edges(embedding_model)
print("Semantic edges added.")

print("\n--- Phase 4: Classifying Nodes ---")
manager.classify()
print("Classification complete.")

print("\n--- Phase 5: Reassembling Text ---")
# The segmenter model is reused here as the synthesis model.
summary = manager.reassemble(
    synthesis_prompt,
    key='summary',
    synthesis_model=segmenter_model,
)
print("Reassembly complete.")

In [None]:
# --- Cell 4: Display Reassembled Summary ---
# Banner line followed by the value of `summary` on the next line.
print("--- Reassembled Summary ---", summary, sep="\n")

In [None]:
# --- Cell 5: Visualize the Hierarchical Graph ---
# Draws manager.graph with one square node per segment, coloured by the
# segment's tag, and saves the figure to ./results/rich_pipeline_graph.png.
graph = manager.graph
if graph and graph.nodes():
    plt.figure(figsize=(18, 18))
    
    # Use Graphviz for a hierarchical layout
    try:
        pos = nx.nx_pydot.graphviz_layout(graph, prog='dot')
    except ImportError:
        print("pydot not found, using a simpler layout. For a hierarchical view, run: pip install pydot")
        pos = nx.spring_layout(graph, seed=42)
    except OSError as layout_error:
        # pydot is importable but the Graphviz `dot` program could not be
        # launched (e.g. Graphviz itself is not installed); fall back the same
        # way instead of aborting the cell.
        print(f"Graphviz 'dot' could not be run ({layout_error}), using a simpler layout.")
        pos = nx.spring_layout(graph, seed=42)
        
    # Retag the root BEFORE labels and colours are built so both agree on the
    # first run as well as on re-runs. NOTE: this overwrites the tag stored on
    # the root's segment object, not just the drawing.
    if manager.root_id in graph.nodes:
        graph.nodes[manager.root_id]['segment'].tag = 'ROOT'

    # Prepare labels and colors
    labels = {node_id: f"{data['segment'].tag}\n({node_id[:4]}...)" for node_id, data in graph.nodes(data=True)}
    color_map = {'KEEP': 'skyblue', 'TRACK': 'lightgreen', 'DELETE': 'lightcoral', 'ROOT': 'gold'}
    # Tags missing from color_map are drawn in gray.
    node_colors = [color_map.get(data['segment'].tag, 'gray') for _, data in graph.nodes(data=True)]
    
    # Draw the graph
    nx.draw(graph, pos, labels=labels, with_labels=True, node_size=4000, node_color=node_colors, 
            font_size=10, font_weight='bold', arrows=True, arrowstyle='->', arrowsize=20, 
            edge_color='gray', node_shape='s')
    
    plt.title('Hierarchical Knowledge Graph')
    # savefig does not create missing directories, so make sure the output
    # folder exists before writing the image.
    results_dir = Path('./results')
    results_dir.mkdir(parents=True, exist_ok=True)
    plt.savefig(results_dir / 'rich_pipeline_graph.png')
    plt.show()
else:
    print("No graph data to visualize.")