In [None]:
import sys
from pathlib import Path
import logging
import torch
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# --- Logging and import-path bootstrap ---
# INFO-and-above records are directed to notebook.log, opened in 'w' mode.
logging.basicConfig(filename='notebook.log', filemode='w', level=logging.INFO)
logger = logging.getLogger(__name__)

# Resolve the current working directory and derive the project's source folder.
project_root = Path('.').resolve()
src_path = project_root.joinpath('layered-context-graph', 'src')

# Put the source folder at the front of sys.path, skipping the insert when it
# is already listed so re-running the cell does not add duplicates.
src_entry = str(src_path)
if src_entry not in sys.path:
    sys.path.insert(0, src_entry)
logger.info(f"Project root set to: {project_root}")

from models.baai_model import BAAIModel
from models.qwq_model import QwQModel
from partitioning.partition_manager import PartitionManager

In [None]:
# --- Cell 2: Model Initialization ---
# Model locations, given as paths relative to the working directory.
BAAI_MODEL_PATH = './bge-en-icl'
QWQ_MODEL_PATH = './QwQ_LCoT_7B_Instruct/'

# Select CUDA when torch reports it as available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Embedding model: passed to the semantic-edge step of the pipeline cell.
print("Initializing BAAIModel (lazy)...")
embedding_model = BAAIModel(device=device, model_path=BAAI_MODEL_PATH)
print("BAAIModel initialized.")

# Segmentation model: its segment() method is called in the pipeline cell.
print("Initializing QwQModel (lazy)...")
segmenter_model = QwQModel(device=device, qwq_model_path=QWQ_MODEL_PATH)
print("QwQModel initialized.")

In [None]:
# --- Cell 3: End-to-End Partition and Graph Test ---
# Load the input document if it exists next to the notebook; otherwise fall
# back to a short built-in sample so the cell still runs end to end.
source_file = Path('Layer_Context_Window_Graphs')
if source_file.exists():
    # Path.read_text opens, reads and closes the file in one call, so no file
    # handle is left open (the previous bare open(...).read() never closed it).
    # NOTE(review): assumes the document is UTF-8 encoded — confirm.
    pipeline_text = source_file.read_text(encoding='utf-8')
else:
    pipeline_text = "The sun rises in the east. Birds sing. Later, the topic shifts to astrophysics. The lifecycle of a star is complex. Returning to our original theme, the weather is warm."
segmentation_rule = "Split into paragraphs."

# 1. Initialize the manager and seed its graph with the full text.
manager = PartitionManager(similarity_threshold=0.7)
manager.initialize_graph(pipeline_text)

# 2. Perform segmentation: each leaf is split by the segmenter model and the
#    resulting pieces are attached as children of that leaf.
print(f"Applying Rule: '{segmentation_rule}'")
leaves_to_process = manager.get_leaves_to_process()
for segment_to_split in leaves_to_process:
    new_content_pieces = segmenter_model.segment(segmentation_rule, segment_to_split.content)
    manager.add_child_segments(segment_to_split.id, new_content_pieces)

# 3. Add semantic edges using the embedding model.
manager.add_semantic_edges(embedding_model)

# 4. Classify nodes (later cells read the resulting per-segment `tag`).
manager.classify()

print(f"Graph construction complete. Found {len(manager.segments)} nodes and {manager.graph.number_of_edges()} edges.")

In [None]:
# --- Cell 4: Visualize the Graph ---
# Draws the manager's graph with one label and one colour per node, both
# derived from the tag of the segment stored on that node.
graph = manager.graph
if graph and graph.nodes():
    color_map = {'KEEP': 'skyblue', 'TRACK': 'lightgreen', 'DELETE': 'lightcoral'}

    # Tag the root BEFORE labels and colours are derived. Previously the labels
    # were built first, so on a fresh run the root label showed its old tag
    # while its colour was already the ROOT colour, and a re-run produced a
    # different label than the first run.
    # NOTE(review): this overwrites the root segment's tag in place, so later
    # cells that filter on `tag` will see 'ROOT' for this segment.
    if manager.root_id in graph.nodes:
        graph.nodes[manager.root_id]['segment'].tag = 'ROOT'
        color_map['ROOT'] = 'gold'

    # Label: first four characters of the segment id, then its tag.
    labels = {node_id: f"{data['segment'].id[:4]}...\n({data['segment'].tag})" for node_id, data in graph.nodes(data=True)}

    # Tags missing from color_map are drawn in gray.
    node_colors = [color_map.get(data['segment'].tag, 'gray') for _, data in graph.nodes(data=True)]

    plt.figure(figsize=(12, 12))
    # Fixed seed keeps the layout reproducible between runs.
    pos = nx.spring_layout(graph, seed=42)

    nx.draw(graph, pos, labels=labels, with_labels=True, node_size=2000, node_color=node_colors, font_size=8, font_weight='bold')
    plt.title('Knowledge Graph from PartitionManager')
    plt.show()
else:
    print("No graph data to visualize.")

In [None]:
# --- Cell 5: Print Deleted Segments ---
# Lists the content of every segment whose tag is 'DELETE'.
print("--- Segments Marked for Deletion ---")
deleted_segments = []
for seg in manager.segments.values():
    if seg.tag == 'DELETE':
        deleted_segments.append(seg)

if not deleted_segments:
    print("No segments were marked for deletion.")
else:
    # Number the segments from 1 for display.
    for position, segment in enumerate(deleted_segments, start=1):
        print(f"--- Deleted Segment {position} (ID: {segment.id}) ---")
        print(segment.content)
        print("-" * 40)