# Corpus Exploration

Interactive exploration of the Corpus class for development and testing.
Use this notebook to experiment with the ENEX parsing functionality.

In [None]:
import sys
from pathlib import Path

# Put the "src" directory that sits next to the current working directory's
# parent at the front of sys.path, so it is searched first when `enote` is
# imported below. This depends on the directory the kernel was started in.
sys.path.insert(0, str(Path.cwd().parent / "src"))

import enote

In [None]:
# Instantiate a Corpus and list every attribute name that dir() reports for it
corpus = enote.Corpus()
print("Corpus attributes:", dir(corpus), sep="\n")

In [None]:
# Look at the documentation: print the help text of the load method on the
# Corpus instance created above
help(corpus.load)

In [None]:
# Load some notes and explore: call load() with max_notes=2, then report how
# many entries corpus.notes holds afterwards.
# NOTE(review): presumably max_notes caps how many notes are parsed — confirm
# against help(corpus.load).
corpus.load(max_notes=2)
print(f"Loaded {len(corpus.notes)} notes")

In [None]:
# Show the id, title, tags and available keys of the first note only.
# Nothing is printed when corpus.notes is empty.
first_item = next(iter(corpus.notes.items()), None)
if first_item is not None:
    note_id, note_data = first_item
    print(f"\n=== {note_id} ===")
    print(f"Title: {note_data['title']}")
    print(f"Tags: {note_data['tags']}")
    print(f"Keys: {list(note_data.keys())}")

In [None]:
# Inspect every field of the first note: its key, the type of its value and a
# short preview of the value's string form.
# next(iter(...), None) takes the first value without copying all notes into a
# list, and yields None instead of raising IndexError when nothing is loaded.
first_note = next(iter(corpus.notes.values()), None)
if first_note is None:
    print("No notes loaded.")
else:
    print("First note structure:")
    for key, value in first_note.items():
        preview = str(value)
        # Only mark the preview with "..." when it was actually cut short
        if len(preview) > 50:
            preview = preview[:50] + "..."
        print(f"{key}: {type(value)} - {preview}")

In [None]:
# Test the new cleaned_text functionality on the first note only: compare the
# length of the 'content' field with the length of the 'cleaned_text' field
# and print a preview of the cleaned text. Nothing is printed when
# corpus.notes is empty.
PREVIEW_CHARS = 200  # longest cleaned-text preview printed before truncating

first_entry = next(iter(corpus.notes.items()), None)
if first_entry is not None:
    note_id, note_data = first_entry
    # .get() falls back to '' when the key is absent from the note
    original = note_data.get('content', '')
    cleaned = note_data.get('cleaned_text', '')

    print(f"\n=== {note_id}: {note_data['title']} ===")
    print(f"Original length: {len(original)} chars")
    print(f"Cleaned length: {len(cleaned)} chars")

    # Skip the percentage for empty content to avoid dividing by zero
    if len(original) > 0:
        reduction = ((len(original) - len(cleaned)) / len(original)) * 100
        print(f"Size reduction: {reduction:.1f}%")

    print("\nCleaned text preview:")
    # Explicit branch instead of relying on conditional-expression precedence
    if len(cleaned) > PREVIEW_CHARS:
        print(cleaned[:PREVIEW_CHARS] + "...")
    else:
        print(cleaned)