# Docling-Graph: Getting Started Tutorial
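This notebook walks you through the basics of using docling-graph to convert documents into knowledge graphs. To keep things simple, it defines a few Pydantic templates, builds sample instances by hand, converts them into a graph, and then visualizes and queries the result.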

## Installation & Setup

In [None]:
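# Install if needed (editable install of the package in the current directory).
# %pip installs into the environment of the running kernel.
# %pip install -e .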

# Imports
from pathlib import Path
from pydantic import BaseModel, Field
from typing import Optional, List
import yaml

from docling_graph.graph_converter import GraphConverter
from docling_graph.graph_visualizer import create_interactive_graph
from docling_graph.models import Edge

## Step 1: Define Your Pydantic Template

In [None]:
class Person(BaseModel):
    """Person entity"""
    # NOTE: the docstring above is left untouched on purpose; pydantic exposes
    # a model's docstring as the description in its generated JSON schema.

    # Use name as unique identifier
    model_config = {"graph_id_fields": ["name"]}

    # Required field; the description is carried into the field metadata.
    name: str = Field(..., description="Full name")
    # Optional fields: both fall back to None when not supplied.
    age: Optional[int] = Field(default=None)
    email: Optional[str] = Field(default=None)

class Company(BaseModel):
    """Company entity"""
    # NOTE: the docstring above is left untouched on purpose; pydantic exposes
    # a model's docstring as the description in its generated JSON schema.

    # Presumably read by docling-graph to pick the field(s) that identify a
    # Company node -- TODO confirm against the GraphConverter documentation.
    model_config = {"graph_id_fields": ["name"]}

    # Required field.
    name: str
    # Optional field; None when not supplied.
    industry: Optional[str] = Field(default=None)

class Invoice(BaseModel):
    """Invoice document template"""
    # NOTE: the docstring above is left untouched on purpose; pydantic exposes
    # a model's docstring as the description in its generated JSON schema.

    # Presumably read by docling-graph to pick the field(s) that identify an
    # Invoice node -- TODO confirm against the GraphConverter documentation.
    model_config = {"graph_id_fields": ["invoice_number"]}

    # Scalar attributes. Only the invoice number is required; `date` is typed
    # as a plain string, so this model does no date parsing of its own.
    invoice_number: str
    date: Optional[str] = Field(default=None)
    amount: Optional[float] = Field(default=None)

    # Relationships (implicit edges): nested models rather than scalar values.
    customer: Optional[Person] = Field(default=None)
    vendor: Optional[Company] = Field(default=None)

## Step 2: Create Sample Data

In [None]:
# Build the two related entities first so they can be attached to the invoice.
customer = Person(
    name="Alice Dupont",
    age=35,
    email="alice@example.com",
)
vendor = Company(
    name="ACME Corp",
    industry="Technology",
)

# The invoice nests both entities through its `customer` and `vendor` fields.
invoice = Invoice(
    invoice_number="INV-2024-001",
    date="2024-10-25",
    amount=1500.00,
    customer=customer,
    vendor=vendor,
)

# Short confirmation, one item per line.
print(
    f"Created invoice: {invoice.invoice_number}",
    f"Customer: {invoice.customer.name}",
    f"Vendor: {invoice.vendor.name}",
    sep="\n",
)

## Step 3: Convert to Knowledge Graph

In [None]:
# Initialize converter with reverse edges switched off.
converter = GraphConverter(add_reverse_edges=False)

# Convert Pydantic objects to graph. The method takes a list, so the single
# invoice is wrapped in one.
graph = converter.pydantic_list_to_graph([invoice])

# Summary counts. Headings are plain strings: they interpolate nothing, so
# they do not need an f-prefix.
# NOTE(review): the calls below assume `graph` exposes a networkx-style API --
# confirm against the GraphConverter documentation.
print("Graph Statistics:")
print(f"  Nodes: {graph.number_of_nodes()}")
print(f"  Edges: {graph.number_of_edges()}")

# One line per node: its identifier and its 'label' attribute, or 'Unknown'
# when the node carries no such attribute.
print("\nNode types:")
for node, data in graph.nodes(data=True):
    print(f"  {node}: {data.get('label', 'Unknown')}")

## Step 4: Visualize the Graph

In [None]:
# Create visualization.
# The target is given without a file extension; only its directory has to
# exist before the graph is written.
output_dir = Path("outputs")
output_path = output_dir / "notebook_example"
output_dir.mkdir(parents=True, exist_ok=True)

create_interactive_graph(graph, output_path)

# NOTE(review): the message assumes create_interactive_graph appends ".html"
# to the path it is given -- confirm against its documentation.
print(
    f"Interactive graph saved to: {output_path}.html",
    "Open the HTML file in your browser to explore!",
    sep="\n",
)

## Step 5: Query the Graph

In [None]:
# Find all Person nodes: keep (node id, attribute dict) pairs whose 'label'
# attribute equals 'Person'.
persons = [
    (person_id, attrs)
    for person_id, attrs in graph.nodes(data=True)
    if attrs.get('label') == 'Person'
]
print(f"Found {len(persons)} Person nodes:")
for node_id, data in persons:
    # `age` is optional on the Person model, so the attribute may be missing
    # or (presumably, depending on how the converter stores unset fields)
    # present with a value of None. Show 'N/A' in both cases rather than
    # printing the literal "None".
    age = data.get('age')
    print(f"  - {data.get('name')} (age: {'N/A' if age is None else age})")

# Find all edges: source node, edge 'label' attribute, target node.
# The heading is a plain string; it interpolates nothing.
print("\nGraph edges:")
for u, v, data in graph.edges(data=True):
    print(f"  {u} --[{data.get('label')}]--> {v}")