# DOM Model with Pydantic and Pandoc Integration
This notebook defines a Document Object Model (DOM) built on Pydantic models, which provide typed fields and runtime validation, and integrates Pandoc (via pypandoc) for Markdown conversion.

In [None]:
#| default_exp dom
#| export
from typing import List, Optional
from pydantic import BaseModel, Field, validator
import base64
import pypandoc
import pathlib

## Base Element Class

In [None]:
#| export
class Element(BaseModel):
    """Common base for document elements that can carry a summary."""

    # Optional summary text; stays None when not supplied.
    summary: Optional[str] = Field(default=None)

## Figure Class with Base64 Validation

In [None]:
#| export
class Figure(Element):
    """An element that carries image data as base64 text.

    Inherits the optional ``summary`` field from ``Element``.
    """

    # Required field; the value must pass ``validate_base64`` below.
    rawdata: str = Field(..., description="Base64-encoded image data")

    @validator('rawdata')
    def validate_base64(cls, v):
        """Reject ``rawdata`` that is not well-formed base64.

        Whitespace (for example line wrapping) is ignored for the check; any
        other character outside the base64 alphabet is an error. The value
        itself is returned unchanged.

        Raises:
            ValueError: if ``v`` cannot be decoded as base64.
        """
        try:
            # Strip whitespace first so line-wrapped payloads stay acceptable
            # under strict validation.
            compact = "".join(v.split())
            # Without validate=True, b64decode silently discards characters
            # outside the alphabet, so garbage such as "$$$$" would pass.
            base64.b64decode(compact, validate=True)
        except (ValueError, TypeError) as err:
            # binascii.Error is a ValueError subclass, so it is covered here.
            raise ValueError('rawdata must be valid base64') from err
        return v

## Table Structure: Cell, Column, Row, Table

In [None]:
#| export
class Cell(BaseModel):
    """A single table cell."""

    # Required string value; presumably the cell's text content.
    c: str = Field(...)
class Column(BaseModel):
    """A group of ``Cell`` objects."""

    # Required list of cells; may be empty but must be provided.
    cells: List[Cell] = Field(...)
class Row(BaseModel):
    """A table row, modelled as a list of ``Column`` objects."""

    # Required list of columns; may be empty but must be provided.
    cols: List[Column] = Field(...)
class Table(Element):
    """A table element: a list of ``Row`` objects plus the inherited summary."""

    # Required list of rows; may be empty but must be provided.
    rows: List[Row] = Field(...)

## Markdown Class with pypandoc Integration

In [None]:
#| export
class Markdown(BaseModel):
    """Markdown text with Pandoc-backed conversions and AST rewriting.

    All conversions go through ``pypandoc.convert_text``, whose positional
    order is ``(source, to, format)``: the *target* format comes first and
    the *source* format second.
    """

    # Markdown source text; empty by default.
    content: str = ""

    def to_markdown(self) -> str:
        """Return the stored Markdown text unchanged."""
        return self.content

    def to_html(self) -> str:
        """Return ``content`` converted from Markdown to HTML."""
        return pypandoc.convert_text(self.content, 'html', format='md')

    def to_latex(self) -> str:
        """Return ``content`` converted from Markdown to LaTeX."""
        return pypandoc.convert_text(self.content, 'latex', format='md')

    def walk(self, action):
        """Apply ``action`` across the Pandoc JSON AST and store the result.

        ``content`` is parsed to Pandoc's JSON AST, ``action`` is called on
        the root and then on every nested dict, and on every list that is
        itself an element of another list. The rewritten AST is rendered
        back to Markdown and assigned to ``content``.

        Args:
            action: Callable taking a node (dict or list) and returning its
                replacement. Returning ``None`` keeps the node as it is,
                so callbacks may also mutate a node in place.
        """
        import json

        ast = json.loads(pypandoc.convert_text(self.content, 'json', format='md'))

        def walk_node(node):
            replacement = action(node)
            # A None result means "leave this node alone"; substituting it
            # would put JSON nulls into the AST and break the round trip.
            if replacement is not None:
                node = replacement
            if isinstance(node, dict):
                # Only existing keys are reassigned, so the dict does not
                # change size while it is being iterated.
                for key, value in node.items():
                    if isinstance(value, list):
                        node[key] = [
                            walk_node(child) if isinstance(child, (dict, list)) else child
                            for child in value
                        ]
                    elif isinstance(value, dict):
                        node[key] = walk_node(value)
            elif isinstance(node, list):
                node = [
                    walk_node(child) if isinstance(child, (dict, list)) else child
                    for child in node
                ]
            return node

        ast = walk_node(ast)
        self.content = pypandoc.convert_text(json.dumps(ast), 'md', format='json')

    @classmethod
    def from_file(cls, filepath: pathlib.Path):
        """Build an instance from a UTF-8 encoded text file.

        Args:
            filepath: Path of the file to read. A plain string path is also
                accepted because it is wrapped in ``pathlib.Path``.

        Returns:
            A new instance of ``cls`` whose ``content`` is the file's text.
        """
        text = pathlib.Path(filepath).read_text(encoding='utf-8')
        return cls(content=text)

## Section Class: Recursive Document Structure

In [None]:
#| export
class Section(BaseModel):
    """A document section that may contain nested subsections.

    Every list field defaults to an empty list, so ``Section()`` is valid.
    """

    # Optional summary text for the section.
    summary: Optional[str] = None
    # Paragraph texts held as plain strings.
    paragraphs: List[str] = Field(default_factory=list)
    # Figures attached to this section.
    figures: List[Figure] = Field(default_factory=list)
    # Tables attached to this section.
    tables: List[Table] = Field(default_factory=list)
    # Nested sections; the string annotation is a forward reference to this
    # class, which does not exist yet while its body is being executed.
    subsections: List['Section'] = Field(default_factory=list)

    def __init__(self, summary: Optional[str] = None, paragraphs: Optional[List[str]] = None,
                 figures: Optional[List[Figure]] = None, tables: Optional[List[Table]] = None,
                 subsections: Optional[List[dict]] = None):
        """Create a section, treating ``None`` for any list as an empty list.

        Args:
            summary: Optional summary text.
            paragraphs: Paragraph strings, or ``None``.
            figures: ``Figure`` objects, or ``None``.
            tables: ``Table`` objects, or ``None``.
            subsections: Nested sections, or ``None``. Each item may be a
                dict of ``Section`` keyword arguments or an existing
                ``Section`` instance; dicts are converted recursively.
        """
        # Recursively initialize subsections if provided as dicts; anything
        # else is passed through untouched.
        subs = [
            Section(**item) if isinstance(item, dict) else item
            for item in (subsections or [])
        ]
        super().__init__(
            summary=summary,
            paragraphs=paragraphs or [],
            figures=figures or [],
            tables=tables or [],
            subsections=subs,
        )

    @classmethod
    def init(cls, md: Markdown):
        """Placeholder for building a section from Markdown.

        Currently ignores ``md`` and returns an empty instance.
        """
        return cls()

    @classmethod
    def update_forward_refs(cls):
        """Rebuild this model so the 'Section' forward reference resolves.

        The rebuild must target this class: calling ``model_rebuild`` on
        ``BaseModel`` itself does nothing for this model's schema.
        """
        cls.model_rebuild()
# Support for recursive Section references: Section.subsections is annotated
# with the string 'Section', and this call is meant to resolve that forward
# reference once the class body has finished executing.
Section.update_forward_refs()