# ML Paper Webpage Generator

Transform ML research papers into clean, single-page HTML summaries, each with a CSS animation that illustrates the paper's key diagram, using **Kimi K2.5** via NVIDIA AI endpoints.

**Architecture:**
```
START -> extract_pdf -> analyze_paper -> generate_webpage -> END
```

Kimi generates the entire webpage from scratch based on the paper content - no templates!

## 1. Setup & Configuration

In [None]:
import os
import getpass
import base64
import json
from io import BytesIO
from pathlib import Path
from typing import TypedDict

from dotenv import load_dotenv
from PIL import Image
import pypdfium2 as pdfium

from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.graph import StateGraph, START, END

# Load settings from a .env file (python-dotenv) so NVIDIA_API_KEY can be
# supplied without exporting it in the shell.
load_dotenv()

# Model configuration
# MODEL_NAME is passed as `model=` to ChatNVIDIA. The two DEFAULT_* values are
# only the default arguments of create_kimi_client(); both workflow nodes
# override the token limit with their own value.
MODEL_NAME = "moonshotai/kimi-k2.5"
DEFAULT_TEMPERATURE = 0.7
DEFAULT_MAX_TOKENS = 32768 

# Get API key: prefer the NVIDIA_API_KEY environment variable and fall back to
# an interactive getpass prompt when it is unset or empty.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
if not NVIDIA_API_KEY:
    NVIDIA_API_KEY = getpass.getpass("Enter your NVIDIA API key: ")

# Report only whether a key is present; the key itself is never printed.
print(f"Model: {MODEL_NAME}")
print(f"API Key configured: {'Yes' if NVIDIA_API_KEY else 'No'}")

Model: moonshotai/kimi-k2.5
API Key configured: Yes


## 2. PDF Utilities

In [3]:
def extract_pdf_pages(pdf_path: str, dpi: int = 150) -> list[Image.Image]:
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        dpi: Target rendering resolution in dots per inch.

    Returns:
        One PIL image per page, in document order.
    """
    # PDF user space is 72 units per inch, so dpi / 72 is the render scale.
    scale = dpi / 72
    images = []

    pdf = pdfium.PdfDocument(pdf_path)
    try:
        for page in pdf:
            bitmap = page.render(scale=scale)
            images.append(bitmap.to_pil())
    finally:
        # Release the document handle even when rendering a page fails.
        pdf.close()

    return images


def image_to_base64(image: Image.Image, format: str = "JPEG", quality: int = 85) -> str:
    """Encode a PIL image as a base64 ``data:`` URL.

    Args:
        image: The image to encode. It is not modified; a converted copy is
            used when the mode cannot be written as JPEG.
        format: Pillow format name. ``"JPEG"`` is matched case-insensitively
            and ``"JPG"`` is accepted as an alias for it.
        quality: JPEG quality setting; ignored for other formats.

    Returns:
        A string of the form ``data:image/<format>;base64,<payload>``.
    """
    # Modes Pillow's JPEG writer accepts as-is; everything else (RGBA, LA, P,
    # ...) would raise on save and is converted to RGB first.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    is_jpeg = format.upper() in ("JPEG", "JPG")
    # Normalise the alias so both Pillow and the MIME type see "JPEG".
    save_format = "JPEG" if is_jpeg else format

    buffer = BytesIO()
    if is_jpeg:
        if image.mode not in jpeg_writable_modes:
            image = image.convert("RGB")
        image.save(buffer, format=save_format, quality=quality)
    else:
        image.save(buffer, format=save_format)

    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    mime_type = f"image/{save_format.lower()}"
    return f"data:{mime_type};base64,{encoded}"


def images_to_base64_list(images: list[Image.Image]) -> list[str]:
    """Encode each PIL image as a base64 data URL, preserving input order."""
    # image_to_base64 is applied with its default format and quality.
    return list(map(image_to_base64, images))


print("PDF utilities loaded.")

PDF utilities loaded.


## 3. Kimi Client

In [4]:
def create_kimi_client(
    temperature: float = DEFAULT_TEMPERATURE,
    max_completion_tokens: int = DEFAULT_MAX_TOKENS,
) -> ChatNVIDIA:
    """Build a ChatNVIDIA chat model bound to MODEL_NAME.

    Args:
        temperature: Sampling temperature forwarded to the client.
        max_completion_tokens: Completion-token limit forwarded to the client.

    Returns:
        A new ChatNVIDIA instance configured with NVIDIA_API_KEY.
    """
    client_options = {
        "model": MODEL_NAME,
        "api_key": NVIDIA_API_KEY,
        "temperature": temperature,
        "max_completion_tokens": max_completion_tokens,
    }
    return ChatNVIDIA(**client_options)


client = create_kimi_client()
print(f"Client created: {type(client).__name__}")

Client created: ChatNVIDIA




## 4. Workflow State and Prompts

In [5]:
class WorkflowState(TypedDict):
    """State shared by the nodes of the paper webpage generator workflow.

    The workflow is invoked with only ``pdf_path`` set; each node then returns
    a dict holding the single key it produces.
    """
    # Path of the input PDF (the only key supplied when the workflow starts).
    pdf_path: str
    # One entry per PDF page, written by extract_pdf_node. Each entry is a full
    # "data:image/jpeg;base64,..." URL, not a bare base64 payload.
    page_images: list[str]  # base64 encoded
    # Written by analyze_paper_node: the model's JSON answer parsed into a dict,
    # or a fallback dict when the answer could not be parsed.
    paper_analysis: dict    # structured analysis from vision model
    # Written by generate_webpage_node: the HTML document as a single string.
    html_output: str        # final HTML generated by Kimi


# Step 1: Analyze the paper
# The system prompt sets the model's role; the user prompt below is sent
# together with the page images in analyze_paper_node.
ANALYSIS_SYSTEM_PROMPT = """You are an expert ML researcher. Analyze research papers and extract structured information."""

# NOTE: this prompt is sent verbatim (it is never passed through str.format),
# so the JSON braces in it are intentionally left unescaped. The keys listed
# here are the schema the rest of the workflow expects in `paper_analysis`.
ANALYSIS_USER_PROMPT = """Analyze this ML research paper. Extract the following as JSON:

{
    "title": "Paper title",
    "authors": ["Author 1", "Author 2"],
    "abstract": "Full abstract",
    "key_contributions": ["Contribution 1", "Contribution 2"],
    "sections": [{"title": "Section", "summary": "Summary"}],
    "figures": [{"number": "1", "caption": "Caption", "description": "What it shows", "page": 1}],
    "key_equations": [{"equation": "LaTeX", "meaning": "Explanation"}],
    "core_concepts": ["Concept 1", "Concept 2"],
    "methodology": "Brief methodology description",
    "results": "Key results",
    "conclusions": "Main conclusions",
    "key_diagram": "Describe the most important diagram/figure in the paper - what does it show? What are the components and how do they connect?"
}

Return ONLY valid JSON, no markdown."""


# Step 2: Generate a SIMPLE webpage with ONE animation
# The system prompt sets the model's role for the HTML generation call.
WEBPAGE_SYSTEM_PROMPT = """You are a web developer. Generate clean HTML with one CSS animation."""

# NOTE: this prompt is filled in with str.format(analysis_json=...) in
# generate_webpage_node. {analysis_json} is its only placeholder; any literal
# brace added to this text must be doubled ("{{" / "}}") or format() will fail.
WEBPAGE_USER_PROMPT = """Create a single-page HTML summary of this paper.

## Paper Analysis:
{analysis_json}

## REQUIREMENTS:

1. **Keep it concise** - Under 500 lines of HTML total
2. **Simple structure:**
   - Title and authors
   - Abstract
   - Key contributions (bullet list)
   - **ONE CSS animation** that illustrates the key_diagram from the analysis
   - Core concepts (bullet list)  
   - Conclusions

3. **The animation section:**
   - Create a CSS @keyframes animation that visualizes the paper's main diagram/architecture
   - Use simple shapes (divs, borders) to represent components
   - Animate data flow, connections, or the key process
   - Make it educational - help the reader understand how the system works

4. **NO figures/images** - The animation replaces static figures
5. **Minimal JavaScript** - Only if needed for the animation
6. **Light theme only**

Return ONLY the HTML starting with <!DOCTYPE html>."""


print("Prompts defined.")

Prompts defined.


## 5. Workflow Nodes

In [6]:
import re

def extract_pdf_node(state: WorkflowState) -> dict:
    """Workflow node: render the PDF at ``state["pdf_path"]`` into page images.

    Returns a partial state update whose ``page_images`` entry holds one
    base64 data URL per page, in page order.
    """
    source_pdf = state["pdf_path"]
    print(f"Extracting PDF: {source_pdf}")

    rendered_pages = extract_pdf_pages(source_pdf)
    page_count = len(rendered_pages)
    print(f"  Extracted {page_count} pages")

    encoded_pages = images_to_base64_list(rendered_pages)
    print("  Converted to base64")

    return {"page_images": encoded_pages}


def _extract_json_payload(text: str) -> str:
    """Return the body of a Markdown code fence in *text*, or *text* unchanged.

    A fence opened with ```json is preferred over a plain ``` fence.
    """
    for opener in ("```json", "```"):
        _, found, remainder = text.partition(opener)
        if not found:
            continue
        body = remainder.partition("```")[0]
        # Drop a language tag left on the opening fence line (e.g. "JSON"),
        # which would otherwise make the payload unparseable.
        tag, newline, rest = body.partition("\n")
        if newline and tag.strip().isalnum():
            return rest
        return body
    return text


def _fallback_analysis(raw_text: str) -> dict:
    """Build a placeholder analysis carrying the raw model text as the abstract."""
    return {
        "title": "Paper",
        "authors": [],
        "abstract": raw_text[:2000],
        "key_contributions": [],
        "sections": [],
        "figures": [],
        "key_equations": [],
        "core_concepts": [],
        "methodology": "",
        "results": "",
        "conclusions": "",
        # Requested by ANALYSIS_USER_PROMPT and referenced by the webpage
        # prompt, so the fallback carries the key too.
        "key_diagram": "",
    }


def analyze_paper_node(state: WorkflowState) -> dict:
    """Workflow node: ask the vision model for a structured paper analysis.

    Sends ANALYSIS_USER_PROMPT together with every page image in
    ``state["page_images"]`` and parses the reply as JSON.

    Returns:
        A partial state update ``{"paper_analysis": dict}``. When the reply is
        not valid JSON, or is valid JSON but not an object, a fallback dict is
        returned whose ``abstract`` holds the first 2000 characters of the reply.
    """
    print("Analyzing paper with vision model...")

    client = create_kimi_client(max_completion_tokens=8192)
    page_images = state["page_images"]

    # One multimodal message: the instructions followed by all page images.
    content = [{"type": "text", "text": ANALYSIS_USER_PROMPT}]
    content.extend(
        {"type": "image_url", "image_url": {"url": page_url}}
        for page_url in page_images
    )

    print(f"  Sending {len(page_images)} pages to vision model...")

    messages = [
        SystemMessage(content=ANALYSIS_SYSTEM_PROMPT),
        HumanMessage(content=content),
    ]

    response = client.invoke(messages)
    payload = _extract_json_payload(response.content)

    try:
        analysis = json.loads(payload.strip())
        # Downstream code treats the analysis as a mapping; a JSON list or
        # scalar is as unusable as malformed JSON.
        if not isinstance(analysis, dict):
            raise ValueError(
                f"expected a JSON object, got {type(analysis).__name__}"
            )
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"  JSON parse error: {e}")
        analysis = _fallback_analysis(payload)
    else:
        print(f"  Analysis complete: {analysis.get('title', 'Unknown')}")

    return {"paper_analysis": analysis}


def _strip_markdown_fence(text: str) -> str:
    """Remove a surrounding ``` or ```html Markdown fence from *text*, if present."""
    # Trim first: models often emit a newline after the closing fence, which
    # would otherwise hide it from the end-of-string match.
    text = text.strip()
    text = re.sub(r"\A```(?:html)?[ \t]*\n?", "", text, flags=re.IGNORECASE)
    text = re.sub(r"\n?[ \t]*```\Z", "", text)
    return text.strip()


def _inject_page_images(html: str, page_images: list[str]) -> str:
    """Replace ``{{PAGE_IMAGE_n}}`` placeholders (1-based) with page data URLs.

    Placeholders whose number is outside ``1..len(page_images)`` are replaced
    with an empty string.
    """
    def replacement(match):
        page_number = int(match.group(1))
        # Reject 0 explicitly: index -1 would silently pick the last page.
        if 1 <= page_number <= len(page_images):
            return page_images[page_number - 1]
        return ""

    return re.sub(r"\{\{PAGE_IMAGE_(\d+)\}\}", replacement, html)


def generate_webpage_node(state: WorkflowState) -> dict:
    """Workflow node: generate the HTML summary page from the paper analysis.

    Formats WEBPAGE_USER_PROMPT with ``state["paper_analysis"]`` serialized as
    JSON, calls the model, strips any Markdown fence around the reply and
    fills in ``{{PAGE_IMAGE_n}}`` placeholders if the reply contains any.

    Returns:
        A partial state update ``{"html_output": str}``.
    """
    print("Generating minimal webpage with Kimi...")

    # Lower than DEFAULT_MAX_TOKENS; the page is meant to be short.
    client = create_kimi_client(max_completion_tokens=16384)

    # Only the analysis JSON is sent - no page images.
    prompt = WEBPAGE_USER_PROMPT.format(
        analysis_json=json.dumps(state["paper_analysis"], indent=2)
    )

    messages = [
        SystemMessage(content=WEBPAGE_SYSTEM_PROMPT),
        HumanMessage(content=prompt),
    ]

    print("  Calling Kimi to generate HTML (minimal)...")
    response = client.invoke(messages)
    html = _strip_markdown_fence(response.content)

    # Inject actual images if any placeholders exist
    page_images = state.get("page_images")
    if "PAGE_IMAGE" in html and page_images:
        print("  Injecting images into HTML...")
        html = _inject_page_images(html, page_images)

    print(f"  Generated {len(html)} characters of HTML")

    return {"html_output": html}


print("Workflow nodes defined.")

Workflow nodes defined.


## 6. Build the Graph

In [7]:
def build_workflow() -> StateGraph:
    """Assemble the linear START -> extract -> analyze -> generate -> END graph.

    Returns the StateGraph itself; compiling it is left to the caller.
    """
    pipeline = [
        ("extract_pdf", extract_pdf_node),
        ("analyze_paper", analyze_paper_node),
        ("generate_webpage", generate_webpage_node),
    ]

    workflow_graph = StateGraph(WorkflowState)
    for node_name, node_fn in pipeline:
        workflow_graph.add_node(node_name, node_fn)

    # Chain consecutive stops of the route with one edge each.
    route = [START, *(node_name for node_name, _ in pipeline), END]
    for upstream, downstream in zip(route, route[1:]):
        workflow_graph.add_edge(upstream, downstream)

    return workflow_graph


print("Workflow builder defined.")

Workflow builder defined.


## 7. Paper Webpage Workflow

In [8]:
class PaperWebpageWorkflow:
    """Workflow for converting ML papers to interactive webpages.

    Wraps the compiled graph produced by ``build_workflow()``.
    """

    def __init__(self):
        """Build and compile the workflow graph."""
        graph = build_workflow()
        self.app = graph.compile()
        print("PaperWebpageWorkflow initialized.")

    def process(self, pdf_path: str) -> dict:
        """Run the workflow on a PDF and return the final workflow state.

        Args:
            pdf_path: Path of the PDF; resolved to an absolute path before use.

        Returns:
            The state returned by the compiled graph's ``invoke`` call.

        Raises:
            FileNotFoundError: If ``pdf_path`` does not exist.
        """
        path = Path(pdf_path).resolve()
        if not path.exists():
            raise FileNotFoundError(f"PDF not found: {path}")

        print(f"\nProcessing: {path}")
        print("=" * 50)

        result = self.app.invoke({"pdf_path": str(path)})

        print("=" * 50)
        print("Processing complete!")

        return result

    def process_and_save(self, pdf_path: str, output_path: "str | None" = None) -> str:
        """Run the workflow on a PDF and write the generated HTML to disk.

        Args:
            pdf_path: Path of the PDF to process.
            output_path: Where to write the HTML. Defaults to ``pdf_path``
                with its suffix replaced by ``.html``.

        Returns:
            The path the HTML was written to, as a string.

        Raises:
            FileNotFoundError: If ``pdf_path`` does not exist.
        """
        result = self.process(pdf_path)

        if output_path is None:
            output_path = Path(pdf_path).with_suffix(".html")

        # Create missing parent directories so a finished (and expensive)
        # generation is not lost to a failing open().
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(result["html_output"])

        print(f"\nSaved HTML to: {output_path}")
        return str(output_path)


print("PaperWebpageWorkflow class defined.")

PaperWebpageWorkflow class defined.


---

# Demo

In [14]:
# Create the workflow
workflow = PaperWebpageWorkflow()

PaperWebpageWorkflow initialized.


In [15]:
# Process a paper - replace the path below with the path to your own PDF
result = workflow.process_and_save("Kimi K2.5 Visual Agentic Intelligence Technical Report.pdf")


Processing: /home/chris/Code/NVIDIA/GenerativeAIExamples/oss_tutorials/Kimi_K2.5_Paper_to_Page_Vision_Workflow/Kimi K2.5 Visual Agentic Intelligence Technical Report.pdf
Extracting PDF: /home/chris/Code/NVIDIA/GenerativeAIExamples/oss_tutorials/Kimi_K2.5_Paper_to_Page_Vision_Workflow/Kimi K2.5 Visual Agentic Intelligence Technical Report.pdf
  Extracted 15 pages
  Converted to base64
Analyzing paper with vision model...




  Sending 15 pages to vision model...
  Analysis complete: Kimi K2.5: Visual Agentic Intelligence
Generating minimal webpage with Kimi...
  Calling Kimi to generate HTML (minimal)...
  Generated 13667 characters of HTML
Processing complete!

Saved HTML to: Kimi K2.5 Visual Agentic Intelligence Technical Report.html


In [None]:
# Display the paper in the notebook
from IPython.display import IFrame
from pathlib import Path
import base64

# The page was written as UTF-8, so read it back with the same encoding
# instead of relying on the platform default.
html_content = Path("Kimi K2.5 Visual Agentic Intelligence Technical Report.html").read_text(encoding="utf-8")
encoded = base64.b64encode(html_content.encode("utf-8")).decode("ascii")

# Embed the page as a base64 data URL and declare its charset so that
# non-ASCII text renders correctly inside the iframe.
IFrame(src=f"data:text/html;charset=utf-8;base64,{encoded}", width="100%", height=800)