# Enhanced Markdown Processing with Figure Descriptions
This notebook demonstrates how to extract Markdown from PDFs and enrich it with AI-generated figure descriptions.

## Imports

In [None]:
import os

from dotenv import load_dotenv
from IPython.display import Markdown, display

from llm_synthesis.utils import extract_markdown, process_paper_with_figure_descriptions
# Load variables from a .env file into the process environment (python-dotenv).
# override=True asks dotenv to replace values that are already set in the environment.
# NOTE(review): presumably this supplies the API keys used by the extraction engine
# and the LLM further down — confirm which variables are required.
load_dotenv(override=True)

## Set Paths

In [2]:
# Input PDF locations.
# Each path can be supplied through an environment variable (for example in the
# .env file read by load_dotenv) so the notebook runs on other machines without
# editing this cell. When the variable is unset or empty, the original value is used.
MAIN_PDF_PATH = os.environ.get("MAIN_PDF_PATH") or (
    "/Users/siddharthbetala/Desktop/llm-synthesis/data/pdf_papers/1706.03762v7.pdf"  # Update this path!
)
# Optional supporting-information PDF; stays None when SI_PDF_PATH is unset or empty.
SI_PDF_PATH = os.environ.get("SI_PDF_PATH") or None

## Basic Markdown Extraction (Without Figure Descriptions)

In [3]:
# Baseline pass: convert the main PDF to Markdown with no figure descriptions.
#   engine="mistral"       selects the extraction engine by name.
#   image_mode="embedded"  presumably keeps the images inline in the Markdown — confirm.
#   save_markdown=False    nothing is saved here; the result is only displayed in the next cell.
basic_markdown = extract_markdown(
    pdf_path=MAIN_PDF_PATH, engine="mistral", image_mode="embedded", save_markdown=False
)

In [None]:
# Render the baseline extraction as rich Markdown rather than printing the raw text.
display(Markdown(data=basic_markdown))

## Enhanced Processing with Figure Descriptions

In [None]:
# Enriched pass: extract the paper again and add LLM-written figure descriptions.
#   si_pdf_path   optional supporting-information PDF; may be None when unavailable.
#   engine        PDF extraction engine, selected by name.
#   llm_name      model used to describe figures (chosen here as a cost-effective option).
#   model_kwargs  low temperature for consistent descriptions and a 3000-token budget
#                 for detailed ones; presumably forwarded to the LLM client — confirm.
#   root_dir / save_output  left at None / False in this demo.
enhanced_markdown = process_paper_with_figure_descriptions(
    pdf_path=MAIN_PDF_PATH,
    si_pdf_path=SI_PDF_PATH,
    engine="mistral",
    llm_name="gpt-4o-mini",
    model_kwargs=dict(temperature=0.1, max_tokens=3000),
    root_dir=None,
    save_output=False,
)

In [None]:
# Render the enriched result, including the generated figure descriptions, as rich Markdown.
display(Markdown(data=enhanced_markdown))