In [12]:
import dotenv
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyMuPDFLoader
import os
from config import OUTPUT_DIR, DEFAULT_MODEL

dotenv.load_dotenv()

True

In [13]:
# Create OUTPUT_DIR (imported from config) if it does not exist yet.
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)


Pydantic Models for Structured Output

In [4]:
# Structured-output schema for the figure-counting chain: the LLM's reply is
# parsed into this model, which carries a single integer.
class FiguresCount(BaseModel):
    # The description string travels with the schema, so its wording is kept verbatim.
    total_figures: int = Field(
        description="Total number of figures in the paper",
    )


Load the Paper

In [5]:
# Location of the paper to analyse; change this to point the notebook at another PDF.
PAPER_PATH = "papers/p2xa_paper.pdf"

# load() returns a list of Document objects (PyMuPDFLoader yields one per PDF page).
loader = PyMuPDFLoader(PAPER_PATH)
document = loader.load()

# Fail fast: the following cells pass these pages on to the LLM chain and the
# utils helpers, so an empty load would only produce meaningless (and billed) calls.
if not document:
    raise ValueError(f"No pages could be loaded from {PAPER_PATH}")


In [6]:
from templates import FIGURE_COUNT_TEMPLATE

# Build a chain that fills FIGURE_COUNT_TEMPLATE and parses the model's reply
# into a FiguresCount instance.
llm = ChatOpenAI(model_name=DEFAULT_MODEL)
prompt = PromptTemplate(template=FIGURE_COUNT_TEMPLATE, input_variables=["text"])
chain = prompt | llm.with_structured_output(FiguresCount)

# `document` is a list of Document objects. Formatting that list straight into
# the prompt would insert its Python repr (metadata dicts, escaped newlines),
# so hand the template the plain page text instead.
paper_text = "\n\n".join(page.page_content for page in document)
response = chain.invoke({"text": paper_text})

print(response)

total_figures=5


In [7]:
# Enable IPython's autoreload extension in mode 2: modules are re-imported
# before each cell executes, so edits to the helper module below are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

from utils import process_figure_answers, expand_figure_answers, write_analysis_to_file

In [8]:
# Extract information about each figure in the paper.
# First pass: process_figure_answers receives the loaded pages and the figure
# count reported by the chain above.
# Second pass: expand_figure_answers receives the same pages plus the first-pass
# answers. Both helpers come from utils; presumably each issues one LLM request
# per figure — confirm in utils.
answers = process_figure_answers(document, response.total_figures)
expanded_answers = expand_figure_answers(document, answers)

Processing Figure 1...
Processing Figure 2...
Processing Figure 3...
Processing Figure 4...
Processing Figure 5...
Expanding Answers for Figure 1...
Expanding Answers for Figure 2...
Expanding Answers for Figure 3...
Expanding Answers for Figure 4...
Expanding Answers for Figure 5...


Let's look at our answers

In [14]:

# Write both the first-pass and the expanded per-figure answers to a text file
# inside OUTPUT_DIR. os.path.join builds the path so a trailing separator on
# OUTPUT_DIR or a non-POSIX platform does not produce a malformed path.
output_file = os.path.join(OUTPUT_DIR, "figure_analysis_results.txt")
write_analysis_to_file(answers, expanded_answers, output_file)

Analysis written successfully to output/figure_analysis_results.txt
