In [1]:
from langchain.agents import initialize_agent, AgentType, Tool
from langchain.memory import ConversationBufferMemory

import sys
# sys.path.append("../")
from configs import config
from configs.llm_provider import get_llm
from tools.paper_fetch_tools_sql import paper_fetch_toolkit
from tools.paper_analyze_tools import paper_analyze_toolkit
from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner

# 1. Initialize LLM
config.LLM_PROVIDER = 'gemini'
llm = get_llm(config)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# 1. Create the planner (LLM decides on multi-step plan)
planner = load_chat_planner(llm)

# 2. Create the executor (agent capable of tool execution)
executor = load_agent_executor(
    llm=llm,
    tools=paper_fetch_toolkit+paper_analyze_toolkit,
    verbose=True
)

# 3. Combine into plan-and-execute agent
master_agent = PlanAndExecute(
    planner=planner,
    executor=executor,
    memory=memory,
    verbose=True
)

# # 2. Create fetch and analyze agents
# paper_fetch_agent = initialize_agent(
#     tools=paper_fetch_toolkit,
#     llm=llm,
#     agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
#     verbose=True
# )

# paper_analyze_agent = initialize_agent(
#     tools=paper_analyze_toolkit,
#     llm=llm,
#     agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
#     verbose=True
# )

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [2]:
from configs.logging import configure_logging
configure_logging()  # Make sure logging is set up first


In [3]:
master_agent.run("filter privacy-related papers from NIPS 2023, write a summary for each of the paper")

  master_agent.run("filter privacy-related papers from NIPS 2023, write a summary for each of the paper")




[1m> Entering new PlanAndExecute chain...[0m


ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 250
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 14
}
]

In [24]:
steps=[Step(value='Access the official NeurIPS 2023 proceedings or paper list.'), Step(value='Search the paper titles and abstracts using keywords such as "privacy," "differential privacy," "federated learning," "anonymity," "confidentiality," and "privacy-preserving."'), Step(value='Review the search results to identify papers primarily focused on privacy.'), Step(value='Select 10 distinct and relevant privacy-related papers from the identified list.'), Step(value='Present the titles and, if possible, authors or links for the 10 selected papers.'), Step(value='Given the above steps taken, please respond to the users original question.')]


NameError: name 'Step' is not defined

In [1]:
import pymupdf
from utils.paper_crawler import download_pdf

In [3]:
resp = download_pdf("https://www.dfki.de/fileadmin/user_upload/import/5224_paper12.pdf")

In [6]:
doc = pymupdf.open(resp['data']['path'])

In [16]:
doc

Document('temp/paper.pdf')

In [8]:
text = ''
for page in doc:
    text += page.get_text()

In [11]:
from pprint import pprint
len(text)

43265

In [12]:
from configs.llm_provider import get_text_splitter

In [13]:
from configs import config

In [14]:
text_splitter = get_text_splitter(config)

In [20]:
from langchain_core.documents import Document
document = [Document(text)]

In [21]:
document

[Document(metadata={}, page_content='Advances in Deep Parsing of Scholarly Paper\nContent\nUlrich Sch¨afer and Bernd Kiefer\nLanguage Technology Lab\nGerman Research Center for Artiﬁcial Intelligence (DFKI)\nCampus D3 1, D-66123 Saarbr¨ucken, Germany\n{ulrich.schaefer,kiefer}@dfki.de\nhttp://www.dfki.de/lt\nAbstract. We report on advances in deep linguistic parsing of the full\ntextual content of 8200 papers from the ACL Anthology, a collection of\nelectronically available scientiﬁc papers in the ﬁelds of Computational\nLinguistics and Language Technology.\nWe describe how – by incorporating new techniques – we increase both\nspeed and robustness of deep analysis, speciﬁcally on long sentences\nwhere deep parsing often failed in former approaches. With the current\nopen source HPSG (Head-driven phrase structure grammar) for English\n(ERG), we obtain deep parses for more than 85% of the sentences in the\n1.5 million sentences corpus, while the former approaches achieved only\napprox. 65

In [22]:
split_chunks = text_splitter.split_documents(document)

In [23]:
split_chunks

[Document(metadata={}, page_content='Advances in Deep Parsing of Scholarly Paper\nContent\nUlrich Sch¨afer and Bernd Kiefer\nLanguage Technology Lab\nGerman Research Center for Artiﬁcial Intelligence (DFKI)\nCampus D3 1, D-66123 Saarbr¨ucken, Germany\n{ulrich.schaefer,kiefer}@dfki.de\nhttp://www.dfki.de/lt\nAbstract. We report on advances in deep linguistic parsing of the full\ntextual content of 8200 papers from the ACL Anthology, a collection of\nelectronically available scientiﬁc papers in the ﬁelds of Computational\nLinguistics and Language Technology.\nWe describe how – by incorporating new techniques – we increase both\nspeed and robustness of deep analysis, speciﬁcally on long sentences'),
 Document(metadata={}, page_content='We describe how – by incorporating new techniques – we increase both\nspeed and robustness of deep analysis, speciﬁcally on long sentences\nwhere deep parsing often failed in former approaches. With the current\nopen source HPSG (Head-driven phrase structur