In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Relative path to the PDF that the rest of the notebook chunks, indexes and queries.
file_path = "./Paper.pdf"
loader = PyPDFLoader(file_path)

# `docs` is reused by later cells (previewed, then split into chunks for retrieval).
docs = loader.load()

# Sanity check: how many Document objects the loader returned
# (presumably one per PDF page -- confirm against the PyPDFLoader docs).
print(len(docs))

17


In [6]:
# Peek at the first 100 characters of the first loaded document.
docs[0].page_content[:100]

'A Theory of ‘The Loop’: Policy-making and Information\nAggregation through Networks *\nJenny S. Kim†Jo'

In [9]:
from langchain_openai import ChatOpenAI

# Chat model handed to the retrieval QA chain that a later cell builds.
# NOTE(review): presumably reads the OpenAI API key from the environment -- confirm
# it is configured before running, and never hardcode it in the notebook.
llm = ChatOpenAI(model="gpt-4o")

In [8]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200; presumably measured in characters -- confirm).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with OpenAI embeddings and index it in a Chroma vector store.
# NOTE(review): re-running this cell in the same kernel may add the same chunks to
# the store a second time -- confirm, and reset the collection if duplicates appear.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Retriever view over the index; passed to the retrieval chain in a later cell.
retriever = vectorstore.as_retriever()

In [11]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the question-answering chain, written one sentence per
# fragment. `{context}` is a template placeholder (presumably filled with the
# retrieved document text by the documents chain -- confirm).
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat template: the system instructions (including the context
# placeholder) followed by the user's question, supplied under the "input" key.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


# Documents chain built from the chat model and the prompt template.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Retrieval chain that pairs the retriever with the documents chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask a single question; the result is a dict (the recorded output shows the
# "input" and "context" keys, and a later cell reads "answer").
results = rag_chain.invoke({"input": 'What is "The Loop?"'})

# Bare expression so the notebook renders the full result dict.
results

{'input': 'What is "The Loop?"',
 'context': [Document(page_content='A Theory of ‘The Loop’: Policy-making and Information\nAggregation through Networks *\nJenny S. Kim†John W. Patty‡\nAbstract\nWe describe a model of strategic, decentralized and asynchronous communication in policy-\nmaking networks. Two central focuses of the model are the actors’ awareness of who other ac-\ntors will talk to in the future and the sequential ordering of actors’ communications. We derive\nconditions for truthful “cheap-talk” communication within sequential communication networks\nand show that (1) the ordering of individuals within the network can matter above and beyond\nindividuals’ policy preferences and degree of decision-making authority, (2) sequential commu-\nnication throughout can engender credible communication in situations in which private, dyadic\ncommunication will not, and (3) sequential communication can sometimes undermine credible\ncommunication, so that exclusion of one or more “ext

In [24]:
# Show only the generated answer text from the retrieval chain result.
results["answer"]

'"The Loop" refers to a model of strategic, decentralized, and asynchronous communication within policy-making networks. It emphasizes the importance of the sequential ordering of actors\' communications and their awareness of future interactions. The loop structure can facilitate credible communication by providing a special type of commitment to sharing information broadly, but it can also sometimes undermine credibility, depending on the inclusion or exclusion of certain individuals and their positions within the network.'

In [25]:
from langchain_core.output_parsers import StrOutputParser
# Run the answer through the string output parser. In the recorded run this
# returned the same text as displaying results["answer"] directly.
StrOutputParser().invoke(results["answer"])

'"The Loop" refers to a model of strategic, decentralized, and asynchronous communication within policy-making networks. It emphasizes the importance of the sequential ordering of actors\' communications and their awareness of future interactions. The loop structure can facilitate credible communication by providing a special type of commitment to sharing information broadly, but it can also sometimes undermine credibility, depending on the inclusion or exclusion of certain individuals and their positions within the network.'

In [22]:
from typing import Any

from fastapi import FastAPI
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.pydantic_v1 import BaseModel
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from langserve import add_routes

# Tiny two-sentence FAISS index that backs the agent's lookup tool.
# NOTE: this rebinds `vectorstore` and `retriever`, which an earlier cell assigned
# to the Chroma index of the PDF; re-run that cell before re-running the RAG chain.
vectorstore = FAISS.from_texts(
    texts=["cats like fish", "dogs like sticks"],
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()


@tool
def get_eugene_thoughts(query: str) -> list:
    """Returns Eugene's thoughts on a topic."""
    # The docstring above is intentionally left byte-for-byte unchanged: the
    # `@tool` decorator uses it as the tool description shown to the model, so
    # editing it would change the agent's behavior.
    #
    # `query` is the topic text to look up; the return value is the list of
    # documents the module-level `retriever` finds for it.
    #
    # `invoke` is the standard Runnable entry point for retrievers and replaces
    # `get_relevant_documents`, which is deprecated in langchain-core.
    return retriever.invoke(query)


# Tools the agent is allowed to call.
tools = [get_eugene_thoughts]

# NOTE: this rebinds `prompt`, which an earlier cell assigned to the template
# used by the retrieval QA chain.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        # The order of the user input and the agent_scratchpad matters.
        # The agent_scratchpad is the agent's working space: it is where the
        # agent thinks, invokes tools, and sees tool outputs while responding
        # to the user input, so it has to come AFTER the user input.
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# streaming=True is required on the LLM so that individual tokens can be streamed.
# Those tokens are available through the stream_log / stream events endpoints,
# but not through the plain stream endpoint, because the agent's stream
# implementation emits action/observation pairs rather than individual tokens.
# See the client notebook for how to use the stream events endpoint.
# NOTE: this rebinds `llm`, which an earlier cell assigned to the gpt-4o model.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    streaming=True,
)

# Attach the OpenAI function definition of every tool to the model.
llm_with_tools = llm.bind(
    functions=[convert_to_openai_function(each_tool) for each_tool in tools]
)

# Agent pipeline: map the incoming dict onto the prompt variables, render the
# prompt, call the function-enabled model, then parse the model output with
# OpenAIFunctionsAgentOutputParser.
agent = (
    {
        # Pass the user's text through unchanged.
        "input": lambda payload: payload["input"],
        # Convert the steps taken so far into messages for the scratchpad slot.
        "agent_scratchpad": lambda payload: format_to_openai_functions(
            payload["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

# Executor that runs the agent together with the tools it may call.
agent_executor = AgentExecutor(agent=agent, tools=tools)

In [23]:
# Smoke test: a plain greeting that should not need any tool call.
agent_executor.invoke({"input": "Hi"})

{'input': 'Hi', 'output': 'Hello! How can I assist you today?'}