In [None]:
%pip install langchain openai pypdf chromadb tiktoken

# Document Loading

In [1]:
import os
import sys

from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("./LawsofTheGame2022_23.pdf")
pages = loader.load()

In [2]:
len(pages)

230

In [3]:
trimmed_pages = pages[10:200] # Just the relevant pages
page = trimmed_pages[0]

In [4]:
print(page.page_content[0:500])

11
 Football is the greatest sport on earth. It is played on every continent, in every 
country and at many different levels. The fact that the Laws of the Game are the 
same for all football throughout the world, from the FIFA World Cup™ through 
to a game between young children in a remote village, is a considerable 
strength which must continue to be harnessed for the good of football 
everywhere.
 Football must have Laws which keep the game fair – this is a crucial foundation 
of the ‘beauti


# Document Splitting

In [5]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)

In [6]:
docs = text_splitter.split_documents(trimmed_pages)

In [7]:
print("Length of trimmed_pages: ", len(trimmed_pages))
print("Length of splits: ", len(docs))

Length of trimmed_pages:  190
Length of splits:  245


In [8]:
print(docs[0])

page_content='11\n Football is the greatest sport on earth. It is played on every continent, in every \ncountry and at many different levels. The fact that the Laws of the Game are the \nsame for all football throughout the world, from the FIFA World Cup™ through \nto a game between young children in a remote village, is a considerable \nstrength which must continue to be harnessed for the good of football \neverywhere.\n Football must have Laws which keep the game fair – this is a crucial foundation \nof the ‘beautiful game’ and a vital feature of the ‘spirit’ of the game. The best \nmatches are those where the referee is rarely needed because the players play \nwith respect for each other, the match officials and the Laws.\n Football’s Laws are relatively simple compared to most other team sports, but \nas many situations are subjective and match officials are human, some \ndecisions will inevitably be wrong or cause debate and discussion. For some \npeople, this discussion is part o

# Embeddings
Let's take our splits and embed them.

In [9]:
from dotenv import load_dotenv

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [10]:
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [None]:
#%pip install chromadb

In [None]:
#!rm -rf ./docs/chroma

In [11]:
persist_directory = 'docs/chroma/'

In [None]:
#%pip install tiktoken

In [None]:
%pip install -U pinecone-client

# Vector Database

In [13]:
from langchain.vectorstores import Chroma

vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory=persist_directory
)

print(vectordb._collection.count())

245


# Initializing the Conversational Agent
Our conversational agent needs a Chat LLM, conversational memory, and a RetrievalQA chain to initialize. We create these using:

In [14]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain

# chat completion llm
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0.0
)
# conversational memory
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)
# retrieval qa chain
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(),
    memory=conversational_memory
)

Using these we can generate an answer using the run method:

In [15]:
question_1 = "How many players can each team have on the field at once?"
result = qa({"question": question_1})
result['answer']

'Each team can have a maximum of eleven players on the field at once, including one goalkeeper.'

In [16]:
question_2 = "What if a team has only 6 players?"
result = qa({"question": question_2})
result['answer']

'If a team has fewer than seven players, the match may not start or continue. The referee is not obliged to stop play if a team has fewer than seven players due to deliberate player exits, but the match must not resume after the ball has gone out of play if a team does not have the minimum number of seven players.'

But this isn't yet ready for our conversational agent. For that we need to convert this retrieval chain into a tool. We do that like so:

In [17]:
from langchain.agents import Tool

tools = [
    Tool(
        name='Knowledge Base',
        func=qa.run,
        description=(
            'use this tool when answering general knowledge queries to get '
            'more information about the topic'
        )
    )
]

Now we can initialize the agent like so:

In [18]:
from langchain.agents import initialize_agent

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)

In [19]:
query = "How long does a match last for?"
agent.run(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Knowledge Base",
    "action_input": "How long does a soccer match last?"
}[0m
Observation: [36;1m[1;3mA soccer match lasts for two equal halves of 45 minutes each.[0m
Thought:[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "A soccer match lasts for two equal halves of 45 minutes each."
}[0m

[1m> Finished chain.[0m


'A soccer match lasts for two equal halves of 45 minutes each.'

Looks great, now what if we ask it a non-general knowledge question?

In [20]:
agent("what is 2 * 7?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "The product of 2 multiplied by 7 is 14."
}[0m

[1m> Finished chain.[0m


{'input': 'what is 2 * 7?',
 'chat_history': [HumanMessage(content='How many players can each team have on the field at once?', additional_kwargs={}, example=False),
  AIMessage(content='Each team can have a maximum of eleven players on the field at once, including one goalkeeper.', additional_kwargs={}, example=False),
  HumanMessage(content='What if a team has only 6 players?', additional_kwargs={}, example=False),
  AIMessage(content='If a team has fewer than seven players, the match may not start or continue. The referee is not obliged to stop play if a team has fewer than seven players due to deliberate player exits, but the match must not resume after the ball has gone out of play if a team does not have the minimum number of seven players.', additional_kwargs={}, example=False),
  HumanMessage(content='How long does a soccer match last?', additional_kwargs={}, example=False),
  AIMessage(content='A soccer match lasts for two equal halves of 45 minutes each.', additional_kwargs={

## LangSmith Integration

In [21]:
%pip install -U "langchain[openai]"

Collecting langchain[openai]
  Downloading langchain-0.0.274-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting langsmith<0.1.0,>=0.0.21
  Downloading langsmith-0.0.26-py3-none-any.whl (34 kB)
Collecting tiktoken<0.4.0,>=0.3.2
  Downloading tiktoken-0.3.3-cp311-cp311-macosx_11_0_arm64.whl (706 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m706.4/706.4 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken, langsmith, langchain
  Attempting uninstall: tiktoken
    Found existing installation: tiktoken 0.4.0
    Uninstalling tiktoken-0.4.0:
      Successfully uninstalled tiktoken-0.4.0
  Attempting uninstall: langsmith
    Found existing installation: langsmith 0.0.19
    Uninstalling langsmith-0.0.19:
      Successfully uninstalled langsmith-0.0.19
  Attempting uninstall: langchain
    Found existing installation: l

In [23]:
from uuid import uuid4
from dotenv import load_dotenv

load_dotenv()

unique_id = uuid4().hex[0:8]
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"RefGPT_RAG_Chromadb - {unique_id}"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")  # Update to your API key

In [24]:
agent("What is an offside in football?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Knowledge Base",
    "action_input": "What is an offside in football?"
}[0m
Observation: [36;1m[1;3mIn football, the term "offside" refers to a rule that determines whether a player is in an illegal position when the ball is played to them. A player is considered offside if they are nearer to the opponent's goal line than both the ball and the second-to-last defender (usually the last outfield player) at the moment the ball is played to them. Being in an offside position does not necessarily mean an offense has been committed, but if an offside player becomes involved in active play by interfering with an opponent or gaining an advantage, it is considered an offside offense and results in an indirect free kick for the opposing team.[0m
Thought:[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "In football, the term 'offside' refers to a rule that determines whether a player is in an illegal

{'input': 'What is an offside in football?',
 'chat_history': [HumanMessage(content='How many players can each team have on the field at once?', additional_kwargs={}, example=False),
  AIMessage(content='Each team can have a maximum of eleven players on the field at once, including one goalkeeper.', additional_kwargs={}, example=False),
  HumanMessage(content='What if a team has only 6 players?', additional_kwargs={}, example=False),
  AIMessage(content='If a team has fewer than seven players, the match may not start or continue. The referee is not obliged to stop play if a team has fewer than seven players due to deliberate player exits, but the match must not resume after the ball has gone out of play if a team does not have the minimum number of seven players.', additional_kwargs={}, example=False),
  HumanMessage(content='How long does a soccer match last?', additional_kwargs={}, example=False),
  AIMessage(content='A soccer match lasts for two equal halves of 45 minutes each.', ad