In [None]:
%pip install langchain openai pypdf chromadb tiktoken

# Document Loading

In [1]:
import os
import sys

from langchain.document_loaders import PyPDFLoader
# Point a PyPDFLoader at the Laws of the Game PDF and load it into `pages`.
pdf_path = "./LawsofTheGame2023_24.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load()

In [2]:
# Sanity check: how many entries the loader returned.
len(pages)

230

In [3]:
# Keep only entries 10..199 of `pages` (just the relevant pages), then pick
# the first kept entry so it can be inspected in the next cell.
relevant_range = slice(10, 200)
trimmed_pages = pages[relevant_range]
page = trimmed_pages[0]

In [4]:
# Preview the first 500 characters of the selected page's text.
preview = page.page_content[:500]
print(preview)

11
 Football is the greatest sport on earth. It is played on every continent, in every 
country and at many different levels. The fact that the Laws of the Game are the 
same for all football throughout the world, from the FIFA World Cup™ through 
to a game between young children in a remote village, is a considerable 
strength which must continue to be harnessed for the good of football 
everywhere.
 Football must have Laws which keep the game fair – this is a crucial foundation 
of the ‘beauti


In [5]:
# Split: configure the recursive splitter with chunk_size=1500 and
# chunk_overlap=150 so neighbouring chunks share some context.
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter_options = {"chunk_size": 1500, "chunk_overlap": 150}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [6]:
# Run the configured splitter over the trimmed pages; `docs` holds the resulting chunks.
docs = text_splitter.split_documents(trimmed_pages)

In [7]:
# Report how many pages went into the splitter and how many chunks came out.
for label, collection in (
    ("Length of trimmed_pages: ", trimmed_pages),
    ("Length of splits: ", docs),
):
    print(label, len(collection))

Length of trimmed_pages:  190
Length of splits:  246


In [None]:
# Show the first chunk produced by the splitter.
first_chunk = docs[0]
print(first_chunk)

# Embeddings
Let's take our splits and embed them.

In [8]:
from dotenv import load_dotenv

# Call load_dotenv() first (presumably to pick up a local .env file), then
# read the OpenAI key from the environment; the result is None when unset.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [9]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding model used to vectorise the chunks. No API key is passed explicitly;
# presumably it is picked up from the OPENAI_API_KEY environment variable — confirm.
embeddings = OpenAIEmbeddings()

In [None]:
#%pip install chromadb

In [10]:
# Relative path that is passed to Chroma as its `persist_directory`.
persist_directory = 'docs_2023_24/chroma/'

# Vector Database

In [11]:
from langchain.vectorstores import Chroma

# Open (or create) the persisted collection first, and only embed and insert
# the chunks when it is still empty. Calling Chroma.from_documents() on every
# run would append the same chunks again to the collection stored in
# `persist_directory`, duplicating retrieval results and paying for the
# embeddings a second time.
# NOTE: delete `persist_directory` to force a rebuild after changing the PDF
# or the splitter settings.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings
)

if vectordb._collection.count() == 0:
    vectordb.add_documents(docs)

# Should equal len(docs) after a fresh build.
print(vectordb._collection.count())

246


# Initializing the Conversational Agent
Our conversational agent needs a chat LLM, conversational memory, and a retriever over the vector database; these are combined in a `ConversationalRetrievalChain`. We create these using:

In [12]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain

# Windowed conversation memory (k=5), stored under the 'chat_history' key and
# returned as message objects.
conversational_memory = ConversationBufferWindowMemory(
    k=5, memory_key='chat_history', return_messages=True
)

# Chat completion model; temperature 0.0 is requested.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo', temperature=0.0, openai_api_key=OPENAI_API_KEY
)

# Conversational retrieval chain: the LLM plus a retriever backed by the
# vector store, with the memory above attached.
retriever = vectordb.as_retriever()
convo_chain = ConversationalRetrievalChain.from_llm(
    llm, retriever=retriever, memory=conversational_memory
)

In [13]:
from uuid import uuid4
from dotenv import load_dotenv

load_dotenv()

# Short random suffix so each run of this cell gets its own project name.
unique_id = uuid4().hex[0:8]

# os.environ only accepts strings: assigning the result of os.getenv() directly
# raises "TypeError: str expected, not NoneType" when the key is missing.
# Read it once and enable tracing only when a key is actually available.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")  # Set this in your environment / .env file
if langchain_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_PROJECT"] = f"RefGPT_2023_24_RAG_Chromadb - {unique_id}"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
else:
    # Without a key, traces cannot be uploaded; switch tracing off explicitly.
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")

In [28]:
# The question text is sent to the chain (which uses it for retrieval and for
# the LLM prompt), so the "coorect" typo is fixed to "correct".
question = (
    "What is the correct sanction if a referee awards a penalty kick for an "
    "offence which involved a defending team player challenging an opponent "
    "for the ball (excluding holding, pulling, pushing, no possibility to "
    "play the ball etc.)?"
)
# The chain returns a dict; the generated reply is stored under 'answer'.
result = convo_chain({"question": question})
result['answer']

'If a referee awards a penalty kick for an offense where a defending team player challenges an opponent for the ball, excluding holding, pulling, pushing, or any action that prevents the opponent from playing the ball, the appropriate sanction is a caution (yellow card) for the offending player.'