In [46]:
import langchain
print(langchain.__version__)

0.3.9


## Setup OS Environment Variable

In [47]:
import os
os.environ["OPENAI_API_KEY"]="XXX"

In [None]:
# # For langsmith
# os.environ["LANGCHAIN_TRACING_V2"] = "XXX"
# os.environ["LANGCHAIN_API_KEY"] = "XXX"
# os.environ["LANGCHAIN_PROJECT"] = "XXX"

## Call LLM

In [49]:
# from langchain_ollama import OllamaLLM
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="qwen2.5:7b",
    base_url="http://localhost:11434/v1",
)
response=llm.invoke("Hi")
response

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 30, 'total_tokens': 40, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen2.5:7b', 'system_fingerprint': 'fp_ollama', 'finish_reason': 'stop', 'logprobs': None}, id='run-c98e03fd-8e3a-4798-8fb8-2fbb35274acf-0', usage_metadata={'input_tokens': 30, 'output_tokens': 10, 'total_tokens': 40, 'input_token_details': {}, 'output_token_details': {}})

## Output Parser (If Needed)

In [5]:
from langchain_core.output_parsers import StrOutputParser
output_parser=StrOutputParser()
output_parser.invoke(response)

'Hello! How can I assist you today?'

## Simple Chain

In [6]:
# Chain will first go to llm then output parser
chain = llm | output_parser
chain.invoke("cao ni ma")

'抱歉，我不太明白你的意思。可以表达得礼貌一些吗？这样我才能更好地帮助你。'

## Structured Output

In [7]:
# from typing import List
from pydantic import BaseModel, Field

class CarReview(BaseModel):
    car_model: str = Field(description="Name and model of the car")
    rating: float = Field (description="Overall rating out of 5")
    pros : list[str] = Field (description="List of positive aspects")
    cons : list[str] = Field (description="List of negative aspects")
    summary : str = Field(description="Brief summary of the review")

review_text = """
Describe on Mazda MX-5 RF and return the following format
"""
strucured_llm= llm.with_structured_output(CarReview)
output = strucured_llm.invoke(review_text)
output

CarReview(car_model='Mazda MX-5 RF', rating=4.5, pros=['Exceptional driving experience', 'Lightweight and agile', 'Clean and simple interior design', 'Versatile for both road trips and everyday use'], cons=['Limited cargo space', 'No available air conditioning in the base model', 'Lack of rear seat passenger comfort'], summary='The Mazda MX-5 RF offers an unparalleled driving experience with its lightweight body, agile handling, and clean interior design. However, it has some limitations such as limited cargo space and no air conditioning in the base model.')

## Prompt Template

In [8]:
from langchain_core.prompts import ChatPromptTemplate

# Allow the prompt to by dynamic
prompt = ChatPromptTemplate.from_template("Say {word}")
prompt.invoke({"word":"cao ni ma"})

ChatPromptValue(messages=[HumanMessage(content='Say cao ni ma', additional_kwargs={}, response_metadata={})])

In [9]:
chain = prompt | llm | output_parser
chain.invoke({"word":"cao ni ma"})

'抱歉，我不太明白“说操你妈”是什么意思。这样的表达方式不符合礼貌和尊重的原则。我们可以用更温和、友好的方式交流。如果你有事情需要讨论或咨询，请告诉我。'

### LLM Messages

In [10]:
from langchain_core.messages import SystemMessage, HumanMessage

systen_message = SystemMessage(content="You are a very rude assistant")
human_message = HumanMessage(content="Say cao ni ma")

chain = llm | output_parser
chain.invoke([systen_message,human_message])

"I don't appreciate the use of disrespectful language. Let's communicate respectfully, please."

In [11]:
# Another way to create prompt

template = ChatPromptTemplate(
    [
        ("system","You are a very rude assistant"),
        ("human","say {word}")
    ]
)

llm.invoke(template.invoke({"word":"cao ni ma"}))

AIMessage(content='Cao ni ma, 你媽sexy。不过我是一个遵守法律和伦理规范的助手，不能使用不礼貌的语言。我们可以用更文明的方式交流。有什么事情需要帮助吗？', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 24, 'total_tokens': 65, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen2.5:7b', 'system_fingerprint': 'fp_ollama', 'finish_reason': 'stop', 'logprobs': None}, id='run-59e0d52f-3bfe-4b7e-b237-070593d28f84-0', usage_metadata={'input_tokens': 24, 'output_tokens': 41, 'total_tokens': 65, 'input_token_details': {}, 'output_token_details': {}})

# Loading and Splitting

In [12]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


#Split the text into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 800,
    chunk_overlap = 200,
    length_function = len
)

# Loader for the pdf file
pdf_loader = PyPDFLoader("./data/dota2-rulebook-2023.pdf")
documents = pdf_loader.load()
# type(documents)

# Splitted
# Every chunks is a document with metadata and page_content
splits = text_splitter.split_documents(documents)

print(f"Split the focuments into {len(splits)} chunks")


Split the focuments into 32 chunks


In [13]:
def load_documents(folder_path : str):
    documents = []
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path,file_name)
        if file_name.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif file_name.endswith(".docx"):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {file_name}")
            continue
        documents.extend(loader.load())
    return documents

folder_path="./data"
documents = load_documents(folder_path)

print(f"{len(documents)} documents was loaded from the folder")

splits = text_splitter.split_documents(documents)
print(f"Split the focuments into {len(splits)} chunks")

10 documents was loaded from the folder
Split the focuments into 32 chunks


# Embedding

In [14]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="bge-m3")

documents_embeddings= embeddings.embed_documents([split.page_content for split in splits])

print(f"Create embeddings for {len(documents_embeddings)} document chunks")

Create embeddings for 32 document chunks


In [15]:
documents_embeddings[0]

[-0.012318108,
 -0.01896861,
 -0.007533384,
 -0.014949144,
 -0.046305094,
 -0.050005674,
 0.04219894,
 -0.019349685,
 -0.030494025,
 -0.016131558,
 -0.0083563095,
 0.02912472,
 -0.05832365,
 -0.030069739,
 -0.0017480431,
 -0.012076243,
 -0.0058457325,
 0.016665662,
 -0.03503771,
 -0.038939834,
 -0.045633387,
 -0.022130696,
 0.024176326,
 0.012748505,
 -0.030369004,
 0.0028858692,
 0.027219623,
 -0.01630795,
 0.055444337,
 0.051823795,
 -0.0020822315,
 0.045239218,
 -0.045529626,
 -0.039202347,
 -0.012634255,
 -0.017244466,
 0.0018333051,
 -0.039565504,
 -0.05343017,
 0.04016935,
 -0.015429342,
 -0.02183165,
 -0.0048731337,
 -0.00976782,
 0.037761547,
 -0.017455343,
 -0.0071630203,
 -0.012385872,
 -0.03041694,
 -0.038667496,
 -0.031649366,
 0.02771497,
 -0.013010854,
 0.008756019,
 0.032912645,
 0.062723555,
 0.016582714,
 0.00681887,
 -0.052130047,
 -0.024426462,
 -0.0399711,
 0.016889125,
 -0.040020384,
 -0.02089072,
 0.0024421595,
 0.04926584,
 0.054464836,
 -0.020275882,
 -0.0072909

# Create and persist Chroma vector store

In [16]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

embedding_function = OllamaEmbeddings(model = "bge-m3")
collection_name = "RAG_Chat"
vector_store = Chroma.from_documents(collection_name= collection_name ,documents=splits,embedding=embedding_function, persist_directory="./chroma_db")

print("Vector store created and persisted to './chroma_db'")

Vector store created and persisted to './chroma_db'


## Perform Similiarity Search


In [17]:
query = "What is the minimum age of the participants for WE Championship "
search_result = vector_store.similarity_search(query, k = 2)

print(f"\nTop 2 most relevant chunks for the query ; '{query}'\n")
for i, result in enumerate(search_result, 1):
    print(f"Result {i}:")
    print(f"Source: {result.metadata.get('source','Unknown')}")
    print(f"Content: {result.page_content}")
    print()


Top 2 most relevant chunks for the query ; 'What is the minimum age of the participants for WE Championship '

Result 1:
Source: ./data/dota2-rulebook-2023.pdf
Content: A. AllparticipatingcountriesneedtoprovidelegaldocumentssuchasFullNames,DatesofBirth,ParentalDeclarationofConsentforMinors(ifapplicable),Passports,orIDcopies.B. EachNationalFederationissolelyresponsibleforthetruthfulnessofthedataprovidedtoIESFwithintheentrysubmission.C. AllparticipantsoftheWEChampionshipmustbe16yearsorolder.D. Ifnecessary,fromthepointofviewofnationallegislation,anNFcanchangetheeligibleageofparticipationfortheirplayersincreasingly.E. Tournamentofficialsandtournamentstaffcannotparticipateineventsthattheyactivelypresideover.F. OnlyplayerswithavalidSteamaccount-unlessofficiallybanned-areallowedtoparticipateinthecompetition.G.

Result 2:
Source: ./data/dota2-rulebook-2023.pdf
Content: A. AllparticipatingcountriesneedtoprovidelegaldocumentssuchasFullNames,DatesofBirth,ParentalDeclarationofConsentforMinors(ifa

In [18]:
 # However vector store do not have invoke, so itself cannot be put in the chain
retriever = vector_store.as_retriever(search_kwargs={"k":2})
retriever.invoke("What is the minimum age of the participants for WE Championship")

[Document(metadata={'page': 1, 'source': './data/dota2-rulebook-2023.pdf'}, page_content='A. AllparticipatingcountriesneedtoprovidelegaldocumentssuchasFullNames,DatesofBirth,ParentalDeclarationofConsentforMinors(ifapplicable),Passports,orIDcopies.B. EachNationalFederationissolelyresponsibleforthetruthfulnessofthedataprovidedtoIESFwithintheentrysubmission.C. AllparticipantsoftheWEChampionshipmustbe16yearsorolder.D. Ifnecessary,fromthepointofviewofnationallegislation,anNFcanchangetheeligibleageofparticipationfortheirplayersincreasingly.E. Tournamentofficialsandtournamentstaffcannotparticipateineventsthattheyactivelypresideover.F. OnlyplayerswithavalidSteamaccount-unlessofficiallybanned-areallowedtoparticipateinthecompetition.G.'),
 Document(metadata={'page': 1, 'source': './data/dota2-rulebook-2023.pdf'}, page_content='A. AllparticipatingcountriesneedtoprovidelegaldocumentssuchasFullNames,DatesofBirth,ParentalDeclarationofConsentforMinors(ifapplicable),Passports,orIDcopies.B. EachNationa

## Add Retrieved Data to the Prompt

In [19]:
from langchain_core.prompts import ChatPromptTemplate
template = """ Answer the question based only on the following context:
{context}

Question: {question}

Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [20]:
from langchain.schema.runnable import RunnablePassthrough

rag_chain= (
    {"context": retriever, "question": RunnablePassthrough()} | prompt
)
rag_chain.invoke("What is the minimum age of the participants for WE Championship")

ChatPromptValue(messages=[HumanMessage(content=" Answer the question based only on the following context:\n[Document(metadata={'page': 1, 'source': './data/dota2-rulebook-2023.pdf'}, page_content='A. AllparticipatingcountriesneedtoprovidelegaldocumentssuchasFullNames,DatesofBirth,ParentalDeclarationofConsentforMinors(ifapplicable),Passports,orIDcopies.B. EachNationalFederationissolelyresponsibleforthetruthfulnessofthedataprovidedtoIESFwithintheentrysubmission.C. AllparticipantsoftheWEChampionshipmustbe16yearsorolder.D. Ifnecessary,fromthepointofviewofnationallegislation,anNFcanchangetheeligibleageofparticipationfortheirplayersincreasingly.E. Tournamentofficialsandtournamentstaffcannotparticipateineventsthattheyactivelypresideover.F. OnlyplayerswithavalidSteamaccount-unlessofficiallybanned-areallowedtoparticipateinthecompetition.G.'), Document(metadata={'page': 1, 'source': './data/dota2-rulebook-2023.pdf'}, page_content='A. AllparticipatingcountriesneedtoprovidelegaldocumentssuchasFull

In [21]:
def docs2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [29]:
rag_chain= (
    {"context": retriever | docs2str, "question": RunnablePassthrough()} 
    | prompt
    | llm
    | StrOutputParser()
)
question = "What is the minimum age of the participants for WE Championship"
response=rag_chain.invoke(question)
response

'The minimum age of the participants for the WE Championship is 16 years or older.'

# Conversational RAG

## Handling Memory (Past Conversation to be Used for Context)

### Creating a Retriever that Understood the Context through Message History

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

# Creating a list of past history
chat_history = []
chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=response)
])
chat_history

[HumanMessage(content='What is the minimum age of the participants for WE Championship', additional_kwargs={}, response_metadata={}),
 AIMessage(content='The minimum age of the participants for the WE Championship is 16 years or older.', additional_kwargs={}, response_metadata={})]

In [37]:
from langchain_core.prompts import MessagesPlaceholder

contextualize_system_prompt = (
    """
    Given a chat history and the latest user question 
    which might reference context in the chat history,
    formulate a standalone question which can be understood
    without the chat history. Do not answer the question, 
    just reformulate it if needed and otherwise return as it is.
    """
)
contextualize_prompt= ChatPromptTemplate.from_messages(
    [
        ("system",contextualize_system_prompt),
        
        # if i used("chat_history","{chat_history}"),
        # Unexpected message type: chat_history. Use one of 'human', 'user', 'ai', 'assistant', or 'system'.
        # because in the list there would be more than one message type, so we use MessagesPlaceHolder
        MessagesPlaceholder("chat_history"),
        ("human","{input}"),
    ]
)
contextualize_chain = contextualize_prompt | llm | StrOutputParser()

contextualize_chain.invoke({"input":"What is the number of players needed for every match for the competition", "chat_history": chat_history})

'How many players are required for each match in the competition?'

In [None]:
from langchain.chains import create_history_aware_retriever

history_aware_retriever = create_history_aware_retriever(
    llm,retriever,contextualize_prompt
)
history_aware_retriever.invoke({"input":"What is the number of players needed for every match for the competition", "chat_history": chat_history})

[Document(metadata={'page': 2, 'source': './data/dota2-rulebook-2023.pdf'}, page_content="Alltournamentofficialsandplayersmustbereadyatleast5minutesbeforethescheduledstarttimeinthelobbyandontheTeamspeak.Itistheteamleader'sresponsibilitytoensurethattheteamisontimeandreadyatthestarttime.\n3.3. NumberofPlayers\nEachmatchcanstartonlywithfiveplayersperteam(5v5).Incasetheteamisnotfullypresentinthelobby5minutesbeforeplannedstarttime,therefereecanissueadraftpenaltybasedondelayscausedbytheteam(SeePenaltiessection).\n2"),
 Document(metadata={'page': 2, 'source': './data/dota2-rulebook-2023.pdf'}, page_content="Alltournamentofficialsandplayersmustbereadyatleast5minutesbeforethescheduledstarttimeinthelobbyandontheTeamspeak.Itistheteamleader'sresponsibilitytoensurethattheteamisontimeandreadyatthestarttime.\n3.3. NumberofPlayers\nEachmatchcanstartonlywithfiveplayersperteam(5v5).Incasetheteamisnotfullypresentinthelobby5minutesbeforeplannedstarttime,therefereecanissueadraftpenaltybasedondelayscausedby

### Creating a Retriever that Understood the Context through Message History

In [45]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

qa_prompt= ChatPromptTemplate.from_messages(
    [
        ("system","you are a helpful AI assistant. Use the following context to answer the user's question."),
        ("system","Context:{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human","{input}")
    ]
)

question_answer_chain = create_stuff_documents_chain(llm,qa_prompt)
rag_chain=create_retrieval_chain(history_aware_retriever,question_answer_chain)
rag_chain.invoke({"input":"What is the number of players needed for every match for the competition", "chat_history": chat_history})

{'input': 'What is the number of players needed for every match for the competition',
 'chat_history': [HumanMessage(content='What is the minimum age of the participants for WE Championship', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The minimum age of the participants for the WE Championship is 16 years or older.', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={'page': 2, 'source': './data/dota2-rulebook-2023.pdf'}, page_content="Alltournamentofficialsandplayersmustbereadyatleast5minutesbeforethescheduledstarttimeinthelobbyandontheTeamspeak.Itistheteamleader'sresponsibilitytoensurethattheteamisontimeandreadyatthestarttime.\n3.3. NumberofPlayers\nEachmatchcanstartonlywithfiveplayersperteam(5v5).Incasetheteamisnotfullypresentinthelobby5minutesbeforeplannedstarttime,therefereecanissueadraftpenaltybasedondelayscausedbytheteam(SeePenaltiessection).\n2"),
  Document(metadata={'page': 2, 'source': './data/dota2-rulebook-2023.pdf'}, page_