## Le Mans Chatbot

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_groq import ChatGroq
# Read the Groq API key from the environment (load_dotenv() above pulls in any .env file).
# The variable name is case-sensitive: it must be spelled GROQ_API_KEY.
api=os.getenv("GROQ_API_KEY")
if not api:
    # Fail here with a clear message instead of a less obvious error from the client later on.
    raise EnvironmentError("GROQ_API_KEY is not set; add it to your environment or .env file.")
# Hand the key to the client explicitly so the value read above is the one actually used.
llm=ChatGroq(model_name="Llama3-8b-8192",api_key=api)

In [2]:
# Importing libraries
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain # Combines all documents then sends to prompt template

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Embedding model ("mxbai-embed-large" via Ollama); handed to the Chroma vector store
# when the document chunks are indexed.
embeddings = OllamaEmbeddings(
    model="mxbai-embed-large",
)

In [4]:
# Load the two source pages: Britannica's article on the 24 Hours of Le Mans and
# Wikipedia's list of Le Mans winners.
# NOTE: no parser filter is passed, so each document's page_content also contains the
# pages' navigation/boilerplate text (visible in the output below). A narrower variant using
# bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_="topic-paragraph")) used to sit in this
# cell as a disabled snippet; it is recorded here as a comment instead of a dead string literal.
LE_MANS_URLS=[
    "https://www.britannica.com/sports/24-Hours-of-Le-Mans",
    "https://en.wikipedia.org/wiki/List_of_24_Hours_of_Le_Mans_winners",
]
loader=WebBaseLoader(LE_MANS_URLS)
docs=loader.load()
docs

[Document(metadata={'source': 'https://www.britannica.com/sports/24-Hours-of-Le-Mans', 'title': '24 Hours of Le Mans | Endurance Race, Automotive History, French Circuit | Britannica', 'description': '24 Hours of Le Mans, probably the world’s best-known automobile race, run annually (with few exceptions) since 1923 at the Sarthe road-racing circuit, near Le Mans, France. Since 1928 the winner has been the car that travels the greatest distance in a 24-hour time period. The racing circuit is', 'language': 'en'}, page_content="\n\n\n\n\n\n\n\n\n\n\n\n\n24 Hours of Le Mans | Endurance Race, Automotive History, French Circuit | Britannica\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch Britannica\n\n\n\n\n\nClick here to search\n\n\n\n\n\n\n\n\n\n\n\nSearch Britannica\n\n\n\n\n\nClick here to search\n\n\n\n\n\n\n\n   Subscribe\n\n\n\n   Subscribe\n\n\nLogin\n\nhttps://premium.britannica.com/premium-membership/?utm_source=premium&utm_medium=na

In [5]:
# Split the loaded documents into overlapping chunks, then embed the chunks and
# store the vectors in a Chroma vector store.
textsplitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = textsplitter.split_documents(docs)
vdb = Chroma.from_documents(documents=splits, embedding=embeddings)

In [6]:
# Wrap the vector store in a retriever with no search options overridden.
# (The name `retriver` is kept as-is because the chain-building cells refer to it.)
retriver = vdb.as_retriever()
retriver

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x00000276BB87B6B0>, search_kwargs={})

In [7]:
# Prompt template
# System instructions for the answering step. Adjacent string literals are concatenated
# verbatim, so every sentence carries its own trailing space; without it the sentences
# run together ("questions.Use ..."). "{context}" is the placeholder that receives the
# retrieved documents.
system_prompt=(
    "You are 'Le mans Assistant' an Assistant for answering le mans related questions. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you dont know the answer, say you 'Sorry! I do not have the answer to that question.'. "
    "Use max of 3 sentence and keep the answer concise."
    "\n \n"
    "{context}"
)
# Chat prompt for the history-free chain: the system instructions (with the {context}
# slot) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Build the retrieval chain: the stuff-documents chain combines the retrieved documents
# and sends them to the prompt; the retrieval chain supplies those documents from `retriver`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=question_answer_chain,
)

# Smoke-test the chain with a single question and show the full response.
response = rag_chain.invoke({"input": "List some le mans winners"})
response

{'input': 'List some le mans winners',
 'context': [Document(metadata={'language': 'en', 'source': 'https://en.wikipedia.org/wiki/List_of_24_Hours_of_Le_Mans_winners', 'title': 'List of 24 Hours of Le Mans winners - Wikipedia'}, page_content='List of 24 Hours of Le Mans winners - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n

### Now let's add chat history

In [8]:
# Ask a second question through the current rag_chain; no chat history is passed here.
response2 = rag_chain.invoke(dict(input="What is Le mans?"))
response2

{'input': 'What is Le mans?',
 'context': [Document(metadata={'description': '24 Hours of Le Mans, probably the world’s best-known automobile race, run annually (with few exceptions) since 1923 at the Sarthe road-racing circuit, near Le Mans, France. Since 1928 the winner has been the car that travels the greatest distance in a 24-hour time period. The racing circuit is', 'language': 'en', 'source': 'https://www.britannica.com/sports/24-Hours-of-Le-Mans', 'title': '24 Hours of Le Mans | Endurance Race, Automotive History, French Circuit | Britannica'}, page_content="News •\n\n\nCanada's AWA Racing prepares for 24 Hours of Le Mans after automatic invitation\n\n• Nov. 6, 2024, 9:15 AM ET (Globe and Mail)\n\n\t\t\n\t\n\n\n\n\n\t\t\t\tShow less \n\n\n24 Hours of Le Mans,  probably the world’s best-known automobile race, run annually (with few exceptions) since 1923 at the Sarthe road-racing circuit, near Le Mans, France. Since 1928 the winner has been the car that travels the greatest dist

In [9]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# System instructions for the question-reformulation step. The literals are concatenated
# verbatim, so each fragment must end with a space; the first one previously did not,
# producing "questionwhich".
contextualize_q_system_prompt=(
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt for the reformulation step: system instructions, then the prior turns
# (injected under "chat_history"), then the latest user message.
contextualize_prompt_tem = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Answering prompt with history: same system instructions as the history-free prompt,
# plus the prior turns (under "chat_history") before the user's question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# History-aware retriever built from the LLM, the base retriever and the
# question-reformulation prompt.
history_ret = create_history_aware_retriever(llm, retriver, contextualize_prompt_tem)
# Answering chain that uses the history-aware prompt.
qa_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)
# NOTE: this rebinds `rag_chain`; from here on the name refers to the history-aware chain.
rag_chain = create_retrieval_chain(
    retriever=history_ret,
    combine_docs_chain=qa_chain,
)

from langchain_core.messages import AIMessage,HumanMessage
# Two-turn demo of the history-aware chain.
chat_history=[]
question="What is Le Mans ?"
# Pass the variable itself; the literal string "question" would be sent to the model as the query.
response3=rag_chain.invoke({"input":question,"chat_history":chat_history})
# Record the first exchange so the follow-up below can refer back to it.
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=response3["answer"])
    ]
)

# Follow-up that only makes sense with the history ("it" refers to the previous turn).
question2="Tell me more about it"
response4=rag_chain.invoke({"input":question2,"chat_history":chat_history})
print(response4['answer'])

Le Mans is an annual 24-hour automobile endurance race organised by the automotive group Automobile Club de l'Ouest (ACO) and held on the Circuit de la Sarthe race track close to the city of Le Mans, the capital of the French department of Sarthe.
