In [6]:
!pip install langchain_community tiktoken langchainhub chromadb langchain
!pip install mistralai
!pip install -U langchain-core langchain-mistralai



In [7]:
!pip install -U langchain-huggingface
!pip install sentence-transformers
!pip install faiss-cpu



In [8]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Import for HuggingFace pipeline
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline

from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core._api.deprecation import warn_deprecated

In [9]:
import os

# LangSmith tracing settings (fixed values).
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})

from google.colab import userdata

# (environment variable, Colab secret name) pairs, copied in this order from
# the Colab secret store into the process environment.
for env_var, secret_name in (
    ('LANGCHAIN_API_KEY', 'LANGSMITH_KEY'),
    ('MISTRAL_API', 'MISTRAL_API'),
    ('HF_TOKEN', 'HF_TOKEN'),
):
    os.environ[env_var] = userdata.get(secret_name)

1) Loading the reference document (here, a webpage):

In [10]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Only elements whose class attribute matches one of these values are parsed;
# the rest of the page is ignored.
# NOTE(review): "accordion__heading " ends with a space - confirm it is
# intentional, since it may never match a class value.
faq_classes = {
    "TextRun SCXW127701019 BCX0",
    "accordion__heading ",
    "tds-accordion__header js-accordion-header",
    "tds-accordion__content",
}
faq_strainer = bs4.SoupStrainer(class_=faq_classes)

loader = WebBaseLoader(
    web_paths=("https://www.travelers.com/car-insurance/faqs",),
    bs_kwargs={"parse_only": faq_strainer},
)
docs = loader.load()

2) Splitting the document into several chunks so it can be embedded:

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter built through the tiktoken-based constructor, with a chunk size of
# 100 and an overlap of 10 between neighbouring chunks.
splitter_options = {"chunk_size": 100, "chunk_overlap": 10}
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(**splitter_options)

# Break the loaded documents into chunks ready for embedding.
splits = text_splitter.split_documents(docs)

In [12]:
# Number of chunks produced by the splitter.
len(splits)

62

3) After splitting, the chunks are embedded and indexed so that relevant ones can be retrieved for the LLM.

In [13]:
# Sentence-embedding model used to vectorise every chunk.
EMBEDDING_MODEL_NAME = "BAAI/bge-base-en-v1.5"
embd = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Build a FAISS vector store from the chunks and their embeddings.
db = FAISS.from_documents(documents=splits, embedding=embd)

# Retriever with the library's default settings; an explicit configuration is
# kept below for reference:
# retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
retriever = db.as_retriever()

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [14]:
from langchain_mistralai.chat_models import ChatMistralAI
from langchain.prompts import ChatPromptTemplate

# Prompt asking the model to answer from its own knowledge, helped by the
# retrieved context.
template = (
    "Answer the question based on your knowledge. Use the following context to help:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Mistral chat model, authenticated with the key held in the Colab secret store.
mistral_api_key = userdata.get('MISTRAL_API')
chat = ChatMistralAI(api_key=mistral_api_key)

In [15]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser


def _split_queries(text: str) -> list[str]:
    """Split the model's reply into one query per non-blank line.

    Models often separate the alternatives with empty lines; those would
    otherwise reach the retriever as empty queries, wasting embedding calls
    and pulling in unrelated chunks.

    Args:
        text: Raw text returned by the model.

    Returns:
        The non-blank lines of ``text`` with surrounding whitespace removed.
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


# Chain: question -> prompt -> chat model -> plain string -> list of queries.
generate_queries = (
    prompt_perspectives
    | chat
    | StrOutputParser()
    | _split_queries
)
# Example usage:
# doc1 = generate_queries.invoke("What is car insurance?")
# doc1

In [16]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval results.

    Args:
        documents: One list of retrieved documents per generated query.

    Returns:
        A flat list containing each distinct document once, in the order in
        which it was first seen.
    """
    # Serialise each document to a string so it can be used as a hashable key.
    serialised = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys drops duplicates while keeping first-seen order; a plain
    # set would yield an arbitrary order that can differ between runs.
    unique_docs = dict.fromkeys(serialised)
    return [loads(doc) for doc in unique_docs]

# Retrieval pipeline: generate the alternative queries, run the retriever once
# per query, then merge the per-query results into one de-duplicated list.
retrieve_per_query = retriever.map()
retrieval_chain = generate_queries | retrieve_per_query | get_unique_union

In [17]:
# Pull the shared "rlm/rag-prompt" prompt from the LangChain Hub; rag_chain
# feeds it a mapping with "context" and "question" entries.
from langchain import hub
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [18]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr, metadata included.

    Args:
        docs: Iterable of retrieved documents exposing ``page_content``.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Full RAG pipeline: retrieve and format the context, pass the question
# through unchanged, fill the hub prompt, call the chat model, return a string.
rag_chain = (
    {"context": retrieval_chain | _format_docs,
     "question": RunnablePassthrough()}
    | prompt_hub_rag
    | chat
    | StrOutputParser()
)

In [None]:
# while True:
#     question = input("User: ")
#     if question == "exit" or question == "Exit":
#         break
#     print(rag_chain.invoke(question))

Add a delay between requests to avoid hitting the API rate limit; the `time` module can be used for this.


In [19]:
def run_the_bot(delay_seconds=1):
    """Run an interactive question/answer loop on top of ``rag_chain``.

    Reads questions from standard input until the user types ``exit`` (any
    letter case, surrounding whitespace ignored), printing the chain's answer
    to each question.

    Args:
        delay_seconds: Pause after each answered question, in seconds, to
            stay below the API rate limit. Defaults to 1.
    """
    import time

    while True:
        question = input("User: ").strip()
        # Accept "exit", "Exit", "EXIT", ... so a stray capital or space does
        # not send the exit command to the model as a question.
        if question.lower() == "exit":
            print("Have a good day!")
            break
        # Blank input would only spend a rate-limited request; ask again.
        if not question:
            continue
        print(rag_chain.invoke(question))
        time.sleep(delay_seconds)  # Wait before the next request

In [20]:
# Start the interactive question loop; type "exit" or "Exit" to stop.
run_the_bot()

User: exit
Have a good day!


A more robust solution is to use exponential backoff. This strategy increases the waiting time between retries exponentially, which is more respectful of the API limits.

In [None]:
import time
from tenacity import retry, stop_after_attempt, wait_exponential

# Retry policy: exponential waits (multiplier=1, bounded by min=1 and max=60)
# and at most 6 attempts.
_retry_wait = wait_exponential(multiplier=1, min=1, max=60)
_retry_stop = stop_after_attempt(6)


@retry(wait=_retry_wait, stop=_retry_stop)
def run_chain(question):
    """Invoke ``rag_chain`` with ``question``, retrying under the policy above."""
    return rag_chain.invoke(question)

# Interactive loop using the retrying wrapper. The exit command is matched in
# any letter case with surrounding whitespace ignored, and blank input is
# skipped so it does not spend a rate-limited request.
while True:
    question = input("User: ").strip()
    if question.lower() == "exit":
        print("Have a good day!")
        break
    if not question:
        continue
    print(run_chain(question))