In [None]:
!pip install -qU langchain langchain-core langchain-community langchain-experimental langchain-openai langchain-groq pypdf langchain-text-splitters langchain-chroma rank_bm25 flashrank

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m399.9/399.9 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m63.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0

In [None]:
import os
from google.colab import userdata
# Copy each Colab secret into the process environment under the same name,
# in the same order as before.
for _secret_name in (
    'GROQ_API_KEY',
    'OPENAI_API_KEY',
    'HUGGINGFACEHUB_API_TOKEN',
    'LANGSMITH_API_KEY',
):
    os.environ[_secret_name] = userdata.get(_secret_name)

# Setting Up LangSmith

In [None]:
# LangSmith tracing configuration: the API key (read from the LANGSMITH_API_KEY
# Colab secret), the tracing switch, and the project name used for this notebook.
os.environ.update({
    'LANGCHAIN_API_KEY': userdata.get('LANGSMITH_API_KEY'),
    'LANGSMITH_TRACING': "true",
    "LANGCHAIN_PROJECT": "Normal_Rag",
})

# Set Up LLM

In [None]:
from langchain_groq import ChatGroq

# Chat model served through Groq, configured with a low sampling temperature (0.1).
# NOTE(review): this is a "preview" model id — confirm Groq still serves it
# before relying on this notebook; preview ids are presumably subject to removal.
llm = ChatGroq(model="llama-3.2-1b-preview", temperature=0.1)

# Load PDF

In [None]:
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Location of the HR policy PDF on Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount
# call is visible in this notebook; confirm before a fresh "Run all".
file_path = '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/RAG/Hybrid_RAG/Knowledge/HR.pdf'

# The loader is only configured here; the file is read when .load() is called.
loader = PyPDFLoader(file_path=file_path)

In [None]:
# Read the PDF through the loader configured above. The resulting documents
# are what the text splitter consumes further down.
# (presumably one Document per PDF page — confirm against the PyPDFLoader docs)
docs = loader.load()

# Chunk Docs

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Splitter settings: chunks of up to 1000 units with a 300-unit overlap between
# neighbours. Length is measured with the built-in len, i.e. in characters.
# Separators are treated as literal strings, not regular expressions.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=300,
    length_function=len,
    is_separator_regex=False,
)

In [None]:
# Break the loaded documents into overlapping chunks using the splitter
# configured above; `texts` is what gets embedded into the vector store.
texts = text_splitter.split_documents(docs)

# Embeddings

In [None]:
from langchain_openai import OpenAIEmbeddings

In [None]:
# Embedding client built with the library defaults (no model is named here).
# The sanity check in the next cell reports 1536-dimensional vectors.
# (presumably authenticates via the OPENAI_API_KEY exported earlier — confirm)
embeddings = OpenAIEmbeddings()

In [None]:
# Sanity check: embed a short string and inspect the length of the result.
# Note: despite its name, `embeddings_size` holds the embedding vector itself;
# the actual size is the value of the len(...) expression below.
embeddings_size = embeddings.embed_query("Hello World")

len(embeddings_size)

1536

# Vector Store

In [None]:
from langchain_chroma import Chroma

In [None]:
persist_directory = 'hr_db'

# Reuse the on-disk store when its directory already exists, so the chunks
# are not re-embedded on every run.
db = (
    Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
    if os.path.exists(persist_directory)
    else None
)

# An existing directory is not proof of a populated index (e.g. a previous run
# failed before any chunk was stored). Loading such a store would silently
# yield a retriever that never returns anything, so build the index whenever
# the directory is missing or the loaded collection holds no entries.
if db is None or not db.get(limit=1)["ids"]:
    db = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory=persist_directory,
    )

In [None]:
# Expose the vector store as a retriever. `k` is passed through search_kwargs
# and set to 3 — the same value assigned to the keyword retriever's `k` below.
# (presumably k is the number of hits returned per query — confirm)
vectorstores_retriever = db.as_retriever(search_kwargs={"k": 3})

# Keyword Store

In [None]:
from langchain_community.retrievers import BM25Retriever

In [None]:
# Build the BM25 (keyword) index over the same chunked `texts` that were
# embedded into the vector store. Indexing the unsplit `docs` here would make
# the two halves of the hybrid retriever work at different granularities
# (whole loaded documents vs. 1000-character chunks) and push much larger
# passages into the prompt context.
keyword_retriever = BM25Retriever.from_documents(
    documents=texts,
)

In [None]:
# Set the keyword retriever's `k` to 3, the same value given to the vector
# retriever through its search_kwargs.
keyword_retriever.k = 3

# Ensemble Retriever

In [None]:
from langchain.retrievers import EnsembleRetriever

In [None]:
# Hybrid retriever combining the keyword and vector retrievers. The weights are
# listed in the same order as the retrievers: 0.3 for the keyword retriever,
# 0.7 for the vector-store retriever.
ensemble_retriever = EnsembleRetriever(
    retrievers=[keyword_retriever, vectorstores_retriever],
    weights=[0.3, 0.7],
)

# Chain With Memory

In [None]:
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

## History Aware Retriever

In [None]:
# System instruction for the question-rewriting step. The pieces below are
# joined by implicit string concatenation, so each piece that is followed by
# another sentence must end with a space — otherwise the sentences run
# together ("...chat history.Do NOT answer...").
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference context in the chat history, "
    "formulate a standalone question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
)

In [None]:
# Prompt for the rewriting step: system instruction first, then the prior
# conversation turns, then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [None]:
# Wrap the hybrid retriever so that the LLM, guided by the contextualize
# prompt, can first turn a follow-up question into a standalone query.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=ensemble_retriever,
    prompt=contextualize_q_prompt,
)

## Answer Question

In [None]:
# System instruction for the answering step. The pieces are joined by implicit
# string concatenation, so every sentence that is followed by another one must
# end with a space — otherwise the model receives "question.If you don't ..."
# and "know.Use three ...". The retrieved documents are substituted for
# {context} after a blank line.
system_prompt = (
    "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

In [None]:
# Prompt for the answering step: system instruction (which carries the
# {context} slot), then the prior conversation turns, then the user input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# RAG Chain

In [None]:
# Combine-documents chain built from the LLM and `qa_prompt`.
# (presumably the retrieved documents are inserted into the prompt's
# {context} placeholder — confirm against the create_stuff_documents_chain docs)
qa_chain = create_stuff_documents_chain(llm, qa_prompt)

In [None]:
# Full pipeline: the history-aware retriever supplies documents and the QA
# chain produces the answer. The result is read via response['answer'] below.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=qa_chain,
)

# Chat History

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

In [None]:
# Conversation memory: starts empty and is extended with a HumanMessage /
# AIMessage pair after every question asked in the cells below.
# Re-running this cell resets the conversation.
chat_history = []

In [None]:
user_input = "How many earned leaves do i get per quarter?"

# Run the question through the RAG chain together with the conversation so far.
response = rag_chain.invoke({"input": user_input, "chat_history": chat_history})

# Record this turn (question, then answer) so later questions can refer to it.
chat_history += [
    HumanMessage(content=user_input),
    AIMessage(content=response['answer']),
]

print(response['answer'])

According to the provided context, employees receive 16 earned leaves per year, or four per quarter.


In [None]:
user_input = "Can I wear jersey on monday?"

# Run the question through the RAG chain together with the conversation so far.
response = rag_chain.invoke({"input": user_input, "chat_history": chat_history})

# Record this turn (question, then answer) so later questions can refer to it.
chat_history += [
    HumanMessage(content=user_input),
    AIMessage(content=response['answer']),
]

print(response['answer'])

According to the provided context, employees are expected to wear formal full-sleeved shirts tucked into formal trousers, skirts, and appropriate footwear on Monday through Thursday, and business casuals on Fridays.


In [None]:
# Inspect the accumulated conversation: alternating human / AI messages,
# one pair per question asked above.
chat_history

[HumanMessage(content='How many earned leaves do i get per quarter?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='According to the provided context, employees receive 16 earned leaves per year, or four per quarter.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Can I wear jersey on monday?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='According to the provided context, employees are expected to wear formal full-sleeved shirts tucked into formal trousers, skirts, and appropriate footwear on Monday through Thursday, and business casuals on Fridays.', additional_kwargs={}, response_metadata={})]