# 1. Loading the Data from JSon File using JSONLOader

In [25]:
from dotenv import load_dotenv
load_dotenv()

True

In [26]:
from  langchain_community.document_loaders import JSONLoader

In [27]:
loader = JSONLoader(
    file_path="D:/ML(ExtraClass Project)/RAG_PROJECT/HR_Policy_ChatBot-RAG-/data/hr_policy.json",
    jq_schema=".[] | {text: (.title + \": \" + .description)}",
    text_content=False
)

docs = loader.load()
print("Documents loaded successfully.")
print(docs)  # This will print the loaded documents to verify the output
    

Documents loaded successfully.
[Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 1}, page_content='{"text": "Leave Policy: Employees are entitled to 20 paid leaves per year, excluding public holidays."}'), Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 2}, page_content='{"text": "Remote Work Policy: Employees may work remotely up to 3 days a week upon manager approval."}'), Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 3}, page_content='{"text": "Code of Conduct: All employees are expected to maintain professionalism and ethical behavior in the workplace."}'), Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 4}, page_content='{"text": "Work Hours: Official work hours

In [28]:
# Lets see the length of the documents
print(f"Number of documents loaded: {len(docs)}")

Number of documents loaded: 20


In [29]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Adjust chunk size as needed
    chunk_overlap=200,  # Adjust overlap as needed\
)

docs = text_splitter.split_documents(docs)
print(f"Number of chunks after splitting: {len(docs)}")

Number of chunks after splitting: 20


In [30]:
for i ,doc in enumerate(docs):
    print(f"Chunk {i+1}: {doc.page_content[:200]}...")  # Print first 200 characters of each chunk
    print(f"Metadata: {doc.metadata}\n")  # Print metadata for each chunk

Chunk 1: {"text": "Leave Policy: Employees are entitled to 20 paid leaves per year, excluding public holidays."}...
Metadata: {'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 1}

Chunk 2: {"text": "Remote Work Policy: Employees may work remotely up to 3 days a week upon manager approval."}...
Metadata: {'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 2}

Chunk 3: {"text": "Code of Conduct: All employees are expected to maintain professionalism and ethical behavior in the workplace."}...
Metadata: {'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 3}

Chunk 4: {"text": "Work Hours: Official work hours are from 9 AM to 6 PM, Monday to Friday. A 1-hour break is given for lunch."}...
Metadata: {'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 4}

Ch

# 2 Creating Embedding Using HuggingFaceEmbeddings

In [31]:
from langchain_huggingface import HuggingFaceEmbeddings


In [32]:
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [33]:
# lets create a LLM using ChatGroq

from langchain_groq import ChatGroq

In [34]:
llm = ChatGroq(model="llama-3.1-8b-instant",temperature=0.1)

In [35]:
# creating a vector store using Chromadb
from langchain.vectorstores import Chroma

In [36]:
vectorstore = Chroma.from_documents(
    embedding=embedding,
    documents=docs,
    persist_directory="../vector_store",
    collection_name="hr_policy_collection")

print("Vector store created successfully.")

Vector store created successfully.


In [37]:
# Retrieving documents from the vector store

reteriver = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 5,
        'lambda_mult': 0.5,  # Adjust this parameter to control diversity
    }
)

In [38]:
query = "What is the company's policy on remote work?"
result = reteriver.invoke(query)

In [39]:
result

[Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 2}, page_content='{"text": "Remote Work Policy: Employees may work remotely up to 3 days a week upon manager approval."}'),
 Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 2}, page_content='{"text": "Remote Work Policy: Employees may work remotely up to 3 days a week upon manager approval."}'),
 Document(metadata={'seq_num': 2, 'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json'}, page_content='{"text": "Remote Work Policy: Employees may work remotely up to 3 days a week upon manager approval."}'),
 Document(metadata={'source': 'D:\\ML(ExtraClass Project)\\RAG_PROJECT\\HR_Policy_ChatBot-RAG-\\data\\hr_policy.json', 'seq_num': 2}, page_content='{"text": "Remote Work Policy: Employees may work remotely up to 3 days a week upon manager

In [40]:
from langchain_core.prompts import PromptTemplate


In [41]:
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an HR policy assistant. If the user's question is a casual greeting (such as "hi", "hello", "hey", "good morning", "good afternoon", "good evening"), respond with a friendly greeting and ask how you can help with HR policies.
Otherwise, use the context below to answer the user's question. If the answer is not found in the context, reply with: "Sorry, I couldn't find that in the HR policy. Please contact HR for further assistance."

Context:
{context}

Question: {question}

Answer:
"""
)

In [42]:
from langchain_core.output_parsers import StrOutputParser

In [43]:
parser = StrOutputParser()

In [44]:
# Now lets make a chain to answer the question using the LLM and the retriever using RunnableSequence
from langchain.schema.runnable import RunnableSequence


In [45]:
# : Build the chain using RunnableSequence


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

chain = RunnableSequence(
    {
        "context": reteriver | format_docs,
        "question": lambda x: x
    },
    prompt_template,
    llm,
    parser
)



In [46]:
query = "What is the company's policy on remote work?"
response = chain.invoke(query)

In [47]:
print("Response from the chain:")
print(response)

Response from the chain:
Good morning! How can I help with HR policies today?


In [48]:
print("Testing whether it greets casual greetings correctly")
response = chain.invoke("Hello there!")
print(response)

Testing whether it greets casual greetings correctly
Hello there! It's nice to meet you. How can I assist you with our HR policies today?
