# Check Your Environment

## Load openai key


In [48]:
import os 
from dotenv import load_dotenv,find_dotenv
_=load_dotenv(find_dotenv())
OPEN_API_KEY=os.getenv('OPENAI_API_KEY')


# Define LLM

In [49]:
from langchain_openai import ChatOpenAI
model="gpt-4o-mini"
llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=OPEN_API_KEY
)

# Document Ingestion

In [50]:
#!pip install pypdf

In [51]:
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader("SAMPLE HR MANUAL.pdf")
document=loader.load()

In [52]:
len(document)

11

In [53]:
document[9].page_content



# Text Splitter

In [54]:
# !pip install  langchain-text-splitters

In [55]:
text="""Hello world. My name is Anuradha. I teach data science.
LangChain is a powerful framework that helps build LLM applications.
"""

In [56]:
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter

# 1. CharacterTextSplitter
char_splitter = CharacterTextSplitter(
    separator=".",         # Naively split on spaces
    chunk_size=25,        # Force smaller chunks
    chunk_overlap=5
)
char_chunks = char_splitter.split_text(text)

# Print chunks
print("\n🧱 CharacterTextSplitter Chunks:\n")
print(char_chunks)
print([len(chunk)for chunk in char_chunks])


Created a chunk of size 68, which is longer than the specified 25



🧱 CharacterTextSplitter Chunks:

['Hello world', 'My name is Anuradha', 'I teach data science', 'LangChain is a powerful framework that helps build LLM applications']
[11, 19, 20, 67]


In [57]:
# 2. RecursiveCharacterTextSplitter
recursive_splitter = RecursiveCharacterTextSplitter(
    separators=[". ", " ", ""],
    chunk_size=25,
    chunk_overlap=5
)
recursive_chunks = recursive_splitter.split_text(text)


# Print chunks
print("\n🧱 RecursiveCharacterTextSplitter Chunks:\n")
print(recursive_chunks)
print([len(chunk)for chunk in recursive_chunks])



🧱 RecursiveCharacterTextSplitter Chunks:

['Hello world', '. My name is Anuradha', '. I teach data', 'data science.\nLangChain', 'is a powerful framework', 'that helps build LLM', 'LLM applications.']
[11, 21, 14, 23, 23, 20, 17]


In [58]:
recursive_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n","\n", " ", ""],
    chunk_size=400,
    chunk_overlap=20
)
chunks = recursive_splitter.split_documents(document)


# Print chunks
print(chunks)
print([len(chunk.page_content) for chunk in chunks])

[143, 368, 321, 199, 351, 373, 278, 283, 288, 297, 267, 301, 293, 277, 279, 390, 275, 296, 354, 291, 306, 299, 284, 288, 294, 373, 373, 337, 355, 386, 376, 367, 318, 393, 335, 348, 390, 354, 387, 227, 372, 395, 345, 336, 371, 319, 327, 384, 388, 335, 84, 371, 376, 383, 358, 384, 337, 388, 358, 172]


# Embedding And Storing Chunks


In [59]:
print(len(chunks))

60


In [60]:
# !pip install chromadb==0.4.17
# import chromadb
# from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

In [61]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [62]:
embedding_model = OpenAIEmbeddings(api_key=OPEN_API_KEY, model='text-embedding-3-small')


In [63]:
vector_store=Chroma.from_documents(documents=chunks,embedding=embedding_model,persist_directory="./chrome_store")

In [64]:
len(vector_store._collection.get()['documents'])

180

# LCEL Retrieval Chain


## Retriever

In [73]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 20})


## Create a Prompt Template

In [74]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

prompt = ChatPromptTemplate.from_template("""Use the following pieces of context to answer the question at the end.
If you don't know the answer, say that you don't know.
Context: {context} Question: {question}""")
llm = ChatOpenAI(model="gpt-4o-mini", api_key=OPEN_API_KEY, temperature=0)

## building lcel retrieval chain

In [75]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableLambda
chain=(
    {"context":RunnableLambda(lambda x: retriever.invoke(x['question'])),
    "question":RunnablePassthrough(),}
    | prompt
    | llm
    | StrOutputParser()
)

In [76]:
result=chain.invoke({"question":"What is the updated leave policy here?"})
print(result)

The updated leave policy includes the following provisions:

1. **Annual Leave**: All employees are entitled to thirty working days of annual leave. Leave entitlement for staff leaving the firm will be computed on a prorate basis.

2. **Maternity Leave**: Female staff are entitled to maternity leave on full pay for a maximum period of sixty working days, of which at least four weeks must follow childbirth.

3. **Paternity Leave**: Male staff are entitled to paternity leave of five working days once every year.

4. **Sick Leave**: Staff are entitled to sick leave not exceeding one month, with full pay, upon presentation of an appropriate medical report from a qualified medical practitioner.

5. **Compassionate Leave**: In cases of personal difficulty, staff may apply for and be granted leave not exceeding five working days.

This policy outlines the various types of leave available to employees and the conditions under which they can be taken.


In [77]:
result=chain.invoke({"question":"Who is the prime minister of India?"})
print(result)

I don't know.
