In [None]:
!pip install -r requirements.txt

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Collecting langchain
  Downloading langchain-0.2.14-py3-none-any.whl (997 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m997.8/997.8 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting langchain_community
  Downloading langchain_community-0.2.12-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m161.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain_core
  Downloading langchain_core-0.2.34-py3-none-any.whl (393 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m393.9/393.9 kB[0m [31m319.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-4.3.1-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m308.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence_transformers
  Downloading sentence_tr

In [None]:
#importing the libraries:
#changes in github:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [None]:
# Load the leave-policy PDF (path is relative to the working directory).
loader = PyPDFLoader("HR-DigivateLabs-Leave-Policy.pdf")
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=2000, chunk_overlap=200)
# so that neighbouring chunks share some text at their boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=2000,
)
final_document = text_splitter.split_documents(documents)

# Display the first chunk as a quick sanity check.
final_document[0]

In [194]:
# Configure the embedding model used to vectorise the chunks and the queries:
# the BAAI/bge-small-en-v1.5 model, run on CPU, with normalised embeddings.
hugging_face_embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=dict(normalize_embeddings=True),
    model_kwargs=dict(device="cpu"),
    model_name="BAAI/bge-small-en-v1.5",
)

In [195]:
# Build the FAISS vector store from the chunks (capped at the first 500),
# embedding each one with the model configured above.
vector_store = FAISS.from_documents(
    documents=final_document[:500],
    embedding=hugging_face_embeddings,
)

In [196]:
# Sanity-check the index with a plain similarity search and print the text
# of the top-ranked chunk.
query = "How many leaves can we carry forward?"
relevant_documents = vector_store.search(
    query=query,
    search_type="similarity",
)
print(relevant_documents[0].page_content)

HR-Digivate Labs -Leave-Policy 
  
 
The above policy shall be applicable to all full time/contract Employees.  
 
Short duration leaves should only be approved by HR after consideration or BU Head and can be done only 
once or twice in a month.  
 
Carry Forward  
 
You can carry forward a maximum of 7 leaves to a new calendar year. Thus, your leave balance cannot 
exceed 22 days at any given time. For instance, if you have accumula ted 7 days of leave by the end of a year 
and have added 13 days by 1st December, your leave balance will be 20. However, if you utilize 10 days 
during December, your leave balance as on 1st January will still be 7 only.  
 
Encashment  
 
NO ENCASHMENT OF LEAVE. Un -availed leave may be adjusted at the time of separation, at the sole 
discretion  of the management.  
 
Casual  & Sick Leave  
 
NO SEPARATE CASUAL or SICK LEAVE, ALL LEAVES COMBINED INTO ONE COMMON POOL AS 
ACCRUED LEAVE . Sick leave exce eding 3 days requires submission of a medical certif

In [197]:
# Wrap the vector store in a retriever that uses similarity search with k=3.
retriever = vector_store.as_retriever(
    search_kwargs=dict(k=3),
    search_type="similarity",
)

# Display the retriever's configuration.
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fbb6f4ffa00>, search_kwargs={'k': 3})

In [198]:
#loading the huggingface api key:
# The token is a secret and must never be written into the notebook itself.
# It is taken from the HUGGINGFACEHUB_API_TOKEN environment variable when that
# is already set; otherwise the user is prompted for it without echoing input.
# Any token that has ever been committed in plain text should be revoked.
import os
from getpass import getpass

if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass("Hugging Face API token: ")

In [199]:
#loading a hugging face model:
# The sampling temperature is kept low (0.3) because the chain is expected to
# answer strictly from the retrieved policy text; a temperature of 3 flattens
# the token distribution so much that the output becomes close to random.
# NOTE(review): text-generation endpoints usually limit output length with
# `max_new_tokens`; confirm that `max_length` is honoured for this repo.
llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-v0.1",
        model_kwargs={"temperature":0.3,
                      "max_length":1000}
)

In [200]:

# Prompt used by the QA chain. It exposes two placeholders, {question} and
# {context}, and tells the model to admit when it does not know the answer.
template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved 
context to answer the question. If you don't know the answer, just say that you don't know. 
Please provide a detailed to-the-point summary of the following answer:
Question: {question} ,
Context: {context},
Answer: """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

In [201]:
# Assemble the retrieval QA chain: the retriever supplies the context, the
# "stuff" chain type is used with the custom prompt, and the source documents
# are returned alongside the answer.
retrievalQA = RetrievalQA.from_chain_type(
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt),
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

In [202]:
# Run the chain end to end on a sample question.
query = "What is the process to avail a maternity leave?"
response = retrievalQA.invoke({"query": query})

# Only the retrieved context is shown here: take the first source document
# from the response and print its text.
source_documents = response['source_documents']
context = source_documents[0].page_content
print(context)

HR-Digivate Labs -Leave-Policy 
  
 
 
 
All eligible women employees are entitled to maternity leave, as shown in the table below. The maternity 
leave is inclusive of weekly offs and public & national holidays.  
 
Types of Maternity 
Leaves  Leave Entitlement 
(In Weeks)  Documents required to be 
submitted to HR Deptt to 
avail the leave  Leave 
Commencement  
Maternity leave in case 
of women employee up 
to two surviving children  26 1. Confirmation of pregnancy 
along with t he date of delivery.  
2. Medical certificate from 
certified medical practitioner.  Not earlier than 8 
weeks prior to the 
date of delivery.  
Maternity leave in case 
of women employee with 
two or more children  12 1. Confirmation of pregnancy 
along with the date of delivery.  
2. Medical certificate from 
certified medical practitioner.  Not earlier than 6 
weeks prior to the 
date of delivery.  
Commissioning Mother  12 1. Medical Documents  
2. Birth certificate of the ch ild From the date the 
child