In [1]:
!pip install -q -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
#importing the libraries:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_community.vectorstores import LanceDB

from langchain_community.llms import HuggingFaceHub
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA

In [3]:
# Load the leave-policy PDF from the working directory into `documents`,
# the module-level input that get_response() later splits into chunks.
PDF_PATH = "HR-DigivateLabs-Leave-Policy.pdf"
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

In [4]:
#loading the huggingface api key:
import os
from getpass import getpass

# Never hardcode the token in the notebook: read it from the environment and
# only prompt (without echoing) when it has not been provided.
# NOTE(review): a real token was previously committed in this cell; treat it as
# compromised and revoke it in the Hugging Face account settings.
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass("Hugging Face API token: ")

In [5]:
def get_response(chunk_size,chunk_overlap,temperature,query):
    """Run the retrieval-QA chain for ``query`` and return the top retrieved chunk.

    The module-level ``documents`` list is split into chunks, embedded with
    ``BAAI/bge-small-en-v1.5`` on CPU, and indexed in a LanceDB vector store.
    A "stuff" RetrievalQA chain backed by ``mistralai/Mistral-7B-v0.1`` is then
    invoked, but only the text of the first retrieved source document is
    returned; the rest of the chain's response is not used.

    Args:
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to the same splitter.
        temperature: Value forwarded to the LLM as ``temperature`` in ``model_kwargs``.
        query: The user question, sent to the chain under the ``"query"`` key.

    Returns:
        str: ``page_content`` of the first source document, or an empty string
        when the chain returns no source documents.
    """
    #splitting into chunks:
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                                   chunk_overlap=chunk_overlap)
    final_document = text_splitter.split_documents(documents)

    #initializing embedding technique:
    hugging_face_embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={'device':'cpu'},
        encode_kwargs={'normalize_embeddings':True}
    )

    #creating the vector store (the chunk list is passed as-is; no copy is needed):
    vector_store = LanceDB.from_documents(final_document,hugging_face_embeddings)

    #creating a retriever object that returns the 3 most similar chunks:
    retriever = vector_store.as_retriever(search_type='similarity',
                                      search_kwargs={"k":3})

    #creating a prompt template:
    template = """You are a knowledgeable assistant trained to provide accurate answers based on the information 
    contained in the context. When a user asks you a question, your task is to:

    1. Carefully analyze the question.
    2. Search for the relevant information and summarize it.
    3. If the answer is found, respond with the complete and concised information.
    4. If the answer is not found in the document, respond with "I don't know."

    Question: {question} ,
    Context: {context},
    Answer: """

    prompt = PromptTemplate(template=template,
                            input_variables=["context","question"])

    #loading a hugging face model:
    llm = HuggingFaceHub(
            repo_id="mistralai/Mistral-7B-v0.1",
            model_kwargs={"temperature":temperature,
                          "max_length":500}
    )

    #creating a retrieval QA chain:
    retrievalQA = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt":prompt}
    )

    #running the chain with the query:
    response = retrievalQA.invoke({"query":query})

    # Fetching only the context from the response. Guard against an empty
    # retrieval result so the caller gets "" instead of a bare IndexError.
    source_documents = response['source_documents']
    if not source_documents:
        return ""

    return source_documents[0].page_content

In [6]:
# Smoke test: maternity-leave process question with large, heavily overlapping chunks.
# get_response() returns the text of the first retrieved source chunk, which is printed.
chunk_size, chunk_overlap, temperature = 1200, 480, 1
query = "What is the process to apply for maternity leave?"

response = get_response(chunk_size=chunk_size,
                        chunk_overlap=chunk_overlap,
                        temperature=temperature,
                        query=query)
print(response)

  from tqdm.autonotebook import tqdm, trange
  warn_deprecated(


Process  
 
o To avail maternity, the employee must notify the respective line manager of her intent to take 
maternity leave preferably no later than 15 weeks prior to the date of delivery and apply the leaves in 
Payasia.A scanned copy of the maternity certificate confi rming the date of delivery and relevant 
medical documents must be submitted to HR Deptt.  
o HR Deptt will respond to an employee‟s notification of leave plans within 28 days of being notified 
with the details of the expected return to work if the employee tak es her full entitlement.  
o In the event that childbirth occurs before the employee is due to commence maternity leave, such 
maternity leave will automatically start from the date of childbirth.  
o The employee is expected to provide a declaration in writing sta ting that the employee will not work in 
any other establishment during the period for which maternity benefits are being received. Violation of 
this policy/ clause will be viewed as serious breach

In [7]:
# Smoke test: ask about the types of maternity leave using the same 1200/480 chunking.
# get_response() returns the text of the first retrieved source chunk, which is printed.
chunk_size, chunk_overlap, temperature = 1200, 480, 1
query = "What are the Types of Maternity Leaves?"

response = get_response(chunk_size=chunk_size,
                        chunk_overlap=chunk_overlap,
                        temperature=temperature,
                        query=query)
print(response)

HR-Digivate Labs -Leave-Policy 
  
 
 
 
All eligible women employees are entitled to maternity leave, as shown in the table below. The maternity 
leave is inclusive of weekly offs and public & national holidays.  
 
Types of Maternity 
Leaves  Leave Entitlement 
(In Weeks)  Documents required to be 
submitted to HR Deptt to 
avail the leave  Leave 
Commencement  
Maternity leave in case 
of women employee up 
to two surviving children  26 1. Confirmation of pregnancy 
along with t he date of delivery.  
2. Medical certificate from 
certified medical practitioner.  Not earlier than 8 
weeks prior to the 
date of delivery.  
Maternity leave in case 
of women employee with 
two or more children  12 1. Confirmation of pregnancy 
along with the date of delivery.  
2. Medical certificate from 
certified medical practitioner.  Not earlier than 6 
weeks prior to the 
date of delivery.  
Commissioning Mother  12 1. Medical Documents  
2. Birth certificate of the ch ild From the date the 
child

In [12]:
# Smoke test: leave-of-absence question with medium-sized chunks (600 chars, 150 overlap).
# get_response() returns the text of the first retrieved source chunk, which is printed.
chunk_size, chunk_overlap, temperature = 600, 150, 1
query = "When can I avail my leave of absence?"

response = get_response(chunk_size=chunk_size,
                        chunk_overlap=chunk_overlap,
                        temperature=temperature,
                        query=query)
print(response)

Leave of absence can be availed only after completing 3 months of service from date of joining. Employees 
are encouraged to avail their leave within a cal endar year. Leave taken will be calculated based on Working 
Days only. Leave of absence can be availed in multiple of 1 day only.  Short Duration Leave (up to 2 days at a 
stretch, minimum  1 day ) can be availed of with prior intimation to Reporting Manager, followed by formal 
regularization  as mentioned  earlier.


In [39]:
# Smoke test: carry-forward question with small chunks (420 chars, 120 overlap).
# get_response() returns the text of the first retrieved source chunk, which is printed.
chunk_size, chunk_overlap, temperature = 420, 120, 1
query = "How many leaves can we carry forward?"

response = get_response(chunk_size=chunk_size,
                        chunk_overlap=chunk_overlap,
                        temperature=temperature,
                        query=query)
print(response)

Carry Forward  
 
You can carry forward a maximum of 7 leaves to a new calendar year. Thus, your leave balance cannot 
exceed 22 days at any given time. For instance, if you have accumula ted 7 days of leave by the end of a year 
and have added 13 days by 1st December, your leave balance will be 20. However, if you utilize 10 days 
during December, your leave balance as on 1st January will still be 7 only.


In [231]:
%%markdown
## Saving the model

## Saving the model


In [17]:
import os

# Ensure the folder later used as the model save directory exists;
# exist_ok=True makes re-running this cell a no-op.
os.makedirs(name='PDF_ChatBot_RatHat/models', exist_ok=True)

In [18]:
from huggingface_hub import login

# Log in to the Hugging Face Hub with the token stored in the
# HUGGINGFACEHUB_API_TOKEN environment variable (KeyError if it is unset).
login(token=os.environ['HUGGINGFACEHUB_API_TOKEN'])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /opt/app-root/src/.cache/huggingface/token
Login successful


In [None]:
#pip install optimum[exporters]

In [3]:
# Export mistralai/Mistral-7B-v0.1 to ONNX with the Optimum CLI, writing into PDF_ChatBot_RatHat/models/.
# NOTE(review): needs the `optimum` package (with exporter extras) installed in this environment — confirm.
!optimum-cli export onnx --model mistralai/Mistral-7B-v0.1 PDF_ChatBot_RatHat/models/

Framework not specified. Using pt to export the model.
Loading checkpoint shards:   0%|                          | 0/2 [00:00<?, ?it/s]

In [6]:
# NOTE(review): this cell requires the `optimum` package; the last recorded run
# failed with ModuleNotFoundError, so install it before executing.
from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoTokenizer

model_checkpoint = "mistralai/Mistral-7B-v0.1"
save_directory = "PDF_ChatBot_RatHat/models/"

# Mistral-7B is used in this notebook as a text-generation LLM, so export it
# with the causal-LM wrapper; a sequence-classification wrapper would attach a
# classification head that the checkpoint does not contain.
ort_model = ORTModelForCausalLM.from_pretrained(model_checkpoint, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Save the ONNX model and tokenizer side by side so they can be reloaded together
ort_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

ModuleNotFoundError: No module named 'optimum'

In [14]:
# Save the FAISS index
import faiss
faiss.write_index(vector_store.index, "faiss_index.index")

In [10]:
# Display the index object behind `vector_store`.
# NOTE(review): `vector_store` is not defined at notebook level in the visible cells — confirm its origin.
vector_store.index

<faiss.swigfaiss_avx512.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7f9a30b53480> >