In [29]:
import torch
from itertools import chain
from click import prompt
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_community.embeddings.bedrock import BedrockEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.vectorstores import Chroma

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import ollama
from langchain.document_loaders import DirectoryLoader
import streamlit as st


In [2]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
from langchain_community.document_loaders import PyMuPDFLoader

# Parse the PDF into a list of LangChain Document objects.
pdf_path = "company_data.pdf"
loader = PyMuPDFLoader(pdf_path)
data = loader.load()
data[0]  # display the first Document to sanity-check the extraction


Document(metadata={'source': 'company_data.pdf', 'file_path': 'company_data.pdf', 'page': 0, 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Ram Chandran', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Word 2021', 'producer': 'Microsoft® Word 2021', 'creationDate': "D:20240712113318+05'30'", 'modDate': "D:20240712113318+05'30'", 'trapped': ''}, page_content="Company Name: TALENTSHIP \nHR Department Details \nCompany Policies \n1. Work Hours: Employees are expected to work 40 hours \nper week. Flexible work hours are available. \n2. Leave Policy: Employees are entitled to 2 casual leaves \nand 2 sick leaves per month. \n3. Remote Work Policy: Remote work is allowed for up to \n2 days per week with prior approval from the manager. \n4. Code of Conduct: All employees must adhere to the \ncompany's code of conduct, which includes professional \nbehavior, respect for colleagues, and maintaining \nconfidentiality. \n5. Performance Reviews: Performance reviews are \ncon

In [19]:
# Inspect the raw text extracted into the first Document.
data[0].page_content


"Company Name: TALENTSHIP \nHR Department Details \nCompany Policies \n1. Work Hours: Employees are expected to work 40 hours \nper week. Flexible work hours are available. \n2. Leave Policy: Employees are entitled to 2 casual leaves \nand 2 sick leaves per month. \n3. Remote Work Policy: Remote work is allowed for up to \n2 days per week with prior approval from the manager. \n4. Code of Conduct: All employees must adhere to the \ncompany's code of conduct, which includes professional \nbehavior, respect for colleagues, and maintaining \nconfidentiality. \n5. Performance Reviews: Performance reviews are \nconducted bi-annually to ensure employees receive \nconstructive feedback and opportunities for growth. \nCompany Benefits \n1. Health Insurance: Comprehensive health insurance for \nemployees and their families. \n2. Retirement Plans: Company-contributed retirement \nsavings plan. \n3. Professional Development: Funding for certifications, \ncourses, and conferences. \n4. Paid Time O

In [20]:
# Check which container type loader.load() returned.
type(data)

list

In [21]:
def split_documents(
    documents: "list[Document]",
    chunk_size: int = 800,
    chunk_overlap: int = 80,
) -> "list[Document]":
    """Split documents into overlapping chunks measured in characters.

    Args:
        documents: Documents to split, e.g. the list returned by a loader's
            ``load()``.
        chunk_size: Target maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by neighbouring chunks.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` produces
        for ``documents`` (a list of chunk Documents).
    """
    # The annotations are strings on purpose: the original ``list(data)`` was a
    # call evaluated when the function was defined, which copied the global
    # ``data`` and failed with NameError if that cell had not run yet.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)

In [22]:
# Run the splitter on the loaded documents and display every resulting chunk.
split_documents(data)

[Document(metadata={'source': 'company_data.pdf', 'file_path': 'company_data.pdf', 'page': 0, 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Ram Chandran', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Word 2021', 'producer': 'Microsoft® Word 2021', 'creationDate': "D:20240712113318+05'30'", 'modDate': "D:20240712113318+05'30'", 'trapped': ''}, page_content="Company Name: TALENTSHIP \nHR Department Details \nCompany Policies \n1. Work Hours: Employees are expected to work 40 hours \nper week. Flexible work hours are available. \n2. Leave Policy: Employees are entitled to 2 casual leaves \nand 2 sick leaves per month. \n3. Remote Work Policy: Remote work is allowed for up to \n2 days per week with prior approval from the manager. \n4. Code of Conduct: All employees must adhere to the \ncompany's code of conduct, which includes professional \nbehavior, respect for colleagues, and maintaining \nconfidentiality. \n5. Performance Reviews: Performance reviews are \nco

In [35]:
# Keep the chunks under a name and print the first one as a spot check.
chuncks=split_documents(data)
print(chuncks[0])

page_content='Company Name: TALENTSHIP 
HR Department Details 
Company Policies 
1. Work Hours: Employees are expected to work 40 hours 
per week. Flexible work hours are available. 
2. Leave Policy: Employees are entitled to 2 casual leaves 
and 2 sick leaves per month. 
3. Remote Work Policy: Remote work is allowed for up to 
2 days per week with prior approval from the manager. 
4. Code of Conduct: All employees must adhere to the 
company's code of conduct, which includes professional 
behavior, respect for colleagues, and maintaining 
confidentiality. 
5. Performance Reviews: Performance reviews are 
conducted bi-annually to ensure employees receive 
constructive feedback and opportunities for growth. 
Company Benefits 
1. Health Insurance: Comprehensive health insurance for' metadata={'source': 'company_data.pdf', 'file_path': 'company_data.pdf', 'page': 0, 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Ram Chandran', 'subject': '', 'keywords': '', 'creator': 'Mic

In [40]:
# Display the full list of chunks.
chuncks

[Document(metadata={'source': 'company_data.pdf', 'file_path': 'company_data.pdf', 'page': 0, 'total_pages': 5, 'format': 'PDF 1.7', 'title': '', 'author': 'Ram Chandran', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Word 2021', 'producer': 'Microsoft® Word 2021', 'creationDate': "D:20240712113318+05'30'", 'modDate': "D:20240712113318+05'30'", 'trapped': ''}, page_content="Company Name: TALENTSHIP \nHR Department Details \nCompany Policies \n1. Work Hours: Employees are expected to work 40 hours \nper week. Flexible work hours are available. \n2. Leave Policy: Employees are entitled to 2 casual leaves \nand 2 sick leaves per month. \n3. Remote Work Policy: Remote work is allowed for up to \n2 days per week with prior approval from the manager. \n4. Code of Conduct: All employees must adhere to the \ncompany's code of conduct, which includes professional \nbehavior, respect for colleagues, and maintaining \nconfidentiality. \n5. Performance Reviews: Performance reviews are \nco

In [25]:
#embedding 
from openai import embeddings


def get_embedding_function():
    """Build the Bedrock embedding client.

    Returns:
        A ``BedrockEmbeddings`` instance configured with the ``"default"``
        credentials profile and the ``us-east-1`` region.
    """
    return BedrockEmbeddings(
        region_name="us-east-1",
        credentials_profile_name="default",
    )

In [37]:
# Alternative local embedder (disabled): Ollama instead of Bedrock.
#def ollama_embedder():
#   embeddings=OllamaEmbeddings(model="nomic-embed-text")
#   return embeddings

In [38]:
from langchain.vectorstores.chroma import Chroma

In [39]:
# Creating the vector database
def _chunk_ids(chuncks):
    """Return one id per chunk, formatted as ``"<source>:<page>:<index>"``.

    ``index`` counts the chunks already seen for the same source/page pair, so
    the ids are unique within one call and stable for the same input order.
    """
    seen_per_page = {}
    ids = []
    for chunk in chuncks:
        page_key = f"{chunk.metadata.get('source')}:{chunk.metadata.get('page')}"
        index = seen_per_page.get(page_key, 0)
        seen_per_page[page_key] = index + 1
        ids.append(f"{page_key}:{index}")
    return ids


def add_to_chroma(chuncks=None):
    """Store chunks in the Chroma collection persisted under ``db``.

    The original body referenced ``new_chuncks`` and ``new_chunck_ids`` without
    ever defining them, so every call raised NameError. The ids are now derived
    from each chunk's ``source``/``page`` metadata, and chunks whose id is
    already in the collection are skipped.

    Args:
        chuncks: Iterable of Documents to store. ``None`` or an empty iterable
            adds nothing.

    Returns:
        The ``Chroma`` store that was opened, so callers can reuse it.
    """
    db = Chroma(
        persist_directory="db", embedding_function=get_embedding_function()
    )

    chuncks = list(chuncks or [])
    # include=[] asks the store for ids only, not documents or embeddings.
    existing_ids = set(db.get(include=[])["ids"])

    new_chuncks = []
    new_chunck_ids = []
    for chunk, chunk_id in zip(chuncks, _chunk_ids(chuncks)):
        if chunk_id not in existing_ids:
            new_chuncks.append(chunk)
            new_chunck_ids.append(chunk_id)

    # Skip the write entirely when there is nothing new to add.
    if new_chuncks:
        db.add_documents(new_chuncks, ids=new_chunck_ids)
        db.persist()
    return db

In [9]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers import MultiQueryRetriever

In [10]:
# Chat model shared by the retriever and the answer chain in later cells.
local_model="llama3"
llm=ChatOllama(model=local_model)

In [16]:
# Prompt the user for a message.
# NOTE(review): input_txt is not referenced by any later cell shown here — confirm where it is meant to be used.
input_txt=input("Enter your message...")

In [11]:
# Prompt passed to MultiQueryRetriever.from_llm in the next cell. That
# retriever fills its prompt with a single variable named "question", so the
# placeholder has to use that name; the original "{query}" was never supplied.
# NOTE(review): this text sets an HR-assistant persona and does not ask the
# model for alternative phrasings of the question — confirm it is the prompt
# intended for query generation.
prompt= ChatPromptTemplate.from_messages([
    ("system","you are a chat HR chat bot. you guide for the HR department queries. you name is Talentship ai"),
    ("user","user query:{question}")
])

In [12]:
# `vector_db` was never defined before this cell, so the original raised
# NameError. Open the Chroma collection persisted under "db" with the same
# embedding function that add_to_chroma uses.
# Assumes the collection has already been populated — TODO confirm.
vector_db = Chroma(
    persist_directory="db",
    embedding_function=get_embedding_function(),
)

# Wrap the store's retriever so the LLM produces the queries used for retrieval.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=prompt,
)

In [13]:
# Answer template with two placeholders: {context} and {question}. The chain
# defined in the next cell maps the retriever to "context" and the incoming
# input to "question".
temeplate="""Answer the following questions as best you can. You have access to the following tools:
{context}
question: {question}"""

# Rebinds `prompt`: from here on it is the answer prompt, not the
# ChatPromptTemplate that was passed to the retriever earlier.
prompt=ChatPromptTemplate.from_template(temeplate)

In [14]:
# RAG pipeline: the incoming question goes to the retriever to fill {context}
# and is passed through unchanged to fill {question}; the rendered prompt is
# sent to the LLM and the reply is parsed into a plain string.
# RunnablePassthrough has to be instantiated — the original passed the class.
# NOTE(review): the name `chain` shadows `itertools.chain` from the import cell.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)