In [1]:
import os
from langchain.document_loaders import (
    PyPDFLoader,
    TextLoader,
    CSVLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory


In [2]:
def load_hr_documents(docs_dir="hr_docs"):
    """Load every known HR source file into one flat list of documents.

    Files are loaded independently on a best-effort basis: when one file
    cannot be loaded, the failure is printed and the remaining files are
    still processed.

    Args:
        docs_dir: Directory containing the HR files. Defaults to
            ``"hr_docs"``, the location used by the original notebook.

    Returns:
        list: Documents produced by every loader that succeeded, in the order
        the sources are listed below. Empty when nothing could be loaded.
    """
    # (loader class, file name) pairs. The loader is instantiated inside the
    # try block below so that an error raised by a loader's constructor is
    # reported and skipped exactly like an error raised by ``load()``, instead
    # of aborting the whole function before any file has been read.
    # NOTE(review): the text/CSV loaders are created without an explicit
    # encoding; these French-language files may need one on platforms whose
    # default encoding is not UTF-8 -- confirm against the actual files.
    sources = [
        (PyPDFLoader, "Employee benefits policy - Valide le 24 avril 2023.pdf"),
        (TextLoader, "Employee handbook - Public.txt"),
        (TextLoader, "Les bonnes pratiques de communication à Gozem.txt"),
        (TextLoader, "Comprendre et agir contre les violences au travail à Gozem - Employés.txt"),
        (CSVLoader, "hr_faq.csv"),
    ]

    documents = []
    for loader_cls, filename in sources:
        # Plain "/" join keeps the resulting path identical to the
        # hard-coded paths the notebook used previously.
        path = f"{docs_dir}/{filename}"
        try:
            documents.extend(loader_cls(path).load())
        except Exception as e:
            # Deliberately broad: one unreadable file must not prevent the
            # others from loading. The path is included so the failing file
            # can be identified from the output.
            print(f"Error loading document '{path}': {e}")
    return documents


In [3]:
def process_hr_documents(documents, chunk_size=1500, chunk_overlap=300):
    """Split documents into overlapping chunks and tag each chunk's metadata.

    Every chunk receives two metadata entries derived from its ``source``
    metadata value:

    * ``document_type``: the file extension without the leading dot, or an
      empty string when the source has no extension.
    * ``is_confidential``: ``True`` when the source contains the word
      "confidential" (case-insensitive), otherwise ``False``.

    Args:
        documents: Documents to split; passed to the splitter unchanged.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by consecutive chunks.

    Returns:
        list: The chunks returned by the splitter, with metadata updated
        in place.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len
    )
    processed_docs = text_splitter.split_documents(documents)

    for doc in processed_docs:
        # Tolerate chunks whose metadata is missing or None.
        if getattr(doc, 'metadata', None) is None:
            doc.metadata = {}
        source = str(doc.metadata.get('source') or '')

        # splitext only looks at the final path component, so a source
        # without an extension (or with a dot in a directory name) yields ''
        # rather than a fragment of the path.
        extension = os.path.splitext(source)[1]
        doc.metadata['document_type'] = extension[1:]
        doc.metadata['is_confidential'] = 'confidential' in source.lower()

    return processed_docs


In [4]:
def create_vector_store(documents, persist_directory="chroma_store"):
    """Embed the documents with OpenAI embeddings and index them in Chroma.

    Args:
        documents: Non-empty collection of documents (chunks) to index.
        persist_directory: Directory handed to Chroma as its
            ``persist_directory``. Defaults to ``"chroma_store"``.

    Returns:
        The vector store returned by ``Chroma.from_documents``.

    Raises:
        ValueError: If ``documents`` is empty or ``None``.
    """
    # Fail fast with a clear message before any embedding call is made; an
    # empty input typically means every upstream document failed to load.
    if not documents:
        raise ValueError(
            "Cannot build the vector store: no documents were provided."
        )

    embedding_model = OpenAIEmbeddings()
    # NOTE(review): when persist_directory already holds a collection from an
    # earlier run, this presumably adds to it rather than replacing it --
    # confirm, and clear the directory first if duplicates matter.
    vectorstore = Chroma.from_documents(
        documents,
        embedding_model,
        persist_directory=persist_directory,
    )
    return vectorstore


In [5]:
# Prompt text for the HR assistant. It declares three placeholders:
# {company_name}, {context} and {question}. The trailing space after the
# first sentence is part of the original prompt text and is kept as-is.
HR_PROMPT_TEMPLATE = (
    "You are an HR assistant for {company_name}. \n"
    "Provide accurate, professional answers based on the provided context.\n"
    "If you're unsure, direct the user to contact HR.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "Answer in a professional tone while being helpful:"
)

# Prompt object wrapping the text above; all three placeholders are declared
# as input variables.
HR_PROMPT = PromptTemplate(
    input_variables=["context", "question", "company_name"],
    template=HR_PROMPT_TEMPLATE,
)


In [6]:
def initialize_hr_qa_chain(vector_store, company_name="Gozem", *,
                           model_name="gpt-4", temperature=0.2, k=5):
    """Build the conversational retrieval chain used to answer HR questions.

    Args:
        vector_store: Vector store exposing ``as_retriever``; it supplies the
            context chunks for each question.
        company_name: Value substituted for ``{company_name}`` in the HR
            prompt.
        model_name: Chat model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        k: Number of chunks the retriever is asked to return per question.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``,
        configured with conversation memory and ``return_source_documents``
        enabled.
    """
    llm = ChatOpenAI(model_name=model_name, temperature=temperature)

    # The chain is built with return_source_documents=True, so output_key
    # tells the memory to record the 'answer' output as the chat history.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key='answer'
    )

    # MMR search restricted to chunks whose metadata marks them as
    # non-confidential.
    retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": k, "filter": {"is_confidential": False}}
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        combine_docs_chain_kwargs={
            "prompt": HR_PROMPT.partial(company_name=company_name)
        },
        return_source_documents=True
    )
