In [48]:
!pip install -q langchain pypdf sentence-transformers faiss-cpu tiktoken langchain-community transformers requests
!pip install -U langchain-huggingface



In [49]:
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.llms.base import LLM
from langchain.schema import HumanMessage, AIMessage

import requests
import re
import time
from typing import Optional, List, Mapping, Any
from requests.exceptions import HTTPError

In [50]:
def call_groq_api(payload, headers, max_retries=5):
    retry_count = 0
    backoff_time = 1  # start with 1 second

    while retry_count < max_retries:
        try:
            response = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=payload)
            response.raise_for_status()
            return response.json()
        except HTTPError as http_err:
            if response.status_code == 429:
                retry_count += 1
                print(f"Rate limit exceeded. Retrying in {backoff_time} seconds... (Attempt {retry_count}/{max_retries})")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponentially increase the wait time
            else:
                print(f"HTTP error occurred: {http_err}")
                return "I'm experiencing technical difficulties. Please try again later."
        except Exception as err:
            print(f"Other error occurred: {err}")
            return "I'm experiencing technical difficulties. Please try again later."
    raise Exception("Max retries exceeded. Please try again later.")

In [51]:
class GroqLLM(LLM):
    api_key: str
    model_name: str
    endpoint_url: str = "https://api.groq.com/openai/v1/chat/completions"

    @property
    def _llm_type(self) -> str:
        return "groq_chat"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {
            "endpoint_url": self.endpoint_url,
            "model_name": self.model_name
        }

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        messages = kwargs.get("messages", [])
        if not messages:
            raise ValueError("No messages provided to GroqLLM._call(). Please pass messages as a kwarg.")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": self.model_name,
            "messages": messages,
            "max_tokens": 4096,   # Ensure this is within the model's limit
            "temperature": 0.0,
            "top_p": 1.0
        }

        try:
            result = call_groq_api(payload, headers)
            if isinstance(result, str):
                # An error message was returned
                return result
            answer_text = result["choices"][0]["message"]["content"]
            return answer_text
        except HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
            return "I'm experiencing technical difficulties. Please try again later."
        except Exception as err:
            print(f"Other error occurred: {err}")
            return "I'm experiencing technical difficulties. Please try again later."

In [52]:
groq_api_key = "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"
groq_model = "llama3-8b-8192"
llm = GroqLLM(api_key=groq_api_key, model_name=groq_model)

In [53]:
pdf_path = "lp.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load()

def clean_text(text, patterns=None):
    if patterns:
        for pattern in patterns:
            text = re.sub(pattern, '', text)
    text = re.sub(r'\.{2,}', ' ', text)
    return text

patterns_to_remove = [
    r'Confidential Page \d+',
    r'Version: \d+\.\d+',
]

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=300
)
docs = text_splitter.split_documents(pages)

cleaned_docs = []
for doc in docs:
    cleaned_content = clean_text(doc.page_content, patterns=patterns_to_remove)
    cleaned_docs.append(Document(page_content=cleaned_content, metadata=doc.metadata))
docs = cleaned_docs


In [54]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 15})

In [55]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [56]:
def build_messages(context: str, chat_history: str, question: str) -> List[dict]:
    """
    Constructs the message list.
    Add explicit instructions to not say "According to the provided PDF."
    """
    system_content = (
        "You are an AI assistant specializing in helping employees understand company policies at ContinuServe.\n"
        "Use only the provided PDF context and the conversation history to answer the questions.\n"
        "Your responses should closely follow the wording from the document.\n"
        "If the answer is not present in the context, respond with 'I don't know.'\n"
        "Do not provide additional information, interpretations, or legal advice.\n"
        "IMPORTANT: Never use phrases like 'According to the provided PDF,' 'As per the PDF,' or similar in your responses.\n"
        "If the information is present in the context, state it directly without referencing the PDF.\n\n"
        f"Context:\n{context}\n"
    )

    messages = [{"role": "system", "content": system_content}]

    if chat_history.strip():
        lines = chat_history.split("\n")
        user_msg = None
        assistant_msg = None
        for line in lines:
            line = line.strip()
            if line.lower().startswith("user:"):
                user_msg = line[5:].strip()
            elif line.lower().startswith("assistant:"):
                assistant_msg = line[10:].strip()

        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": question})
    return messages

In [57]:
def truncate_memory(memory, keep=2):
    # Keep only the last Q&A pair (last 2 messages)
    if len(memory.chat_memory.messages) > keep:
        memory.chat_memory.messages = memory.chat_memory.messages[-keep:]

In [58]:
def clean_response(response: str) -> str:
    unwanted_phrases = [
        "According to the provided PDF",
        "As per the PDF",
        "According to the document",
        "Based on the PDF",
        "According to the provided context"
    ]
    for phrase in unwanted_phrases:
        response = response.replace(phrase, "")
    return response.strip()

In [59]:
def ask_question(question: str):
    # Retrieve context
    rel_docs = retriever.get_relevant_documents(question)
    combined_context = "\n\n".join([d.page_content for d in rel_docs])

    # Build chat_history_str
    chat_history_str = ""
    if len(memory.chat_memory.messages) > 0:
        lines = []
        for msg in memory.chat_memory.messages:
            if isinstance(msg, HumanMessage):
                lines.append("User: " + msg.content)
            elif isinstance(msg, AIMessage):
                lines.append("Assistant: " + msg.content)
        chat_history_str = "\n".join(lines)

    messages = build_messages(context=combined_context, chat_history=chat_history_str, question=question)
    answer = llm._call(prompt="", messages=messages)
    cleaned_answer = clean_response(answer)

    print(f"Q: {question}\nA: {cleaned_answer}\n")

    # Update memory with Q&A pair
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(cleaned_answer)

    # Truncate memory to keep only last Q&A pair
    truncate_memory(memory)

In [60]:
test_queries_sp = [
    # Initial Queries
    "What is the notice period required when an employee resigns?",  # Initial Query
    "At what age is an employee eligible for retirement at ContinuServe?",  # Initial Query
    "Under what circumstances can an employee be terminated from ContinuServe?",  # Initial Query
    "How does the separation process differ for remote hires at ContinuServe?",  # Initial Query
    "What are the components included in the final settlement for separated employees?",  # Initial Query
    "What relieving formalities must an employee complete before separation?",  # Initial Query
    "What steps should an employee follow to initiate their resignation?",  # Initial Query
    "What is the process for handling the demise of an employee at ContinuServe?",  # Initial Query
    "How are leave balances handled during the separation process?",  # Initial Query

    # Follow-Up Queries (Memory Test)
    "Can the notice period be waived under any circumstances?",  # Follow-Up Query
    "What are the consequences if an employee is terminated for misconduct?",  # Follow-Up Query
    "What is the timeline for asset collection for remote hires during separation?",  # Follow-Up Query
    "What responsibilities does a project manager have during the relieving process?",  # Follow-Up Query
    "What forms must an employee complete in CS Connect for relieving formalities?",  # Follow-Up Query
    "How is the final settlement handled if an employee passes away during the notice period?",  # Follow-Up Query
]


In [61]:
test_queries_ep = [
    # Initial Queries
    "What is the quarterly amount allocated to employees for lunch or outing expenses?",  # Initial Query
    "Can these amounts be combined over multiple quarters?",  # Follow-Up Query

    # Comprehensive Testing Queries
    "What is the maximum time limit within which employees must claim their DSL (internet) expenses?",  # Initial Query
    "What is the maximum monthly reimbursement amount for DSL expenses?",  # Initial Query
    "How long after an event can birthday/anniversary expenses be claimed, and what happens if the deadline is missed?",  # Initial Query
    "What is the budget allocated per team for celebrating birthdays and anniversaries?",  # Initial Query
    "What documentation is required if the DSL bill is not in the employee's name?",  # Initial Query
    "What is the maximum amount allowed for DSL expenses per month?",  # Initial Query

    # Additional Queries for Testing Memory
    "Can the monthly reimbursement amount for DSL expenses be increased under any circumstances?",  # Follow-Up Query (Memory Test)
    "If an employee misses the deadline to claim birthday expenses, can they still claim it later?",  # Follow-Up Query (Memory Test)

    # Contextual Queries Referencing Previous Answers
    "Earlier, you mentioned that two quarters' amounts can be combined. How does this apply to internet expenses?",  # Follow-Up Query (Memory Test)
    "How can an employee verify the status of their claimed expenses in CS Connect?",  # Initial Query
]


In [62]:
test_queries_lp = [
# Initial query about vacation leave and immediate follow-up for memory testing
"How many vacation leave days are employees eligible for each year?", # Initial Query (Vacation Leave)
"When can these vacation leave days be taken?", # Follow-Up Query (Memory Test - referencing vacation leave)

# Another memory test scenario involving vacation leave usage
"What is the maximum number of vacation leave days that can be taken at a stretch in a month?", # Initial Query
"Can these vacation leave days be combined with work from home?", # Follow-Up Query (Memory Test - referencing the vacation leave from above)

# Maternity leave scenario with follow-up
"How long is the maternity leave, and how is it structured?", # Initial Query (Maternity Leave)
"Can maternity leave be extended beyond the allowed period under any circumstances?", # Follow-Up Query (Memory Test - referencing maternity leave)

# Paternity leave scenario with follow-up
"How many days of paternity leave are provided to male employees?", # Initial Query (Paternity Leave)
"Can paternity leave be combined with vacation leave if needed?", # Follow-Up Query (Memory Test - referencing paternity leave)

# Bereavement leave scenario with follow-up
"How many days are granted for bereavement leave work?", # Initial Query (Bereavement Leave)
"Is bereavement leave cashable?", # Follow-Up Query (Memory Test - referencing bereavement leave)


# Marriage leave scenario with follow-up
"How many days of marriage leave are employees entitled to?", # Initial Query (Marriage Leave)
"Can marriage leave be combined with other leaves like vacation leave?", # Follow-Up Query (Memory Test - referencing marriage leave)

# Education leave scenario with follow-up
"What is the procedure for availing education leave?", # Initial Query (Education Leave)
"What should employees do in case of education leave?", # Follow-Up Query (Memory Test - referencing education leave)

# Special sick leave scenario with follow-up
"For how may days can an employee get special sick leave?", # Initial Query (Special Sick Leave)
"If hospitalization is required, does it affect the duration of special sick leave?", # Follow-Up Query (Memory Test - referencing special sick leave)

]

In [63]:
test_queries_p = [
    # Initial Queries
    "how to claim internet bill reimbursment?",  # Initial Query
    "What if the bill is not under my name?",  # Initial Query
    "Give all details you have on internet bill reimbursement?",  # Initial Query
    "I have not gone to the activity. How ca I ckaim the money?",  # Initial Query

]


In [66]:
test_queries = [
# Initial query about vacation leave and immediate follow-up for memory testing
"What is the policy regarding the notice period if I leave the company?", # Initial Query (Vacation Leave)
"Anything in the document that concerns me if I leave the organization>",
"when does my probation end if I joined the company on June 12, 2024",
"what are all the necessary thing I should do if i want to leave the company after my probation period",
"how can I leave the company without any complications",
"If I am in probation period , can I leave the company as per my will?"
"what if I don't want to serve the notice period? what are my options to leave the organization peacefully?"

]

In [67]:
for q in test_queries:
    ask_question(q)

Q: What is the policy regarding the notice period if I leave the company?
A: According to the policy, after submission of resignation, the Vacation leave balance in the account of employees will be adjusted against the notice period days to be served by the employee, up to a maximum of 10 days only with the approval of Management.

Q: Anything in the document that concerns me if I leave the organization>
A: Yes, according to the document, if you leave the organization, the following applies:

* The Vacation leave balance in your account will be adjusted against the notice period days to be served by you, up to a maximum of 10 days only with the approval of Management.
* Any advance leave taken will be recovered during final settlement.
* Any leave balance adjustment towards notice period is limited to 10 working days only.
* Leave balance cannot be encashed during separation from the company. However, leave balance in your account will be adjusted against the notice period days to be s