Import the required dependencies

In [None]:
import os
import gradio as gr
from langchain_community.llms import Ollama
from langchain.document_loaders import UnstructuredFileLoader
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader

Load the files from knowledge base

In [2]:
def load_pdfs_from_directory(directory_path):
    documents = []
    for filename in os.listdir(directory_path):
        if filename.endswith('.pdf'):
            file_path = os.path.join(directory_path, filename)
            loader = PyPDFLoader(file_path)
            documents.extend(loader.load())
    return documents


In [None]:
pdf_directory = "sih manuals"
loaded_documents = load_pdfs_from_directory(pdf_directory)

Import the llama3:8b model

In [4]:
llm = Ollama(
    model="llama3:8b",
    temperature=0
)

split the documents into chunks using CharacterTextSplitter

In [5]:
text_splitter = CharacterTextSplitter(separator="/n", chunk_size=1500, chunk_overlap=200)
text_chunks = text_splitter.split_documents(loaded_documents)

In [None]:
Convert the chunks into embeddings using sentence transformer model

In [None]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
knowledge_base = FAISS.from_documents(text_chunks, embeddings)

In [7]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=knowledge_base.as_retriever())

In [None]:
Retrieval 

In [8]:
question = "explain about the employment classification ?"
response = qa_chain.invoke({"query": question})
print(response["result"])

According to Section 4 of the handbook, Employment Classification, there are three types of employment classifications:

* Exempt
* Non-Exempt
* Part-Time, Full-Time or Temporary Status

This information is provided in subsections 4.1, 4.2, and 4.3, respectively.


In [9]:
question = "explain about the Recruitment and selection process ?"
response = qa_chain.invoke({"query": question})
print(response["result"])

According to the provided context, the recruitment and selection process at this company involves the following steps:

1. Screen and interview candidates
2. Run background checks and check references
3. Select the most suitable candidate
4. Make an official offer

These steps may overlap, so it's possible that some steps might be skipped or done simultaneously. Each member of a hiring team may have different responsibilities, such as recruiters sourcing candidates and hiring managers interviewing candidates.

Additionally, the company aims to keep candidates informed throughout the process, communicate well with each other, and give everyone an equal opportunity to work with them.


In [10]:
question = "Mention about the health and safety at the workplace ?"
response = qa_chain.invoke({"query": question})
print(response["result"])

According to the provided context, the company takes every reasonable precaution to ensure that employees have a safe working environment. The company has a workplace safety policy that includes:

* Conducting risk assessments and job hazard analyses through a workplace safety committee
* Establishing preventative measures to address risks accordingly
* Providing protective gear like gloves, protective uniforms, and goggles
* Directing inspectors and quality control employees to evaluate equipment and infrastructure regularly

The company also expects employees to take safety seriously by always using protective equipment and following standards whenever necessary. If an employee deliberately disregards the guidelines, they may be terminated for their own and others' safety.

Additionally, the company has emergency management provisions in place, including functional smoke alarms and sprinklers that are regularly inspected, technicians available to repair leakages, damages, and blackou

In [13]:
question = "explain about the workplace policies ?"
response = qa_chain.invoke({"query": question})
print(response["result"])

According to the provided context, the workplace policies are outlined in Section 2 - Workplace Commitments and Section 3 - Company Policy and Procedures.

Section 2 covers:

* Equal Opportunity Employment
* Non-Harassment / Non-Discrimination
* Drug Free / Alcohol Free
* Open Door Policy

Section 3 covers:

* Code of Professional Conduct
* Dress Code
* Payday
* Company Property
* Privacy
* Personnel Files

These policies aim to establish a positive and productive work environment, ensuring that employees are treated fairly and with respect.


In [10]:
from langchain.memory import ConversationBufferMemory


In [11]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="result")

In [12]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=knowledge_base.as_retriever(),
    memory=memory,
    return_source_documents=True  # Optional: If you want to show retrieved documents as well
)

In [13]:
def chatbot_interface(question, history):
    # If there's history, append the previous conversation
    if history is None:
        history = []

    # Use the QA chain to generate a response
    response = qa_chain({"query": question, "chat_history": history})
    
    # Append the question and response to the history
    history.append((question, response["result"]))

    # Return the updated history
    return history, history

In [14]:
with gr.Blocks() as interface:
    chatbot = gr.Chatbot()
    with gr.Row():
        msg = gr.Textbox(show_label=False, placeholder="Enter your question...")
        submit = gr.Button("Send")
    
    # Display chat history
    clear = gr.Button("Clear")

    # When the user clicks "Send"
    submit.click(chatbot_interface, [msg, chatbot], [chatbot, chatbot])

    # Clear the chat history when "Clear" is clicked
    clear.click(lambda: None, None, chatbot)

In [None]:
import pdfplumber

def extract_text_from_pdfs(pdf_directory):
    texts = []
    for filename in os.listdir(pdf_directory):
        if filename.endswith('.pdf'):
            file_path = os.path.join(pdf_directory, filename)
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    texts.append(page.extract_text())
    return texts

# Path to the directory containing your ground truth PDFs
pdf_directory = "sih manuals"
pdf_texts = extract_text_from_pdfs(pdf_directory)

In [17]:
import pdfplumber

def extract_text_from_pdfs(pdf_directory):
    texts = []
    for filename in os.listdir(pdf_directory):
        if filename.endswith('.pdf'):
            file_path = os.path.join(pdf_directory, filename)
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    texts.append(page.extract_text())
    return texts

# Path to the directory containing your ground truth PDFs
pdf_directory = "sih manuals"
pdf_texts = extract_text_from_pdfs(pdf_directory)

In [18]:
def parse_ground_truth(texts):
    # Implement parsing logic here
    # For this example, assume each text contains question-answer pairs separated by newlines
    ground_truth_data = []
    for text in texts:
        lines = text.split('\n')
        for i in range(0, len(lines) - 1, 2):  # Assuming Q&A pairs are on successive lines
            question = lines[i].strip()
            answer = lines[i + 1].strip()
            ground_truth_data.append({"question": question, "answer": answer})
    return ground_truth_data

ground_truth_data = parse_ground_truth(pdf_texts)

In [None]:
def evaluate_accuracy(qa_chain, ground_truth_data):
    correct_count = 0
    total_count = len(ground_truth_data)

    for data in ground_truth_data:
        question = data["question"]
        correct_answer = data["answer"]
        response = qa_chain.invoke({"query": question})["result"]
        
        # For simplicity, perform a case-insensitive comparison
        if response.strip().lower() == correct_answer.strip().lower():
            correct_count += 1

    accuracy = (correct_count / total_count) * 100
    return accuracy

# Assuming you have already defined and set up your QA chain
# qa_chain = RetrievalQA.from_chain_type(llm, retriever=knowledge_base.as_retriever())

# Evaluate the accuracy
accuracy = evaluate_accuracy(qa_chain, ground_truth_data)
print(f"Accuracy: {accuracy:.2f}%")