In [None]:
# Import pdfplumber to handle PDFs, create embeddings, perform vector search, and run a Hugging Face LLM.
#!pip install -q pdfplumber sentence-transformers faiss-cpu transformers accelerate bitsandbytes streamlit pyngrok
!pip install -q pdfplumber sentence-transformers faiss-cpu transformers accelerate bitsandbytes

In [None]:
#Import the two pdf files which is the data for building the model
from google.colab import files

uploaded = files.upload()  # Upload your two PDFs here

Saving USA_Employee_Handbook-Freely_Available.pdf to USA_Employee_Handbook-Freely_Available (2).pdf
Saving Vunani_Employee_Handbook.pdf to Vunani_Employee_Handbook (2).pdf


In [None]:
import pdfplumber

def pdf_to_text(pdf_path):
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text

# Example: Suppose you uploaded two PDFs to Colab
pdf_files = ["USA_Employee_Handbook-Freely_Available.pdf", "Vunani_Employee_Handbook.pdf"]
pdf_texts = {f: pdf_to_text(f) for f in pdf_files}




In [None]:
def chunk_text(text, chunk_size=400, overlap=50):
    """Smaller chunks for testing; reduces memory and speeds up demo"""
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        start += chunk_size - overlap
    return chunks

docs = []
for pdf_name, text in pdf_texts.items():
    company_label = pdf_name.replace(".pdf", "")
    chunks = chunk_text(text)
    for c in chunks:
        docs.append({"text": c, "metadata": {"company": company_label}})


In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Smaller embedding model for demonstration
embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")
chunk_texts = [doc["text"] for doc in docs]
embeddings = embedder.encode(chunk_texts, convert_to_numpy=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
import faiss #FAISS is a fast vector database from Facebook AI.

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)



FAISS allows the system to quickly find chunks similar in meaning to any query.

Without FAISS, searching would be slow on large datasets.

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Use a model name that exists
model_name = "tiiuae/Falcon3-1B-Instruct"

# It’s good to specify device_map="auto" and load_in_8bit if memory is constrained
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",         # Automatically map to GPU if available
    load_in_8bit=True,         # Reduce memory usage by loading the model in 8-bit
    torch_dtype=torch.float16   # Use half precision
)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Test it quickly
prompt = "You are an HR assistant. How many vacation days does the company provide?"
output = generator(prompt, max_new_tokens=50, do_sample=False, temperature=0.2)
print(output[0]['generated_text'])

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/3.34G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


You are an HR assistant. How many vacation days does the company provide?
<|assistant|>
As an AI, I don't have access to specific company policies or data about the number of vacation days provided by a particular organization. However, I can suggest that you visit the company's official website or contact


In [None]:
def retrieve_chunks(query, top_k=3, company_filter=None):
    """Retrieve top_k most relevant chunks from FAISS index, optionally filtered by company"""
    # Compute embedding of query
    q_emb = embedder.encode([query], convert_to_numpy=True)

    # Search FAISS index
    D, I = index.search(q_emb, top_k)

    retrieved = []
    for idx in I[0]:
        if company_filter:
            if docs[idx]["metadata"]["company"] == company_filter:
                retrieved.append(docs[idx]["text"])
        else:
            retrieved.append(docs[idx]["text"])
    return retrieved

In [None]:
def rag_answer(query, company=None):
    context_chunks = retrieve_chunks(query, top_k=3, company_filter=company)
    if not context_chunks:
        return "No relevant information found for this company."
    context_text = "\n".join(context_chunks)
    prompt = f"Use the following HR policy context to answer the question.\n\nContext:\n{context_text}\n\nQuestion: {query}\nAnswer:"
    output = generator(prompt, max_new_tokens=150, do_sample=False, temperature=0.2)
    return output[0]["generated_text"]


In [19]:
sample_questions = [
    {"question": "How many vacation days are employees entitled to?", "company": "USA_Employee_Handbook-Freely_Available"},
    {"question": "What is the probation period for new hires?", "company": "Vunani_Employee_Handbook"},
    {"question": "Is remote work allowed?", "company": None}  # None = search all
]

for sq in sample_questions:
    print("="*50)
    print(f"Question: {sq['question']}")
    print(f"Company filter: {sq['company']}")
    answer = rag_answer(sq['question'], company=sq['company'])
    print(f"Answer: {answer}\n")


Question: How many vacation days are employees entitled to?
Company filter: USA_Employee_Handbook-Freely_Available
Answer: Use the following HR policy context to answer the question.

Context:
Or, you may use your PTO. Holiday pay ■ Exempt employees are entitled to their normal compensation without any deductions. ■ Permanent non-exempt employees receive holiday pay as a benefit after they have worked with us for more than [three months.] Working on a holiday These holidays are considered “off-days” for most employees. If you need a team member to work on a holiday, inform them at least [three days] in advance. If you are a non-exempt employee, you will receive your regular hourly rate with a premium for working on a holiday. If you are an exempt employee, we will grant you an additional day of PTO that you must take within [12 months] after that holiday. We [will/ won’t] count hours you worked on a holiday to decide whether you are entitled to overtime pay. Sick leave We offer [one we