## Step 0 — Import Packages

In [12]:
from typing import List

In [1]:
import numpy as np
import torch
import faiss
import pandas as pd

# Record the library versions and CUDA availability for this run, one item per line.
print(
    f"numpy {np.__version__}",
    f"torch {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

numpy 2.3.5
torch 2.2.2
CUDA available: False


In [6]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from sentence_transformers import SentenceTransformer, util
import faiss

## Step 1 — Knowledge Base (KB)

Create a small KB for your RAG system.

# Knowledge base: three policy paragraphs separated by one blank line each.
# The leading and trailing newlines are deliberate; the text is stripped before
# it is split into chunks.
kb_text = "\n" + "\n\n".join([
    # Sustainability policy
    "Company X is committed to environmental sustainability. All employees are encouraged\n"
    "to reduce waste, recycle, and conserve energy. The company hosts an annual\n"
    "sustainability week where departments compete to implement green initiatives.",
    # Remote work and leave policy
    "Employees are eligible for remote work up to two days per week. Flexible hours\n"
    "are allowed with manager approval. The HR department maintains a digital leave\n"
    "tracking system and processes requests within 48 hours.",
    # Training policy
    "The company provides training programs for career growth, including workshops\n"
    "on communication, leadership, and technical skills. Participation is voluntary\n"
    "but recommended for all employees to enhance skill development.",
]) + "\n"

## Step 2 — Split the KB into chunks (useful for embeddings)

In [7]:
# Paragraphs are separated by blank lines; each paragraph becomes one retrieval chunk.
# Every piece is stripped and empty pieces are dropped, so a stray extra blank line
# in the KB text can never yield an empty (or newline-prefixed) chunk that would
# then be embedded and indexed.
kb_chunks = [piece.strip() for piece in kb_text.split("\n\n") if piece.strip()]
print("Chunks:", kb_chunks)

Chunks: ['Company X is committed to environmental sustainability. All employees are encouraged\nto reduce waste, recycle, and conserve energy. The company hosts an annual\nsustainability week where departments compete to implement green initiatives.', 'Employees are eligible for remote work up to two days per week. Flexible hours\nare allowed with manager approval. The HR department maintains a digital leave\ntracking system and processes requests within 48 hours.', 'The company provides training programs for career growth, including workshops\non communication, leadership, and technical skills. Participation is voluntary\nbut recommended for all employees to enhance skill development.']


## Step 3 — Generate embeddings

Use a Sentence Transformer to embed KB chunks:

In [8]:
# Load the sentence-embedding model and encode every KB chunk into one tensor
# (one row per chunk); the resulting shape is printed as a sanity check.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
kb_embeddings = embedding_model.encode(sentences=kb_chunks, convert_to_tensor=True)
print(f"KB embeddings shape: {kb_embeddings.shape}")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

KB embeddings shape: torch.Size([3, 384])


## Step 4 — Build FAISS index

In [9]:
# Width of one embedding vector (second axis of the embeddings tensor).
d = kb_embeddings.size(1)
# Flat L2 index over the KB embeddings; FAISS consumes NumPy arrays, so the
# tensor is moved to the CPU and converted before being added.
index = faiss.IndexFlatL2(d)
index.add(kb_embeddings.cpu().numpy())
print(f"Number of vectors in FAISS: {index.ntotal}")


Number of vectors in FAISS: 3


## Step 5 — Retrieval function

In [28]:
def retrieve(query: str, k: int = 2, threshold: float = 0.5) -> List[str]:
    """Return the KB chunks closest to ``query`` that clear a similarity floor.

    The query is embedded with ``embedding_model`` and searched against the FAISS
    ``index``. Each returned distance is mapped to a score ``1 / (1 + distance)``
    and a chunk is kept only when its score is at least ``threshold``.

    Args:
        query: Natural-language question to look up.
        k: Maximum number of chunks to return. Values larger than the number of
            indexed vectors are clamped to that number.
        threshold: Minimum score a chunk needs in order to be returned.

    Returns:
        The qualifying chunks in the order FAISS returned them, or the
        single-element list ``["No relevant information found in KB."]`` when
        nothing qualifies (including ``k <= 0`` or an empty index).
    """
    no_match = ["No relevant information found in KB."]

    # Asking FAISS for more neighbours than it holds yields padded results with
    # label -1; clamping k avoids requesting them in the first place.
    k = min(k, index.ntotal)
    if k <= 0:
        return no_match

    query_emb = embedding_model.encode([query], convert_to_tensor=True).cpu().numpy()
    distances, indices = index.search(query_emb, k)

    chunks = []
    for idx, dist in zip(indices[0], distances[0]):
        # Defensive: a label of -1 means "no result". Without this check it would
        # index kb_chunks[-1] and silently return the last chunk.
        if idx < 0:
            continue
        # Map distance to a (0, 1] score where larger means closer.
        # NOTE(review): per the FAISS docs IndexFlatL2 reports *squared* L2
        # distances, so `threshold` is expressed in terms of squared distance.
        if 1 / (1 + dist) >= threshold:
            chunks.append(kb_chunks[idx])
    return chunks if chunks else no_match

In [29]:
# NOTE: running this cell rebinds the name `retrieve`; any definition of the same
# name from an earlier cell is replaced from this point on.
def retrieve(query: str, k: int = 2, threshold: float = 0.0) -> List[str]:
    """Return up to ``k`` KB chunks nearest to ``query``.

    The query is embedded with ``embedding_model`` and searched against the FAISS
    ``index``. Each hit is scored as ``1 / (1 + distance)`` and kept when the
    score is at least ``threshold``.

    Args:
        query: Natural-language question to look up.
        k: Maximum number of chunks to return. Values larger than the number of
            indexed vectors are clamped to that number.
        threshold: Minimum score a hit needs in order to be returned. The default
            of ``0.0`` keeps every hit, because the score is positive for any
            non-negative distance.

    Returns:
        The selected chunks in the order FAISS returned them, or the
        single-element list ``["No relevant information found in KB."]`` when
        there is nothing to return (``k <= 0``, an empty index, or every hit
        scored below ``threshold``).
    """
    no_match = ["No relevant information found in KB."]

    # FAISS pads missing neighbours with label -1 when k exceeds the index size;
    # clamp k so that padding is never requested.
    k = min(k, index.ntotal)
    if k <= 0:
        return no_match

    query_emb = embedding_model.encode([query], convert_to_tensor=True).cpu().numpy()
    distances, indices = index.search(query_emb, k)

    hits = [
        kb_chunks[idx]
        for idx, dist in zip(indices[0], distances[0])
        # idx < 0 is the "no result" label; indexing with it would wrongly
        # return the last chunk via Python's negative indexing.
        if idx >= 0 and 1 / (1 + dist) >= threshold
    ]
    return hits if hits else no_match

In [30]:
# Smoke-test the retriever with a question the KB can answer.
query_example = "What training programs does the company provide?"
retrieved_chunks = retrieve(query=query_example)
print(f"Retrieved: {retrieved_chunks}")

Retrieved: ['The company provides training programs for career growth, including workshops\non communication, leadership, and technical skills. Participation is voluntary\nbut recommended for all employees to enhance skill development.', 'Company X is committed to environmental sustainability. All employees are encouraged\nto reduce waste, recycle, and conserve energy. The company hosts an annual\nsustainability week where departments compete to implement green initiatives.']


## Step 6 — LLM generation

Use a small pre-trained model like t5-small for context-aware generation:

In [38]:
# One checkpoint id drives both loaders so the tokenizer and the seq2seq model
# always come from the same pretrained checkpoint.
llm_model_name = "t5-small"
tokenizer, model = (
    AutoTokenizer.from_pretrained(llm_model_name),
    AutoModelForSeq2SeqLM.from_pretrained(llm_model_name),
)

def generate_answer(
    query: str,
    context_chunks: List[str],
    max_length: int = 200,
    do_sample: bool = True,
) -> str:
    """Generate an answer to ``query`` grounded in the retrieved ``context_chunks``.

    Args:
        query: The user's question.
        context_chunks: Retrieved KB chunks. Duplicates, blank entries and the
            retriever's "no match" sentinel are ignored wherever they appear.
        max_length: Passed to ``model.generate`` as ``max_length``.
        do_sample: When True (default) decode with nucleus sampling
            (``top_p=0.9``, ``temperature=0.7``), so repeated calls may differ.
            Pass False to decode without sampling for repeatable output.

    Returns:
        The decoded model output, or a fixed "I don't know" message when no
        usable context remains after filtering.
    """
    no_match_sentinel = "No relevant information found in KB."

    # De-duplicate while preserving retrieval order, then discard anything that
    # carries no information. The sentinel is filtered at any position, not only
    # the first, so it can never leak into the prompt as if it were KB text.
    usable_chunks = [
        chunk
        for chunk in dict.fromkeys(context_chunks or [])
        if chunk.strip() and chunk != no_match_sentinel
    ]
    if not usable_chunks:
        return "I don't know — the KB has no relevant information."

    context = " ".join(usable_chunks)

    # Make prompt explicit for synthesis / reasoning.
    # NOTE(review): the answers recorded in this notebook largely echo the
    # context; t5-small may respond better to a `question: ... context: ...`
    # style prompt — confirm before changing the wording.
    prompt = (
        "Based on the following information, provide a detailed answer to the question.\n"
        f"Context: {context}\n"
        f"Question: {query}\n"
        "Answer in full sentences, combining information from all relevant parts of the context."
    )

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

    # Sampling-only knobs are supplied only when sampling is enabled.
    generation_kwargs = {"max_length": max_length, "do_sample": do_sample}
    if do_sample:
        generation_kwargs.update(top_p=0.9, temperature=0.7)

    outputs = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

## Step 7 — Test Cases

### Test Case 1 (Factual)

In [39]:
# Factual question whose answer is stated directly in the remote-work paragraph.
q1 = "How many remote work days are allowed?"
chunks1 = retrieve(query=q1)
print(f"Answer: {generate_answer(q1, chunks1)}")

Answer: : Employees are eligible for remote work up to two days per week. Flexible hours are allowed with manager approval. The HR department maintains a digital leave tracking system and processes requests within 48 hours.


### Test Case 2 (Foil/General)

In [41]:
# Foil question: the KB text says nothing about who founded the company.
q2 = "Who founded the company?"
chunks2 = retrieve(query=q2)
print(f"Answer: {generate_answer(q2, chunks2)}")

Answer: I don't know — the KB has no relevant information.


In [40]:
# Synthesis question that touches two KB paragraphs (green initiatives and training).
q3 = "What initiatives are employees encouraged to do and how are they trained?"
chunks3 = retrieve(query=q3)
print(f"Answer: {generate_answer(q3, chunks3)}")


Answer: Context: The company provides training programs for career growth, including workshops on communication, leadership, and technical skills. Participation is voluntary but recommended for all employees to enhance skill development.
