In [2]:
!pip install faiss-cpu transformers sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu, sentence-transformers
Successfully installed faiss-cpu-1.8.0.post1 sentence-transformers-3.1.1


In [11]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint identifiers, named once so that the tokenizer and the seq2seq
# model below are always loaded from the same checkpoint.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
GENERATION_MODEL_NAME = 'facebook/bart-large-cnn'

# Sentence-embedding model: turns documents and queries into vectors for retrieval
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Tokenizer + seq2seq model: produce the answer text from "context + question"
tokenizer = AutoTokenizer.from_pretrained(GENERATION_MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(GENERATION_MODEL_NAME)

In [12]:
# Sample corpus to retrieve from.
# Each entry is embedded and added to the FAISS index in this order, and the
# ids returned by the index search are used directly as positions in this
# list, so the list must not be reordered after the index has been built.
# NOTE(review): the sentences are illustrative demo strings, not vetted facts.
documents: list[str] = [
    "The Eiffel Tower is in Paris.",
    "The Mona Lisa is a painting by Leonardo da Vinci.",
    "The Great Wall of China is visible from space.",
    "Albert Einstein developed the theory of relativity.",
    "Python is the most popular programming language in the U.S.",
]

In [13]:
# 1. Embed every document and move the result into a NumPy array,
#    which is the input format the FAISS index is fed with below.
doc_embeddings = (
    embedder.encode(documents, convert_to_tensor=True)
    .cpu()
    .detach()
    .numpy()
)

# 2. Build a flat FAISS index that ranks by L2 (Euclidean) distance,
#    sized to the width of the embedding vectors, and add all documents.
embedding_dim = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(doc_embeddings)

def retrieve(query, top_k=1):
    """Return the documents closest to ``query`` in embedding space.

    Args:
        query: The question / search text to embed and look up.
        top_k: Maximum number of documents to return. Values larger than the
            number of indexed documents are clamped to that number.

    Returns:
        A list of at most ``top_k`` strings taken from ``documents``, in the
        order the index search returned them. The list is empty when
        ``top_k`` is not positive or the index holds no vectors.
    """
    # Never ask for more neighbours than the index holds: FAISS fills the
    # missing result slots with id -1, and documents[-1] would silently
    # return the last document as a bogus hit.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    # 3. Get the query embedding and perform the search
    query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()
    _distances, indices = index.search(query_embedding, k)

    # Map result ids back to documents, skipping any "no result" (-1) slots
    return [documents[i] for i in indices[0] if i >= 0]

In [14]:
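# Earlier draft of generate_answer, kept commented out for reference only;
# it is superseded by the active definition in the next cell.
# def generate_answer(query):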
    # # 4. Use the retrieval step
    # retrieved_docs = retrieve(query, top_k=1)  # Limit to top 1 document
    # context = " ".join(retrieved_docs)

    # # 5. Generate response using a generative model
    # input_text = f"Context: {context} Question: {query}"
    # inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    # outputs = model.generate(**inputs)

    # # Decode the output to get the generated text
    # answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # return answer

In [15]:
def generate_answer(query, top_k=1, max_input_length=512, max_output_length=150, num_beams=4):
    """Answer ``query`` by retrieving context documents and running the seq2seq model.

    Args:
        query: The user's question.
        top_k: Number of documents to retrieve and join into the context.
        max_input_length: Token limit for the prompt; longer prompts are
            truncated by the tokenizer.
        max_output_length: ``max_length`` passed to ``model.generate``.
        num_beams: Beam-search width passed to ``model.generate``.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    # 1. Use the retrieval step and join the hits into one context string
    retrieved_docs = retrieve(query, top_k=top_k)
    context = " ".join(retrieved_docs)

    # 2. Set up the input text for the model
    input_text = f"Context: {context} Question: {query}"

    # 3. Tokenize the input, truncating anything beyond max_input_length tokens
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=max_input_length,
        truncation=True,
    )

    # 4. Generate the response with beam search
    outputs = model.generate(
        **inputs,
        max_length=max_output_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    # 5. Decode the first generated sequence back into plain text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [18]:
# Demo: run a single question through the retrieve-then-generate pipeline
query = "Which is the most popular programming language?"
answer = generate_answer(query)

# Keep only the text before the first '. ' so the answer prints on one line
first_sentence = answer.partition('. ')[0]

print(f"Question: {query}")
print(f"Answer: {first_sentence}")

Question: Which is the most popular programming language?
Answer: Python is the most popular programming language in the U.S
