In [1]:
!pip install faiss-cpu sentence-transformers google-generativeai

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 59.0 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1


In [2]:
# Step 1: Imports
# Import necessary libraries
import os                            # For reading the Gemini API key from the environment

import faiss                         # For fast similarity search
import google.generativeai as genai  # For using Gemini API
import numpy as np                   # For handling numerical data
from sentence_transformers import SentenceTransformer  # For text embeddings


In [3]:
# Step 2: Gemini API Setup
# Read the Gemini API key from the GEMINI_API_KEY environment variable rather than
# embedding it in the notebook (create a key at https://aistudio.google.com/app/apikey).
# SECURITY: a key was previously committed here in plain text; treat it as leaked and revoke it.
gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not gemini_api_key:
    # Fail early with an actionable message instead of an opaque auth error at request time.
    raise RuntimeError("Set the GEMINI_API_KEY environment variable before running this cell.")
genai.configure(api_key=gemini_api_key)

In [20]:
# Handle to the Gemini model that the generation step calls via model.generate_content(...).
model = genai.GenerativeModel(model_name="gemini-flash-latest")

In [21]:
# Step 3: Documents (Knowledge Base)
# A tiny in-memory corpus acting as the external knowledge source.
# Each string is embedded and indexed on its own, so one entry == one retrievable passage.
documents = [
    "Python is a popular programming language for data science.",
    "RAG is a technique that uses document retrieval with LLMs.",
    "Google Gemini is a family of large language models.",
    "FAISS helps in fast similarity search for vectors.",
]

In [22]:
# Step 4: Create embeddings and FAISS index
# Load the pre-trained "all-MiniLM-L6-v2" sentence-embedding model; the same instance
# encodes both the documents and the question.
embedder = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [23]:
# Encode the whole corpus in one call: one embedding vector per entry of `documents`
doc_embeddings = embedder.encode(sentences=documents)

In [24]:
# Build a flat L2 (Euclidean distance) FAISS index whose dimensionality matches the
# embedding width (last axis of doc_embeddings), then load every document vector into it.
index = faiss.IndexFlatL2(doc_embeddings.shape[-1])
index.add(np.array(doc_embeddings))

In [25]:
# Step 5: Ask a question
question = "What is RAG in AI?"

# Embed the question; it is wrapped in a list so the encoder receives a batch of one
q_embedding = embedder.encode([question])

# Query the index for the 2 nearest documents.
# The first return value (distances) is discarded; 'I' holds the indices of the top docs.
_, I = index.search(np.array(q_embedding), 2)

In [26]:
# Step 6: Get relevant docs
# I is a 2D array of indices returned by FAISS for the top-k most similar documents;
# I[0] is the row of indices for the first (and only) query, i.e. our question.
# FAISS fills unused result slots with -1 when the index holds fewer than k vectors.
# Those are skipped, because documents[-1] would otherwise silently pull in the last
# document as if it were a real match.
# The surviving documents are joined into a single newline-separated string for the prompt.
retrieved = "\n".join(documents[i] for i in I[0] if i >= 0)


In [27]:
# Step 7: Generate response using Gemini Flash
# The retrieved passages are placed ahead of the question so the model answers from them.
prompt = f"""You are an AI assistant. Use the following context to answer the question.

Context:
{retrieved}

Question: {question}
Answer:"""

# Call Gemini Flash model with prompt
response = model.generate_content(prompt)

# Step 8: Output
# Show the retrieved context next to the model's answer so the grounding can be checked by eye.
# (The emoji labels had been mis-encoded; they are restored to proper UTF-8 here.)
print("🔎 Retrieved Context:\n", retrieved)
print("\n💬 Gemini Flash 2 Answer:\n", response.text)

🔎 Retrieved Context:
 RAG is a technique that uses document retrieval with LLMs.
FAISS helps in fast similarity search for vectors.

💬 Gemini Flash 2 Answer:
 RAG is a technique that uses document retrieval with LLMs.
