In [None]:
!pip install faiss-cpu sentence-transformers google-generativeai



In [None]:
# Step 1: Imports
# Import necessary libraries
import getpass                       # For prompting for the API key without echoing it
import os                            # For reading the API key from the environment

import faiss                         # For fast similarity search
import google.generativeai as genai  # For using Gemini API
import numpy as np                   # For handling numerical data
from sentence_transformers import SentenceTransformer  # For text embeddings


In [None]:
# Step 2: Gemini API Setup
# Never hard-code the API key in the notebook: anything saved, shared or
# committed leaks it. Any key that was ever stored in this notebook must be
# treated as compromised and revoked.
# The key is read from the GOOGLE_API_KEY environment variable; if that is not
# set, fall back to an interactive prompt that does not echo the input.
# (Get a key from https://aistudio.google.com/app/apikey)
gemini_api_key = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Gemini API key: ")
if not gemini_api_key:
    # Fail here with a clear message instead of at the first API call.
    raise RuntimeError(
        "No Gemini API key provided: set GOOGLE_API_KEY or enter a key when prompted."
    )
genai.configure(api_key=gemini_api_key)

In [None]:
# Instantiate the Gemini Flash generative model (the faster, cheaper tier,
# optimized for speed); `model` is what later generates the answer.
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

In [None]:
# Step 3: Documents (Knowledge Base)
# A tiny, hand-written corpus that plays the role of the external knowledge
# source. The position of each string in this list is the id that the
# vector index later hands back, so keep the order stable.
documents = [
    "Python is a popular programming language for data science.",
    "RAG is a technique that uses document retrieval with LLMs.",
    "Google Gemini is a family of large language models.",
    "FAISS helps in fast similarity search for vectors.",
]

In [None]:
# Step 4: Create embeddings and FAISS index
# Load the pre-trained sentence-embedding model; the same `embedder` is used
# for both the documents and the question so their vectors are comparable.
embedder = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [None]:
# Embed every document: one vector per entry of `documents`, in the same order
doc_embeddings = embedder.encode(sentences=documents)

In [None]:
# Create a FAISS index to perform fast nearest-neighbor searches.
# FAISS only accepts C-contiguous float32 matrices, so normalise the array
# explicitly instead of relying on whatever dtype/layout the embedder returned
# (this is a no-op view when the input already satisfies both).
doc_vectors = np.ascontiguousarray(doc_embeddings, dtype=np.float32)
index = faiss.IndexFlatL2(doc_vectors.shape[1])  # L2 = Euclidean distance; argument is the vector dimension
index.add(doc_vectors)  # Row i of the index corresponds to documents[i]

In [None]:
# Step 5: Ask a question
question = "What is RAG in AI?"

# How many documents to retrieve; tune it here rather than inside the search call
TOP_K = 2

# Convert the question into an embedding. encode() is given a one-element list
# so that FAISS receives a batch containing a single query.
q_embedding = embedder.encode([question])

# Never ask for more neighbours than the index holds: FAISS pads the missing
# results with index -1, which is not a valid document id.
k = min(TOP_K, index.ntotal)

# Search the FAISS index for the k most similar documents.
# 'I' gives the indices of the top docs (one row per query); the distances are unused.
_, I = index.search(np.ascontiguousarray(q_embedding, dtype=np.float32), k=k)

In [None]:
# Step 6: Get relevant docs
# I is a 2D array of indices returned by FAISS for the top-k most similar documents
# I[0] accesses the row of indices for the first (and only) query: our question
# FAISS fills a slot with -1 when it has fewer than k neighbours to return; those
# slots are skipped, because documents[-1] would silently pull in the LAST document.
# The '\n'.join(...) combines the selected documents into a single newline-separated string
retrieved = "\n".join(documents[i] for i in I[0] if i >= 0)


In [None]:
# Step 7: Generate response using Gemini Flash
# The retrieved passages are placed ahead of the question so the model can
# ground its answer in them.
prompt = f"""You are an AI assistant. Use the following context to answer the question.

Context:
{retrieved}

Question: {question}
Answer:"""

# Call Gemini Flash model with prompt
response = model.generate_content(prompt)

# `response.text` raises ValueError when the reply contains no text part
# (for example when the prompt or the answer was blocked). Report that
# explicitly instead of ending the notebook with a bare traceback.
try:
    answer = response.text
except ValueError as exc:
    answer = f"[No answer text returned: {exc}]"

# Step 8: Output
# NOTE(review): the label below says "Flash 2" but the model loaded earlier is
# "gemini-1.5-flash" — confirm which is intended before changing either.
print("🔎 Retrieved Context:\n", retrieved)
print("\n💬 Gemini Flash 2 Answer:\n", answer)

🔎 Retrieved Context:
 RAG is a technique that uses document retrieval with LLMs.
FAISS helps in fast similarity search for vectors.

💬 Gemini Flash 2 Answer:
 In AI, RAG (Retrieval Augmented Generation) is a technique that combines large language models (LLMs) with a document retrieval system.  It uses the retrieval system, often employing tools like FAISS for efficient similarity search, to find relevant documents related to a user's query.  These documents are then used to augment the LLM's input, allowing the LLM to generate more accurate, informative, and contextually relevant responses.

