In [None]:
!pip install PyMupdf transformers sentence-transformers faiss-cpu requests

In [None]:
import os
import torch
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import requests # For making HTTP requests to the Gemini API
import json # For handling JSON data
from typing import List, Dict

In [None]:
from dotenv import load_dotenv

# --- 1. Configuration ---
# Pull variables from a local .env file (if one exists) into the environment.
load_dotenv()

# API key for the Gemini REST API; None when the variable is not set.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# Lowercase alias kept so code written against the earlier name keeps working.
gemini_api_key = GEMINI_API_KEY

# Endpoint used by NonPromptBasedRAG.generate_response().
# NOTE(review): the notebook never defined this value; the default below is an
# assumption (public generateContent endpoint) -- confirm the model name.
GEMINI_API_URL = os.getenv(
    "GEMINI_API_URL",
    "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent",
)

# Sentence-Transformers model passed to VectorStore.
# NOTE(review): also previously undefined; the default is an assumption.
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")

if not GEMINI_API_KEY:
    print("Warning: GEMINI_API_KEY is not set; calls to the Gemini API will fail.")

In [None]:

# --- 2. Document Loading and Chunking ---
def load_documents(filepath: str) -> List[str]:
    """Loads a UTF-8 text file and splits it into paragraph-level documents.

    Paragraphs are separated by one or more blank lines; a line that contains
    only whitespace counts as blank. Each document is stripped of surrounding
    whitespace and empty documents are dropped.

    Args:
        filepath: Path of the text file to read.

    Returns:
        The list of documents, or an empty list (after printing an error)
        when ``filepath`` is not an existing regular file.
    """
    # isfile (not exists): a directory path would otherwise crash in open().
    if not os.path.isfile(filepath):
        print(f"Error: Document file not found at {filepath}")
        return []
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    documents: List[str] = []
    paragraph_lines: List[str] = []
    for line in content.split('\n'):
        if line.strip():
            paragraph_lines.append(line)
        elif paragraph_lines:
            # Blank (or whitespace-only) line closes the current paragraph.
            documents.append("\n".join(paragraph_lines).strip())
            paragraph_lines = []
    if paragraph_lines:
        documents.append("\n".join(paragraph_lines).strip())

    print(f"Loaded {len(documents)} documents.")
    return documents

def chunk_document(document: str, chunk_size: int = 256, overlap: int = 50) -> List[str]:
    """
    Splits a single document into overlapping, word-aligned chunks.

    This is a basic character-based splitter. For production, consider token-based.
    Both ``chunk_size`` and ``overlap`` are measured in characters, where every
    word costs ``len(word) + 1`` (the word plus one separating space). Words are
    never split, so a word longer than ``chunk_size`` becomes its own oversized
    chunk.

    Args:
        document: Text to split; whitespace runs are treated as word separators.
        chunk_size: Character budget of one chunk.
        overlap: Character budget of the trailing words repeated at the start
            of the next chunk. Values <= 0 disable the overlap.

    Returns:
        The list of chunks (words joined by single spaces). Empty input gives
        an empty list.
    """
    chunks: List[str] = []
    if not document:
        return chunks

    current_chunk: List[str] = []
    current_length = 0

    for word in document.split():
        cost = len(word) + 1
        # `current_chunk and` prevents emitting an empty chunk when the very
        # first word is already larger than chunk_size.
        if current_chunk and current_length + cost > chunk_size:
            chunks.append(" ".join(current_chunk))

            # Carry over trailing words worth at most `overlap` characters,
            # while leaving room for the incoming word. Because the carried
            # words are a strict suffix, every chunk holds at least one new
            # word and the splitter always makes progress.
            budget = min(overlap, chunk_size - cost)
            carried: List[str] = []
            carried_length = 0
            for previous in reversed(current_chunk):
                previous_cost = len(previous) + 1
                if carried_length + previous_cost > budget:
                    break
                carried.append(previous)
                carried_length += previous_cost
            carried.reverse()

            current_chunk = carried
            current_length = carried_length

        current_chunk.append(word)
        current_length += cost

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    print(f"Document chunked into {len(chunks)} pieces.")
    return chunks

# Demo corpus: four short paragraphs separated by blank lines
dummy_text = """
The quick brown fox jumps over the lazy dog. This is a classic sentence
used to demonstrate all letters of the alphabet. It's often used in
typing tests and font demonstrations.

Artificial intelligence (AI) is intelligence demonstrated by machines,
unlike the natural intelligence displayed by humans and animals.
Leading AI textbooks define the field as the study of "intelligent agents":
any device that perceives its environment and takes actions that maximize
its chance of successfully achieving its goals.

Machine learning (ML) is a subset of artificial intelligence that
focuses on the development of algorithms that allow computers to learn
from and make predictions or decisions based on data. It is a core component
of many modern AI applications.

Natural Language Processing (NLP) is a subfield of artificial intelligence
that deals with the interaction between computers and human language.
It involves enabling computers to understand, interpret, and generate human language.
NLP applications include machine translation, spam detection, and sentiment analysis.
"""

# Persist the corpus so it can be read back through load_documents()
SAMPLE_DOCS_PATH = 'sample_documents.txt'
with open(SAMPLE_DOCS_PATH, 'w', encoding='utf-8') as f:
    f.write(dummy_text)

# Read the paragraphs back, then flatten every paragraph's chunks into one list
documents = load_documents(SAMPLE_DOCS_PATH)
all_chunks = []
for doc in documents:
    all_chunks += chunk_document(doc)

print(f"Total chunks created: {len(all_chunks)}")


In [None]:

# --- 3. Embedding Generation and Vector Store (FAISS) ---
class VectorStore:
    """Sentence-embedding model paired with a FAISS index over text chunks."""

    def __init__(self, embedding_model_name: str):
        """Initializes the embedding model; the FAISS index is built later.

        Args:
            embedding_model_name: Model identifier handed to SentenceTransformer.
        """
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.index = None  # set by build_index()
        self.chunk_map: List[str] = []  # position i holds the text of FAISS vector i
        print(f"Embedding model '{embedding_model_name}' loaded.")

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Encodes texts into a float32 numpy matrix, one row per text."""
        embeddings = self.embedding_model.encode(texts, convert_to_tensor=True)
        # Move embeddings to CPU and convert to numpy for FAISS
        return embeddings.cpu().numpy().astype('float32')

    def build_index(self, chunks: List[str]):
        """Generates embeddings and builds the FAISS index.

        Args:
            chunks: Text chunks to index; must not be empty.

        Raises:
            ValueError: If ``chunks`` is empty.
        """
        # Copy so later mutation of the caller's list cannot desynchronise
        # chunk_map from the vectors stored in the index.
        chunks = list(chunks)
        if not chunks:
            raise ValueError("Cannot build a FAISS index from an empty list of chunks.")

        print("Generating embeddings for chunks...")
        chunk_embeddings_np = self._embed(chunks)

        # Get embedding dimension
        embedding_dim = chunk_embeddings_np.shape[1]

        # Initialize FAISS index (Flat index for exact search)
        index = faiss.IndexFlatL2(embedding_dim)
        index.add(chunk_embeddings_np)

        # Publish index and chunk map together, only once both are ready.
        self.index = index
        self.chunk_map = chunks
        print(f"FAISS index built with {self.index.ntotal} vectors.")

    def retrieve(self, query: str, k: int = 3) -> List[str]:
        """Retrieves top-k most relevant chunks based on query.

        Args:
            query: Free-text query to embed and search for.
            k: Maximum number of chunks to return; capped at the number of
                indexed vectors.

        Returns:
            Up to ``k`` chunk texts in the order returned by the index search.

        Raises:
            ValueError: If build_index() has not been called yet.
        """
        if self.index is None:
            raise ValueError("FAISS index not built. Call build_index() first.")

        # Never ask for more neighbours than exist: FAISS pads missing results
        # with label -1, which would index chunk_map from the end.
        k = min(k, self.index.ntotal)
        if k <= 0:
            print("Retrieved 0 chunks for query.")
            return []

        query_embedding_np = self._embed([query])

        # Search the index
        _, indices = self.index.search(query_embedding_np, k)

        retrieved_chunks = [
            self.chunk_map[idx]
            for idx in indices[0]
            if 0 <= idx < len(self.chunk_map)  # drop any padding labels
        ]
        print(f"Retrieved {len(retrieved_chunks)} chunks for query.")
        return retrieved_chunks

# Load the embedding model named by the EMBEDDING_MODEL_NAME constant,
# then index every chunk produced by the chunking step
vector_store = VectorStore(embedding_model_name=EMBEDDING_MODEL_NAME)
vector_store.build_index(chunks=all_chunks)

In [None]:
# --- 4. Retrieval Augmented Generation (RAG) System ---
class NonPromptBasedRAG:
    """Retrieve-then-generate pipeline: vector-store retrieval feeding the Gemini API.

    Relies on the module-level constants GEMINI_API_KEY and GEMINI_API_URL.
    """

    def __init__(self, vector_store: VectorStore, request_timeout: float = 30.0):
        """Stores the retrieval backend and HTTP settings.

        Args:
            vector_store: Object whose ``retrieve(query, k=...)`` returns a
                list of context strings.
            request_timeout: Seconds to wait for the Gemini API before
                giving up.
        """
        self.vector_store = vector_store
        self.request_timeout = request_timeout
        print("NonPromptBasedRAG system initialized.")

    def generate_response(self, query: str, num_retrieved_chunks: int = 3) -> str:
        """
        Performs retrieval first, then uses the retrieved context for generation
        using the Google Gemini API.

        Args:
            query: The user's question.
            num_retrieved_chunks: Number of context chunks requested from the
                vector store.

        Returns:
            The generated answer text, or a string starting with ``"Error:"``
            when the key is missing, the request fails, or the response cannot
            be parsed. API failures are reported this way, not raised.
        """
        print(f"\nProcessing query: '{query}'")

        # Step 1: Retrieve relevant documents (non-prompt based retrieval)
        retrieved_context_chunks = self.vector_store.retrieve(query, k=num_retrieved_chunks)
        context = "\n".join(retrieved_context_chunks)

        print("\n--- Retrieved Context ---")
        for i, chunk in enumerate(retrieved_context_chunks):
            print(f"Chunk {i+1}:\n{chunk}\n---")

        # Step 2: Formulate the prompt for the LLM using the retrieved context

        prompt = f"""
        You are a helpful AI assistant. Use the following context to answer the user's question.
        If you don't know the answer based on the context, just say you don't know.

        Context:
        {context}

        Question:
        {query}

        Answer:
        """

        print(f"\n--- Prompt sent to LLM ---\n{prompt}\n---")

        # Step 3: Send the prompt to the Google Gemini API
        if not GEMINI_API_KEY:
            print("Error: GEMINI_API_KEY is not set; cannot call the Gemini API.")
            return "Error: Language model API key is not configured."

        # The key goes in a header rather than the query string so it never
        # appears in URLs echoed by error messages or logs.
        headers = {
            'Content-Type': 'application/json',
            'x-goog-api-key': GEMINI_API_KEY,
        }
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt}
                    ]
                }
            ]
        }

        try:
            # A timeout is mandatory: without it a stalled connection would
            # block the notebook cell indefinitely.
            response = requests.post(
                GEMINI_API_URL,
                headers=headers,
                json=payload,
                timeout=self.request_timeout,
            )
        except requests.RequestException as e:
            print(f"Error calling Gemini API: {e}")
            return "Error: Language model API request failed (network error)."

        if response.status_code == 200:
            try:
                response_json = response.json()
                # Navigate through the JSON structure to get the text
                generated_text = response_json['candidates'][0]['content']['parts'][0]['text']
                print("\n--- Generated Response ---")
                print(generated_text)
                return generated_text
            # ValueError covers every JSON decode error response.json() can
            # raise; TypeError covers a null where a list/dict was expected.
            except (KeyError, IndexError, TypeError, ValueError) as e:
                print(f"Error parsing Gemini API response: {e}")
                print("Full API Response:", response.text)
                return "Error: Could not parse response from language model."
        else:
            print(f"Error from Gemini API: Status Code {response.status_code}")
            print("Error details:", response.text)
            return f"Error: Language model API request failed (Status Code: {response.status_code})."

# --- Example Usage ---

# Wire the RAG pipeline to the vector store built above
rag_system = NonPromptBasedRAG(vector_store)

# Run each demo question through the pipeline and show its final answer
for query in (
    "What is Artificial Intelligence?",
    "What is NLP?",
    "Tell me about the quick brown fox.",
):
    response = rag_system.generate_response(query)

    print(f"\nFinal Answer for query '{query}':")
    print(response)