In [2]:
import os
import logging
from typing import Any, List

# ------------------- Logger Configuration -------------------
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if not logger.hasHandlers():
    handler = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

# ------------------- Mock Vector DB Inserter -------------------
def text_db_insetter(vector_db, texts, file_name, page_no=1):
    """
    Mock function to simulate inserting into a vector database.
    Replace this with your actual implementation.
    """
    logger.info(f"[Mock] Inserting {len(texts)} chunks from '{file_name}' into vector database.")
    for i, chunk in enumerate(texts):
        logger.debug(f"Chunk {i+1}: {chunk[:50]}...")  # Show only the beginning of each chunk

# ------------------- Text Splitter -------------------
def text_splitter(text: str, text_chunker: Any) -> List[str]:
    if not text:
        raise ValueError("The input text cannot be empty.")
    try:
        splited_text = text_chunker.split_text(text)
    except Exception as e:
        raise Exception(f"An error occurred while splitting the text: {e}")
    return splited_text

# ------------------- Main Text File Processor -------------------
def process_text_file(text_path: str, vector_db, text_chunker):
    if not os.path.isfile(text_path):
        raise FileNotFoundError(f"The specified text file does not exist: {text_path}")

    logger.info(f"Processing text file: '{text_path}'")

    try:
        with open(text_path, 'r', encoding='utf-8') as file:
            raw_text = file.read()

        split_texts = text_splitter(text=raw_text, text_chunker=text_chunker)
        file_name = os.path.basename(text_path)

        text_db_insetter(vector_db=vector_db, texts=split_texts, file_name=file_name)
        logger.info(f"Inserted chunks from '{file_name}' into the vector database.")

    except Exception as e:
        logger.error(f"Error processing text file: {e}")
        raise


In [3]:
def load_questions(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]

questions = load_questions("dataquestion.txt")


In [4]:
class DummySplitter:
    def split_text(self, text):
        # Splits text into 100 character chunks
        return [text[i:i+100] for i in range(0, len(text), 100)]

# Example usage
text_chunker = DummySplitter()
vector_db = {}  # dummy placeholder
process_text_file('dataquestion.txt', vector_db, text_chunker)


2025-04-08 20:51:40,587 - INFO - Processing text file: 'dataquestion.txt'
2025-04-08 20:51:40,587 - INFO - [Mock] Inserting 6 chunks from 'dataquestion.txt' into vector database.
2025-04-08 20:51:40,587 - INFO - Inserted chunks from 'dataquestion.txt' into the vector database.


In [5]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-pro")

def ask_gemini(query, context_chunks):
    context = "\n".join(context_chunks)
    prompt = f"""Use the following context to answer the question:

    Context:
    {context}

    Question:
    {query}
    """
    response = model.generate_content(prompt)
    return response.text


In [9]:
def mock_retriever(vector_db, query):
    return list(vector_db.values())


In [12]:
import google.generativeai as genai

# Initialize with your actual Gemini API key
genai.configure(api_key="AIzaSyDxHKOiWZzWGA-tvdV_897-RyLnWB4nfDI")

# Use the correct model name
model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")


In [14]:
def ask_gemini(query, context_chunks):
    context = "\n".join(context_chunks)
    prompt = f"""Use the following context to answer the question:

    Context:
    {context}

    Question:
    {query}
    """

    response = model.generate_content(prompt)
    return response.text


In [16]:
for idx, question in enumerate(questions, start=1):
    context = mock_retriever(vector_db, question)
    answer = ask_gemini(question, context)
    print(f"\n🔹 Question {idx}: {question}")
    print(f"🧠 Answer: {answer}")



🔹 Question 1: 1. What is the largest planet in the solar system?
🧠 Answer: Jupiter is the largest planet in our solar system.


🔹 Question 2: 2. Which planet is known as the Red Planet?
🧠 Answer: Mars


🔹 Question 3: 3. How many planets are there in the solar system?
🧠 Answer: The provided context is empty.  It doesn't give any information about the number of planets in the solar system.  However, as of the current scientific consensus, there are eight planets in our solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune.



ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 3
}
]