<a href="https://colab.research.google.com/github/Mohd-Saifuddin22/WebScraping-SLM/blob/main/SLM_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Setup Environment & Load Data

import sys
import json
import os
import torch # PyTorch base library

# --- Installations ---
# Shell-installs (via IPython `!` magics) the third-party packages that the
# import section further down in this cell relies on.
print("Installing required libraries...")
# Using --quiet to reduce installation noise
!pip install sentence-transformers --quiet
!pip install transformers accelerate --quiet
!pip install datasets --quiet # Useful for data handling

# Install FAISS - try GPU version first, fallback to CPU
if torch.cuda.is_available():
    print("CUDA detected, attempting to install faiss-gpu...")
    # This command often works in Colab, might need adjustment for specific local CUDA versions
    # NOTE(review): the exit status of pip is never inspected; a failed install is
    # only noticed indirectly by the import / attribute probe below.
    !pip install faiss-gpu --quiet
    # Verify faiss can be imported and uses GPU after install attempt
    try:
        import faiss
        # Simple check if GPU resources are accessible by FAISS
        # (this only tests that the module exposes a `GpuIndexFlatL2` attribute;
        # it does not create an index or touch a GPU)
        if hasattr(faiss, 'GpuIndexFlatL2'):
             print("faiss-gpu installed successfully.")
        else:
             # NOTE(review): this branch is reached whenever *some* faiss module is
             # importable without the GPU class, so the "installed" wording may be
             # misleading if the pip command above actually failed.
             print("faiss-gpu installed, but GPU resources might not be accessible? Falling back to faiss-cpu install.")
             # Raised deliberately so that both failure modes share the except branch below
             raise ImportError("FAISS GPU index not found")
    except ImportError:
        print("faiss-gpu import failed or GPU index unavailable, installing faiss-cpu...")
        !pip install faiss-cpu --quiet
else:
    print("CUDA not available, installing faiss-cpu...")
    !pip install faiss-cpu --quiet

print("Installations finished.")

# --- Imports ---
import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from datasets import Dataset, DatasetDict # Optional, but good for structure
import logging
import time
import pprint # For pretty printing dictionaries later

# --- Setup Logging ---
# force=True replaces handlers already attached to the root logger, so the
# format below also takes effect when this cell is re-run in the same kernel.
log_format = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_format, force=True)
logger = logging.getLogger(__name__)

# --- Determine Device ---
# `device` is the torch device used for the PyTorch models, so it is chosen from
# CUDA availability alone. Whether the installed FAISS build exposes GPU index
# classes is a separate question: a CPU-only FAISS build must not push the
# models onto the CPU. `faiss` is already imported above, so no re-import or
# ImportError handling is needed here.
if torch.cuda.is_available():
    device = torch.device("cuda")
    logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
    if not hasattr(faiss, 'GpuIndexFlatL2'):
        logger.warning(
            "FAISS GPU index not found after install; FAISS will run on CPU "
            "while PyTorch models use the GPU."
        )
else:
    device = torch.device("cpu")
    logger.info("CUDA not available. Using CPU.")

# --- Load Scraped Data ---
# Ensure the JSON file is uploaded to Colab or accessible in the local directory
data_file = "myscheme_100_schemes_generic.json"
scraped_data = None # Initialize

# Check if file exists (especially important in Colab)
if not os.path.exists(data_file):
     logger.error(f"ERROR: Data file not found at '{data_file}'.")
     print(f"\n--- FILE NOT FOUND ---")
     print(f"Please make sure '{data_file}' is uploaded or available in the current directory:")
     # In Colab, print contents of current directory to help user
     if 'google.colab' in sys.modules:
         print("Files in current Colab directory (/content/):")
         !ls -lh
     # Stop execution if file not found
     raise FileNotFoundError(f"Required data file '{data_file}' not found.")
else:
    try:
        with open(data_file, 'r', encoding='utf-8') as f:
            scraped_data = json.load(f)
        logger.info(f"Successfully loaded {len(scraped_data)} schemes from {data_file}")
        # Basic validation
        if not isinstance(scraped_data, list) or not scraped_data:
             raise ValueError("Loaded data is not a non-empty list.")
        if not isinstance(scraped_data[0], dict):
             raise ValueError("Items in the loaded data are not dictionaries.")
        logger.info(f"First scheme title (preview): {scraped_data[0].get('main_heading_h1', 'N/A')}")
        print(f"\nSuccessfully loaded data for {len(scraped_data)} schemes.")

    except json.JSONDecodeError as e:
         logger.error(f"ERROR: Could not decode JSON from '{data_file}'. File might be corrupted: {e}")
         scraped_data = None
    except Exception as e:
         logger.error(f"ERROR: An unexpected error occurred loading data: {e}", exc_info=True)
         scraped_data = None

    if not scraped_data:
         print("\n--- FATAL ERROR: Could not load valid scheme data. Cannot proceed. ---")
         # Stop execution or raise error
         raise ValueError(f"Failed to load valid data from {data_file}")

Installing required libraries...
CUDA detected, attempting to install faiss-gpu...
[31mERROR: Could not find a version that satisfies the requirement faiss-gpu (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for faiss-gpu[0m[31m
[0mfaiss-gpu installed, but GPU resources might not be accessible? Falling back to faiss-cpu install.
faiss-gpu import failed or GPU index unavailable, installing faiss-cpu...


2025-04-19 07:14:01,755 - INFO - Successfully loaded 100 schemes from myscheme_100_schemes_generic.json
2025-04-19 07:14:01,756 - INFO - First scheme title (preview): Jagananna Chedodu


Installations finished.

Successfully loaded data for 100 schemes.


In [None]:
# Cell 2: Data Preprocessing & Chunking

# Ensure scraped_data, logger are available from Cell 1
# Ensure pprint is imported if not already
import pprint
import re # For basic cleaning

def _as_clean_text(value):
    """Return ``value`` as a whitespace-stripped string (``None`` becomes ``''``)."""
    if value is None:
        return ''
    if not isinstance(value, str):
        value = str(value)
    return value.strip()


def preprocess_and_chunk(data):
    """
    Processes scraped data into text chunks suitable for embedding.
    Each chunk represents one non-empty section within a scheme.

    Missing, ``null`` or blank fields fall back to defaults instead of raising:
    the scheme name falls back from 'main_heading_h1' to 'page_title' to
    'Unknown Scheme N', the URL to 'URL_N', and a section heading to
    'General Info'. Sections whose content is empty after cleaning are skipped.

    Args:
        data (list): The list of dictionaries loaded from the JSON file.

    Returns:
        tuple: (list of str, list of dict)
               - chunk_texts: A list of formatted text chunks.
               - chunk_metadata: A list of metadata dictionaries corresponding
                                 to each text chunk (same order, same length).
    """
    chunk_texts = []
    chunk_metadata = []  # Store corresponding metadata

    if not data:
        logger.error("Input data is empty or None.")
        return chunk_texts, chunk_metadata

    logger.info(f"Starting preprocessing for {len(data)} schemes...")
    schemes_processed = 0

    for i, scheme in enumerate(data):
        if not isinstance(scheme, dict):
            logger.warning(f"Item at index {i} is not a dictionary, skipping.")
            continue

        # Extract top-level info. `or` chains are used (rather than dict.get
        # defaults) so that keys present with null/blank values also fall back.
        raw_page_title = scheme.get('page_title')
        scheme_name = (
            _as_clean_text(scheme.get('main_heading_h1'))
            or _as_clean_text(raw_page_title)
            or f'Unknown Scheme {i+1}'
        )
        scheme_url = scheme.get('scheme_url') or f'URL_{i+1}'
        # Keep the page title exactly as scraped when it is usable
        page_title = raw_page_title if _as_clean_text(raw_page_title) else scheme_name

        sections = scheme.get('extracted_sections', [])
        if not isinstance(sections, list):
            logger.warning(f"Scheme '{scheme_name}' has invalid 'extracted_sections' format, skipping sections.")
            sections = []

        if not sections:
            # Only section-based chunks are produced; a scheme without sections
            # contributes no chunks.
            logger.warning(f"Scheme '{scheme_name}' has no valid sections found in 'extracted_sections'.")

        schemes_processed += 1
        logger.debug(f"Processing scheme: {scheme_name} ({len(sections)} sections)")

        for section in sections:
            if not isinstance(section, dict):
                logger.warning(f"  Invalid section format found in scheme '{scheme_name}', skipping.")
                continue

            heading = _as_clean_text(section.get('heading_text')) or 'General Info'
            content = _as_clean_text(section.get('content'))

            # --- Basic Cleaning ---
            # Collapse runs of 3+ newlines into a single blank line
            content = re.sub(r'\n{3,}', '\n\n', content).strip()

            if not content:  # Skip sections with no content after stripping
                logger.debug(f"  Skipping empty content for section: '{heading}' in scheme '{scheme_name}'")
                continue

            # --- Create Chunk and Metadata ---
            # Prefix the content with its context (scheme name, section heading)
            chunk_texts.append(f"Scheme Name: {scheme_name}\nSection: {heading}\n\n{content}")
            chunk_metadata.append({
                'scheme_url': scheme_url,
                'scheme_name': scheme_name,
                'page_title': page_title,
                'section_heading': heading,
            })

    logger.info("Preprocessing finished.")
    logger.info(f"Processed {schemes_processed} schemes.")
    # One chunk is created per valid section, so both counts are the same number
    logger.info(f"Created {len(chunk_texts)} text chunks from {len(chunk_texts)} valid sections.")

    return chunk_texts, chunk_metadata

# --- Run the preprocessing ---
# Results default to empty / None so later cells can test them safely.
chunk_texts = []
chunk_metadata = []
chunk_dataset = None

if not scraped_data:
    logger.error("Cannot preprocess data, 'scraped_data' is not loaded correctly.")
    print("\nError: Scraped data not available for preprocessing. Please run Cell 1 successfully.")
else:
    logger.info("--- Starting Data Preprocessing and Chunking ---")
    chunk_texts, chunk_metadata = preprocess_and_chunk(scraped_data)

    if not chunk_texts:
        logger.error("Preprocessing did not produce any text chunks.")
        print("\nError: No text chunks were created.")
    else:
        # --- Preview the first chunk (truncated to 1000 characters) ---
        print(f"\nSuccessfully created {len(chunk_texts)} chunks.")
        print("\n--- Example Chunk 1 ---")
        preview = chunk_texts[0][:1000]
        if len(chunk_texts[0]) > 1000:
            preview += '...'
        print(preview)
        print("\n--- Metadata for Chunk 1 ---")
        pp = pprint.PrettyPrinter(indent=2)
        pp.pprint(chunk_metadata[0])
        print("-------------------------")

        # --- Optional: bundle chunks and metadata into a Hugging Face Dataset ---
        logger.info("Attempting to create Hugging Face Dataset...")
        try:
            from datasets import Dataset
            if len(chunk_texts) != len(chunk_metadata):
                logger.error("Mismatch between number of text chunks and metadata entries. Cannot create Dataset.")
                print("\nError: Mismatch between text chunks and metadata count.")
            else:
                # One record per chunk: a copy of its metadata plus the chunk text
                dataset_data = [
                    {**meta, 'text': text}
                    for meta, text in zip(chunk_metadata, chunk_texts)
                ]
                chunk_dataset = Dataset.from_list(dataset_data)
                logger.info("Created Hugging Face Dataset object from chunks.")
                print("\nCreated Hugging Face Dataset 'chunk_dataset'.")
                print(chunk_dataset)  # shows column names and row count
        except ImportError:
            logger.warning("Hugging Face 'datasets' library not available or failed to import. Skipping Dataset creation.")
        except Exception as dataset_err:
            logger.error(f"Error creating Hugging Face Dataset: {dataset_err}", exc_info=True)

2025-04-19 07:14:07,592 - INFO - --- Starting Data Preprocessing and Chunking ---
2025-04-19 07:14:07,593 - INFO - Starting preprocessing for 100 schemes...
2025-04-19 07:14:07,624 - INFO - Preprocessing finished.
2025-04-19 07:14:07,626 - INFO - Processed 100 schemes.
2025-04-19 07:14:07,626 - INFO - Created 828 text chunks from 828 valid sections.
2025-04-19 07:14:07,627 - INFO - Attempting to create Hugging Face Dataset...
2025-04-19 07:14:07,680 - INFO - Created Hugging Face Dataset object from chunks.



Successfully created 828 chunks.

--- Example Chunk 1 ---
Scheme Name: Jagananna Chedodu
Section: Details

"Jagananna Chedodu" is a State-Funded Social Welfare Scheme by the B.C. Welfare Department, Govt. of Andhra Pradesh. The scheme is only for tailors (All communities), Rajakas (washermen), and Nayee Brahmins (Barbers) of the state. The beneficiary will be provided a one-time amount of ₹ 10,000 per year for a period of 5 years i.e., ₹ 50,000/- in five installments. This fund can be utilized by the beneficiaries for purchasing tools, equipment, and other essentials to grow their source of income and work establishment. The Functionary Responsible for Scheme Delivery is/are Welfare & Education & Assistant / Ward Welfare & Development Secretary at Village/Ward secretariats level. At the district level, the Executive Director, of BC Corporation will coordinate with the assistance of MPDOs / Municipal Commissioner.

--- Metadata for Chunk 1 ---
{ 'page_title': 'Jagananna Chedodu',
  'sc

In [None]:
# Cell 3: Initialize Embedding Model

# Ensure SentenceTransformer, logger, device are available from Cell 1
from sentence_transformers import SentenceTransformer

# --- Configuration ---
# Choose a reliable and efficient embedding model from Hugging Face Hub
# Alternative: embedding_model_name = 'BAAI/bge-small-en-v1.5' # Good BGE option
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'

logger.info(f"Loading sentence transformer embedding model: '{embedding_model_name}' onto device: {device}")

# Both stay None unless the model loads AND the smoke test below succeeds
embedding_model = None
embedding_dim = None
try:
    # Load the model with sentence-transformers, placing it on `device`
    embedding_model = SentenceTransformer(embedding_model_name, device=device)

    # Smoke test: encode one sentence and read the vector width from the result
    test_sentence = ["This is a test."]
    test_embedding = embedding_model.encode(test_sentence)
    embedding_dim = test_embedding.shape[1] # Get the dimension (e.g., 384 for MiniLM)

    logger.info(f"Embedding model '{embedding_model_name}' loaded successfully.")
    logger.info(f"Test embedding successful. Output vector dimension: {embedding_dim}")
    print(f"\nEmbedding model '{embedding_model_name}' is ready.")
    print(f" - Max Sequence Length: {embedding_model.max_seq_length} tokens")
    print(f" - Embedding Dimension: {embedding_dim}")

except Exception as e:
    logger.error(f"Failed to load embedding model '{embedding_model_name}': {e}", exc_info=True)
    print(f"\n--- ERROR: Failed to load embedding model {embedding_model_name} ---")
    # Invalidate a half-initialised state (e.g. the model constructed but the
    # smoke-test encode failed) so the guard below actually stops the run.
    embedding_model = None
    embedding_dim = None

# Verify model loaded before proceeding
if embedding_model is None:
    raise RuntimeError("Embedding model failed to load. Cannot proceed.")

2025-04-19 07:14:21,471 - INFO - Loading sentence transformer embedding model: 'sentence-transformers/all-MiniLM-L6-v2' onto device: cpu
2025-04-19 07:14:21,476 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:14:34,409 - INFO - Embedding model 'sentence-transformers/all-MiniLM-L6-v2' loaded successfully.
2025-04-19 07:14:34,410 - INFO - Test embedding successful. Output vector dimension: 384



Embedding model 'sentence-transformers/all-MiniLM-L6-v2' is ready.
 - Max Sequence Length: 256 tokens
 - Embedding Dimension: 384


In [None]:
# Cell 4: Embed Chunks

# Ensure embedding_model (from Cell 3), chunk_texts (from Cell 2),
# embedding_dim (from Cell 3), logger, device are available
import numpy as np # Import numpy for checking shape later if needed

# Result holder; left as None when embedding is skipped, fails, or has the wrong shape
chunk_embeddings = None

# Both prerequisites must exist in this namespace and be non-empty / truthy
model_ready = 'embedding_model' in locals() and bool(embedding_model)
chunks_ready = 'chunk_texts' in locals() and bool(chunk_texts)

if not (model_ready and chunks_ready):
    logger.error("Embedding model or text chunks not available. Please run previous cells successfully.")
    print("\nError: Cannot generate embeddings without model and data.")
else:
    logger.info(f"Starting to generate embeddings for {len(chunk_texts)} text chunks using '{embedding_model_name}' on device '{embedding_model.device}'...")
    print(f"Generating embeddings for {len(chunk_texts)} chunks. This might take a few moments...")

    # A single encode() call over all chunks, with a progress bar and a NumPy result
    start_time = time.time()
    try:
        chunk_embeddings = embedding_model.encode(
            chunk_texts,
            show_progress_bar=True,
            convert_to_numpy=True,
        )
        end_time = time.time()
        logger.info(f"Finished generating embeddings in {end_time - start_time:.2f} seconds.")

        # --- Verification: expect one row per chunk, embedding_dim columns ---
        logger.info(f"Shape of generated embeddings array: {chunk_embeddings.shape}")
        expected_shape = (len(chunk_texts), embedding_dim)
        if chunk_embeddings.shape != expected_shape:
            logger.error(f"Mismatch in expected embedding shape! Expected {expected_shape}, Got {chunk_embeddings.shape}")
            print("\nError: Embedding generation resulted in unexpected shape.")
            chunk_embeddings = None  # discard embeddings with an unexpected shape
        else:
            logger.info("Embeddings generated successfully and shape is correct.")
            print(f"\nSuccessfully generated embeddings for {chunk_embeddings.shape[0]} chunks.")

    except Exception as embed_err:
        logger.error(f"An error occurred during embedding generation: {embed_err}", exc_info=True)
        print("\nAn error occurred while generating embeddings.")
        chunk_embeddings = None

2025-04-19 07:14:38,305 - INFO - Starting to generate embeddings for 828 text chunks using 'sentence-transformers/all-MiniLM-L6-v2' on device 'cpu'...


Generating embeddings for 828 chunks. This might take a few moments...


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

2025-04-19 07:15:07,995 - INFO - Finished generating embeddings in 29.69 seconds.
2025-04-19 07:15:07,997 - INFO - Shape of generated embeddings array: (828, 384)
2025-04-19 07:15:07,998 - INFO - Embeddings generated successfully and shape is correct.



Successfully generated embeddings for 828 chunks.


In [None]:
# Cell 5: Build FAISS Index

# Ensure faiss, logger, np are available
# Ensure chunk_embeddings, embedding_dim are available from Cell 4

import numpy as np # Ensure numpy is imported
import faiss       # Ensure faiss is imported

faiss_index = None  # stays None unless the index is built without errors

# Prerequisites from the embedding cells
embeddings_ready = 'chunk_embeddings' in locals() and chunk_embeddings is not None
dim_ready = 'embedding_dim' in locals() and bool(embedding_dim)

if not (embeddings_ready and dim_ready):
    logger.error("Embeddings ('chunk_embeddings') not available. Cannot build FAISS index.")
    print("\nError: Embeddings not found. Please run Cell 4 successfully first.")
else:
    logger.info(f"Building FAISS index for {chunk_embeddings.shape[0]} vectors with dimension {embedding_dim}...")
    try:
        # The vectors are handed to FAISS as float32; convert only when needed
        if chunk_embeddings.dtype == np.float32:
            embeddings_for_faiss = chunk_embeddings
        else:
            logger.warning(f"Embeddings dtype is {chunk_embeddings.dtype}, converting to float32 for FAISS.")
            embeddings_for_faiss = chunk_embeddings.astype(np.float32)

        # --- Create FAISS Index ---
        # IndexFlatL2: exact (brute-force) search by Euclidean distance, a good
        # fit for a small corpus. IndexIVFFlat / IndexHNSWFlat are the usual
        # alternatives when the corpus grows.
        faiss_index = faiss.IndexFlatL2(embedding_dim)
        faiss_index.add(embeddings_for_faiss)

        logger.info("FAISS index built successfully using IndexFlatL2.")
        logger.info(f"Number of vectors indexed: {faiss_index.ntotal}")

        # --- Verification ---
        if faiss_index.is_trained:
            logger.info("FAISS index is trained (as expected for IndexFlatL2).")
        if faiss_index.ntotal != len(chunk_texts):
            logger.warning(f"FAISS index count ({faiss_index.ntotal}) does not match chunk count ({len(chunk_texts)}).")
            print(f"\nBuilt FAISS index, but vector count seems incorrect ({faiss_index.ntotal} vs {len(chunk_texts)}).")
        else:
            logger.info("FAISS index vector count matches number of text chunks.")
            print(f"\nSuccessfully built FAISS index with {faiss_index.ntotal} scheme section vectors.")

    except Exception as index_err:
        logger.error(f"Failed to build FAISS index: {index_err}", exc_info=True)
        print("\n--- ERROR: Failed to build FAISS index ---")
        faiss_index = None

# Stop the notebook here if no index is available
if not faiss_index:
    raise RuntimeError("FAISS index building failed. Cannot proceed.")

2025-04-19 07:15:19,988 - INFO - Building FAISS index for 828 vectors with dimension 384...
2025-04-19 07:15:19,990 - INFO - FAISS index built successfully using IndexFlatL2.
2025-04-19 07:15:19,991 - INFO - Number of vectors indexed: 828
2025-04-19 07:15:19,991 - INFO - FAISS index is trained (as expected for IndexFlatL2).
2025-04-19 07:15:19,992 - INFO - FAISS index vector count matches number of text chunks.



Successfully built FAISS index with 828 scheme section vectors.


# LLMs

In [None]:
# Cell 6: Initialize Generator LLM

# Ensure transformers (AutoTokenizer, AutoModelForSeq2SeqLM), logger, device are available
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# --- Configuration ---
# A sequence-to-sequence model tuned for instruction following / QA.
# Flan-T5 'small' is the lightweight choice; 'google/flan-t5-base' trades more
# VRAM/RAM for better quality. A decoder-only model such as TinyLlama would
# need AutoModelForCausalLM and a text-generation pipeline instead.
generator_model_name = 'google/flan-t5-small'

logger.info(f"Loading generator LLM tokenizer: '{generator_model_name}'")

generator_model = tokenizer = None

try:
    # Tokenizer first, then the matching model weights
    tokenizer = AutoTokenizer.from_pretrained(generator_model_name)
    logger.info("Tokenizer loaded successfully.")

    logger.info(f"Loading generator model '{generator_model_name}' onto device: {device}...")
    generator_model = AutoModelForSeq2SeqLM.from_pretrained(generator_model_name)
    generator_model = generator_model.to(device)  # place on the selected CPU/GPU device

    # Evaluation mode (disables dropout, etc.)
    generator_model.eval()

    logger.info(f"Generator model '{generator_model_name}' loaded successfully onto {device}.")
    print(f"\nGenerator model '{generator_model_name}' is ready.")

    # Optional: a transformers `pipeline('text2text-generation', model=generator_model,
    # tokenizer=tokenizer, device=...)` could wrap generation later (device=0 for
    # cuda:0, device=-1 for cpu); model.generate can also be called directly.

except Exception as load_err:
    logger.error(f"Failed to load generator model or tokenizer '{generator_model_name}': {load_err}", exc_info=True)
    print(f"\n--- ERROR: Failed to load generator model {generator_model_name} ---")
    # Leave nothing half-loaded behind
    generator_model = tokenizer = None


# Both pieces are required by the cells that follow
if not (generator_model and tokenizer):
    raise RuntimeError("Generator model or tokenizer failed to load. Cannot proceed.")

2025-04-18 11:38:14,120 - INFO - Loading generator LLM tokenizer: 'google/flan-t5-small'


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

2025-04-18 11:38:17,283 - INFO - Tokenizer loaded successfully.
2025-04-18 11:38:17,284 - INFO - Loading generator model 'google/flan-t5-small' onto device: cpu...


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

2025-04-18 11:38:19,873 - INFO - Generator model 'google/flan-t5-small' loaded successfully onto cpu.



Generator model 'google/flan-t5-small' is ready.


In [None]:
# Cell 6: Initialize Generator LLM (Upgrade to flan-t5-large)

# Ensure transformers (AutoTokenizer, AutoModelForSeq2SeqLM), logger, device are available
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# --- Configuration ---
# --- UPGRADE MODEL ---
# Re-running this cell replaces the previously loaded generator (flan-t5-small)
# with the larger checkpoint named below.
generator_model_name = 'google/flan-t5-large' # Changed from small to large
# --- END UPGRADE ---

logger.info(f"Loading generator LLM tokenizer: '{generator_model_name}'")

# Rebinding to None first drops this cell's references to any earlier
# model/tokenizer before the new checkpoint is loaded.
generator_model = None
tokenizer = None

try:
    # Load tokenizer associated with the model
    tokenizer = AutoTokenizer.from_pretrained(generator_model_name)
    logger.info("Tokenizer loaded successfully.")

    # Load the model itself and move it to the designated device (CPU/GPU)
    logger.info(f"Loading generator model '{generator_model_name}' onto device: {device}...")
    generator_model = AutoModelForSeq2SeqLM.from_pretrained(generator_model_name).to(device)

    # Set the model to evaluation mode (disables dropout, etc.)
    generator_model.eval()

    logger.info(f"Generator model '{generator_model_name}' loaded successfully onto {device}.")
    print(f"\nGenerator model '{generator_model_name}' is ready.")

except Exception as e:
    logger.error(f"Failed to load generator model or tokenizer '{generator_model_name}': {e}", exc_info=True)
    print(f"\n--- ERROR: Failed to load generator model {generator_model_name} ---")
    # Make sure nothing half-loaded is left behind
    generator_model = None
    tokenizer = None

# Verify model and tokenizer loaded (explicit None checks: the test is about
# "was it loaded", not about the truthiness of the objects themselves)
if generator_model is None or tokenizer is None:
     raise RuntimeError("Generator model or tokenizer failed to load. Cannot proceed.")

2025-04-19 07:20:22,694 - INFO - Loading generator LLM tokenizer: 'google/flan-t5-large'


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

2025-04-19 07:20:47,512 - INFO - Tokenizer loaded successfully.
2025-04-19 07:20:47,513 - INFO - Loading generator model 'google/flan-t5-large' onto device: cpu...
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

2025-04-19 07:21:20,395 - INFO - Generator model 'google/flan-t5-large' loaded successfully onto cpu.



Generator model 'google/flan-t5-large' is ready.


In [None]:
# Cell 7: Define Retrieval Function (Corrected Test Call)

# Ensure faiss_index (Cell 5), embedding_model (Cell 3), chunk_texts (Cell 2),
# logger, np are available

import numpy as np # Ensure numpy is imported
import faiss       # Ensure faiss is imported

def retrieve_context(question, embedding_model, index, chunks_list, top_k=3):
    """
    Embed a question and return the text chunks closest to it in a FAISS index.

    Args:
        question (str): The user's question.
        embedding_model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``
            (a sentence-transformers model in this notebook).
        index: FAISS index exposing ``ntotal`` and ``search(vectors, k)``.
        chunks_list (list): Text chunks; FAISS id ``i`` is mapped to ``chunks_list[i]``.
        top_k (int): Number of chunks to retrieve. Must be a positive integer.

    Returns:
        list: The retrieved chunk strings, in the order FAISS returned their ids.
              An empty list is returned when an argument is missing or invalid,
              when the index is empty, or when any exception occurs during
              encoding/search (the exception is logged, not re-raised).
    """
    # Compare the model and index against None explicitly: their truthiness
    # depends on third-party __len__/__bool__ implementations.
    if embedding_model is None or index is None or not question or not chunks_list:
        logger.error("Missing required arguments for retrieve_context.")
        return []
    # Reject bad k up front instead of letting the search call fail with a traceback.
    if not isinstance(top_k, (int, np.integer)) or top_k < 1:
        logger.error(f"top_k must be a positive integer, got {top_k!r}.")
        return []
    if index.ntotal == 0:
        logger.error("FAISS index is empty.")
        return []
    if len(chunks_list) != index.ntotal:
        logger.warning("Mismatch between FAISS index size and chunks list length.")

    # Never ask for more neighbours than the index holds; the surplus slots
    # would only come back as placeholder ids that are filtered out below.
    search_k = min(int(top_k), int(index.ntotal))

    logger.info(f"Retrieving top {top_k} context chunks for question: '{question[:100]}...'")
    try:
        # 1. Embed the question (single-row batch, cast to float32 for the search call)
        logger.debug("Encoding question...")
        question_embedding = embedding_model.encode(
            [question],
            convert_to_numpy=True
        ).astype(np.float32)
        logger.debug(f"Question embedding shape: {question_embedding.shape}")

        # 2. Search FAISS index
        logger.debug(f"Searching FAISS index (size {index.ntotal}) for top {search_k} neighbours...")
        distances, indices = index.search(question_embedding, search_k)
        retrieved_indices = indices[0]
        logger.debug(f"FAISS search results (Indices): {retrieved_indices}")
        logger.debug(f"FAISS search results (Distances): {distances[0]}")

        # 3. Map ids back to text, skipping ids that fall outside chunks_list
        #    (negative placeholders, or ids beyond a shorter-than-index list).
        chunk_count = len(chunks_list)
        retrieved_chunks = [
            chunks_list[int(i)] for i in retrieved_indices if 0 <= i < chunk_count
        ]
        logger.info(f"Retrieved {len(retrieved_chunks)} chunks from index.")
        if len(retrieved_chunks) < top_k:
            logger.warning(f"Retrieved fewer chunks ({len(retrieved_chunks)}) than requested ({top_k}).")

        return retrieved_chunks

    except Exception as e:
        # Best-effort contract: callers receive an empty list rather than an exception.
        logger.error(f"Error during context retrieval: {e}", exc_info=True)
        return []

# --- Smoke-test retrieve_context with one known question ---
# Stays False unless the retrieval call below returns at least one chunk.
retrieval_test_passed = False

if not (
    'faiss_index' in locals() and faiss_index
    and 'embedding_model' in locals() and embedding_model
    and 'chunk_texts' in locals() and chunk_texts
):
    # One of the earlier cells did not leave its artefact in the namespace.
    logger.error("Required components (index, model, chunks) not available for retrieval test.")
    print("\nError: Cannot test retrieval without FAISS index, embedding model, and text chunks.")
else:
    test_question = "What financial assistance is provided by Jagananna Chedodu?"
    logger.info(f"--- Testing retrieve_context function with question: '{test_question}' ---")

    # test_question and retrieved_context_chunks are reused by the prompt-building test cell.
    retrieved_context_chunks = retrieve_context(
        question=test_question,
        embedding_model=embedding_model,
        index=faiss_index,
        chunks_list=chunk_texts,
        top_k=3,
    )

    if not retrieved_context_chunks:
        logger.error("Context retrieval test failed to return any chunks.")
        print("\nError: Context retrieval test failed.")
    else:
        logger.info("Context retrieval test successful.")
        print(f"\n--- Retrieved Context for '{test_question}' (Top {len(retrieved_context_chunks)}) ---")
        for chunk_number, chunk in enumerate(retrieved_context_chunks, start=1):
            print(f"--- Context Chunk {chunk_number} ---")
            # Preview only the first 700 characters of each chunk.
            print(chunk[:700] + "...")
            print("-" * 25)
        retrieval_test_passed = True

    logger.info("--- Finished testing retrieve_context function ---")

# Halt the notebook here: later cells need working retrieval.
if not retrieval_test_passed:
    raise RuntimeError("Context retrieval test failed. Cannot proceed.")

2025-04-19 07:21:32,415 - INFO - --- Testing retrieve_context function with question: 'What financial assistance is provided by Jagananna Chedodu?' ---
2025-04-19 07:21:32,415 - INFO - Retrieving top 3 context chunks for question: 'What financial assistance is provided by Jagananna Chedodu?...'


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:21:32,462 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:21:32,465 - INFO - Context retrieval test successful.
2025-04-19 07:21:32,467 - INFO - --- Finished testing retrieve_context function ---



--- Retrieved Context for 'What financial assistance is provided by Jagananna Chedodu?' (Top 3) ---
--- Context Chunk 1 ---
Scheme Name: Jagananna Chedodu
Section: Benefits

The beneficiary will be provided a one-time amount of ₹ 10,000 per year for a period of 5 years i.e., ₹ 50,000/- in five installments.
This fund can be utilized by the beneficiaries for purchasing tools, equipment, and other essentials to grow their source of income and work establishment....
-------------------------
--- Context Chunk 2 ---
Scheme Name: Jagananna Chedodu
Section: Details

"Jagananna Chedodu" is a State-Funded Social Welfare Scheme by the B.C. Welfare Department, Govt. of Andhra Pradesh. The scheme is only for tailors (All communities), Rajakas (washermen), and Nayee Brahmins (Barbers) of the state. The beneficiary will be provided a one-time amount of ₹ 10,000 per year for a period of 5 years i.e., ₹ 50,000/- in five installments. This fund can be utilized by the beneficiaries for purchasing tool

In [None]:
# Cell 8: Define RAG Prompt Function

# Ensure logger is available

def build_rag_prompt(question, context_chunks):
    """
    Build the generator prompt from a question and its retrieved context.

    Chunks that are not strings, or that contain only whitespace, are ignored.
    If no usable chunk remains (including when ``context_chunks`` is empty or
    None), a bare question/answer prompt without any context is returned.

    Args:
        question (str): The user's question.
        context_chunks (list): Context strings retrieved from the vector search.

    Returns:
        str: A formatted prompt string ready for the LLM.
    """
    # Drop blank / non-text entries so the prompt never carries an empty
    # "Context:" section and the join below cannot fail on a non-string.
    usable_chunks = [
        chunk for chunk in (context_chunks or [])
        if isinstance(chunk, str) and chunk.strip()
    ]

    if not usable_chunks:
        logger.warning("No context chunks provided to build_rag_prompt. Prompt will lack context.")
        # Basic prompt if no context is found
        return f"Question: {question}\nAnswer:"

    # Separate chunks with a blank line, a '---' rule, and another blank line.
    context_string = "\n\n---\n\n".join(usable_chunks)

    # Instruction-style prompt: tell the model to rely only on the supplied context.
    prompt = f"""Answer the following question based strictly on the context provided below. If the information needed to answer the question is not present in the context, state that you cannot answer based on the context.

Context:
{context_string}

---
Question: {question}

Answer based only on the provided context:"""

    logger.info("RAG prompt created.")
    logger.debug(f"Prompt Preview (start):\n{prompt[:500]}...") # Log beginning of prompt
    return prompt

# --- Smoke-test build_rag_prompt ---
# Reuses test_question / retrieved_context_chunks left behind by the retrieval test cell.
prompt_test_passed = False

if not (
    'retrieved_context_chunks' in locals() and retrieved_context_chunks
    and 'test_question' in locals() and test_question
):
    logger.warning("Cannot test prompt building without retrieved context or test question from Cell 7.")
    print("\nWarning: Skipping prompt building test. Please ensure Cell 7 ran successfully and produced context.")
    # A skipped test is treated as a pass so the notebook can continue,
    # although RAG will not work without context.
    prompt_test_passed = True
else:
    logger.info("--- Testing build_rag_prompt function ---")
    test_prompt = build_rag_prompt(test_question, retrieved_context_chunks)

    # A well-formed RAG prompt must contain both section markers.
    prompt_looks_valid = bool(test_prompt) and all(
        marker in test_prompt for marker in ("Context:", "Question:")
    )

    if not prompt_looks_valid:
        logger.error(f"Prompt building failed. Result: {test_prompt}")
        print("\nError: Failed to build RAG prompt correctly.")
    else:
        logger.info("Prompt building test successful.")
        print("\n--- Generated RAG Prompt (Preview) ---")
        # The whole prompt (context included) is printed for manual verification.
        print(test_prompt)
        print("\n--- End Prompt Preview ---")
        prompt_test_passed = True

    logger.info("--- Finished testing build_rag_prompt function ---")

# Abort only when the test could have run (both inputs exist) and did not pass.
if not prompt_test_passed:
    if 'retrieved_context_chunks' in locals() and 'test_question' in locals():
        raise RuntimeError("RAG prompt building test failed. Cannot proceed.")

2025-04-19 07:21:40,297 - INFO - --- Testing build_rag_prompt function ---
2025-04-19 07:21:40,300 - INFO - RAG prompt created.
2025-04-19 07:21:40,300 - INFO - Prompt building test successful.
2025-04-19 07:21:40,301 - INFO - --- Finished testing build_rag_prompt function ---



--- Generated RAG Prompt (Preview) ---
Answer the following question based strictly on the context provided below. If the information needed to answer the question is not present in the context, state that you cannot answer based on the context.

Context:
Scheme Name: Jagananna Chedodu
Section: Benefits

The beneficiary will be provided a one-time amount of ₹ 10,000 per year for a period of 5 years i.e., ₹ 50,000/- in five installments.
This fund can be utilized by the beneficiaries for purchasing tools, equipment, and other essentials to grow their source of income and work establishment.

---

Scheme Name: Jagananna Chedodu
Section: Details

"Jagananna Chedodu" is a State-Funded Social Welfare Scheme by the B.C. Welfare Department, Govt. of Andhra Pradesh. The scheme is only for tailors (All communities), Rajakas (washermen), and Nayee Brahmins (Barbers) of the state. The beneficiary will be provided a one-time amount of ₹ 10,000 per year for a period of 5 years i.e., ₹ 50,000/- in 

In [None]:
# Cell 9: Define Generation Function (Using Greedy Decoding)

# Ensure generator_model, tokenizer, logger, device are available from Cell 6
# Ensure torch is imported

import torch # For torch.no_grad()

def generate_answer(prompt, generator_model, tokenizer, max_new_tokens=200, max_input_tokens=1024):
    """
    Generate an answer for a prompt with greedy decoding (sampling disabled).

    Args:
        prompt (str): The complete RAG prompt including context and question.
        generator_model: The loaded Hugging Face Seq2Seq model (e.g., Flan-T5).
        tokenizer: The loaded tokenizer corresponding to the model.
        max_new_tokens (int): The maximum number of tokens to generate for the answer.
        max_input_tokens (int): Token limit applied when encoding the prompt;
            longer prompts are truncated by the tokenizer. Defaults to 1024.

    Returns:
        str: The generated answer with surrounding whitespace stripped, or a
             string starting with "Error:" when an argument is missing or an
             exception occurs (the exception is logged, not re-raised).
    """
    if not all([prompt, generator_model, tokenizer]):
        logger.error("Missing required arguments for generate_answer.")
        return "Error: Model/Tokenizer/Prompt not provided."

    logger.info("Generating answer (using greedy decoding)...")
    try:
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_input_tokens
        )

        # Truncation is otherwise silent; surface it so a clipped prompt can be
        # diagnosed from the logs.
        if inputs["input_ids"].shape[-1] >= max_input_tokens:
            logger.warning(
                f"Prompt reached the {max_input_tokens}-token input limit; "
                "part of the prompt may have been truncated."
            )

        # Place the inputs on the device of the model that was actually passed
        # in, rather than depending on a notebook-level `device` variable.
        model_device = getattr(generator_model, "device", None)
        if model_device is not None:
            inputs = inputs.to(model_device)

        with torch.no_grad():
            outputs = generator_model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False  # greedy decoding; temperature/top_p need do_sample=True
            )

        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        logger.info("Answer generated successfully.")
        logger.debug(f"Raw Generated Answer: {answer}")
        return answer.strip()

    except Exception as e:
        # Best-effort contract: callers get an "Error:" string instead of an exception.
        logger.error(f"Error during answer generation: {e}", exc_info=True)
        return "Error: Could not generate answer due to an exception."

# --- Test the generation function ---
# Reuses test_prompt produced by the prompt-building test cell.
generation_test_passed = False
if (
    'generator_model' in locals() and generator_model
    and 'tokenizer' in locals() and tokenizer
    and 'test_prompt' in locals() and test_prompt
):
    logger.info("--- Testing generate_answer function (Greedy Decoding) ---")
    print("\nGenerating answer for the test question (greedy decoding)...")
    generated_answer = generate_answer(test_prompt, generator_model, tokenizer)

    # generate_answer signals failure with a string that *starts with* "Error:".
    # Checking the prefix (not mere containment) avoids rejecting a genuine
    # answer that happens to include the text "Error:" somewhere inside it.
    if generated_answer and not generated_answer.startswith("Error:"):
        logger.info("Generation test successful.")
        print("\n--- Generated Answer (Test - Greedy) ---")
        print(generated_answer)
        print("--- End Generated Answer (Test) ---")
        generation_test_passed = True
    else:
        logger.error(f"Generation test failed. Result: {generated_answer}")
        print(f"\nError: Failed to generate answer. Result: {generated_answer}")

    logger.info("--- Finished testing generate_answer function ---")

else:
    logger.warning("Cannot test generation without model, tokenizer, or test prompt from previous cells.")
    print("\nWarning: Skipping generation test.")
    # A skipped test is treated as a pass so the notebook can continue.
    generation_test_passed = True

# Abort only when the test could have run (all three inputs exist) and did not pass.
if not generation_test_passed and ('generator_model' in locals() and 'tokenizer' in locals() and 'test_prompt' in locals()):
    raise RuntimeError("Answer generation test failed. Cannot proceed.")

2025-04-19 07:21:48,266 - INFO - --- Testing generate_answer function (Greedy Decoding) ---
2025-04-19 07:21:48,271 - INFO - Generating answer (using greedy decoding)...



Generating answer for the test question (greedy decoding)...


2025-04-19 07:21:57,189 - INFO - Answer generated successfully.
2025-04-19 07:21:57,190 - INFO - Generation test successful.
2025-04-19 07:21:57,191 - INFO - --- Finished testing generate_answer function ---



--- Generated Answer (Test - Greedy) ---
10,000 per year for a period of 5 years i.e.,  50,000/- in five installments
--- End Generated Answer (Test) ---


In [None]:
# Cell 10: End-to-End QA Function and Test

# Ensure all components are available:
# embedding_model, faiss_index, chunk_texts,
# generator_model, tokenizer, logger, device
# Ensure functions retrieve_context, build_rag_prompt, generate_answer are defined

import time
import pprint # For nice printing if debugging context

def answer_question(user_question, top_k=3):
    """
    Run the full RAG pipeline for one question.

    Steps: retrieve context chunks with ``retrieve_context``, turn them into a
    prompt with ``build_rag_prompt`` (or fall back to a bare question prompt
    when nothing was retrieved), then call ``generate_answer``.

    Reads the notebook globals ``embedding_model``, ``faiss_index``,
    ``chunk_texts``, ``generator_model`` and ``tokenizer``.

    Args:
        user_question (str): The question to answer.
        top_k (int): The number of context chunks to retrieve.

    Returns:
        str: Whatever ``generate_answer`` returns for the constructed prompt.
    """
    logger.info(f"Received question: '{user_question}'")
    started_at = time.time()

    # Step 1: retrieval
    context_chunks = retrieve_context(
        question=user_question,
        embedding_model=embedding_model,
        index=faiss_index,
        chunks_list=chunk_texts,
        top_k=top_k,
    )

    # Step 2: prompt construction
    if context_chunks:
        logger.info(f"Retrieved {len(context_chunks)} context chunks.")
        prompt = build_rag_prompt(user_question, context_chunks)
    else:
        # Nothing retrieved: still ask the model, using the question alone.
        logger.warning("No relevant context found for the question.")
        prompt = f"Question: {user_question}\nAnswer:"

    # Step 3: generation (greedy decoding)
    final_answer = generate_answer(prompt, generator_model, tokenizer)

    elapsed_seconds = time.time() - started_at
    logger.info(f"Answer generated in {elapsed_seconds:.2f} seconds.")

    return final_answer

# --- Test the end-to-end function ---
# answer_question reads five notebook globals directly. Require every one of
# them to exist AND be non-None before running, so a failed earlier cell gives
# one clear error here instead of a per-question failure inside the pipeline.
# globals() is used (not locals()) because the check runs inside a generator
# expression, which has its own local scope.
if all(
    globals().get(component_name) is not None
    for component_name in (
        'embedding_model', 'faiss_index', 'chunk_texts',
        'generator_model', 'tokenizer',
    )
):
    logger.info("--- Testing End-to-End QA Function ---")

    test_questions = [
        "What financial assistance is provided by Jagananna Chedodu?",
        "Who is eligible for the Dr. Ambedakar Post-Matric scholarship for EBC students?",
        "How do I apply for Snehasanthwanam?", # Kerala scheme
        "What documents are needed for the Post-Matric scholarship for EBC students?",
        "Is there a scheme related to pensions for old age?", # General topic
        "What is the objective of the PM-KISAN scheme?", # Check if PM-KISAN was in the 100?
        "What is the capital of India?" # Out of domain test
    ]

    for q in test_questions:
        print(f"\n❓ Question: {q}")
        answer = answer_question(q)
        print(f"\n💬 Answer: {answer}")
        print("="*50)

    logger.info("--- Finished End-to-End QA Test ---")

else:
    logger.error("Cannot run end-to-end test, required components (index/model) not available.")
    print("\nError: Cannot run QA test. Please ensure previous cells ran successfully.")

2025-04-19 07:22:13,299 - INFO - --- Testing End-to-End QA Function ---
2025-04-19 07:22:13,301 - INFO - Received question: 'What financial assistance is provided by Jagananna Chedodu?'
2025-04-19 07:22:13,302 - INFO - Retrieving top 3 context chunks for question: 'What financial assistance is provided by Jagananna Chedodu?...'



❓ Question: What financial assistance is provided by Jagananna Chedodu?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:22:13,349 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:22:13,349 - INFO - Retrieved 3 context chunks.
2025-04-19 07:22:13,350 - INFO - RAG prompt created.
2025-04-19 07:22:13,359 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:22:22,628 - INFO - Answer generated successfully.
2025-04-19 07:22:22,631 - INFO - Answer generated in 9.33 seconds.
2025-04-19 07:22:22,632 - INFO - Received question: 'Who is eligible for the Dr. Ambedakar Post-Matric scholarship for EBC students?'
2025-04-19 07:22:22,634 - INFO - Retrieving top 3 context chunks for question: 'Who is eligible for the Dr. Ambedakar Post-Matric scholarship for EBC students?...'



💬 Answer: 10,000 per year for a period of 5 years i.e.,  50,000/- in five installments

❓ Question: Who is eligible for the Dr. Ambedakar Post-Matric scholarship for EBC students?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:22:22,694 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:22:22,695 - INFO - Retrieved 3 context chunks.
2025-04-19 07:22:22,696 - INFO - RAG prompt created.
2025-04-19 07:22:22,713 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:22:37,375 - INFO - Answer generated successfully.
2025-04-19 07:22:37,376 - INFO - Answer generated in 14.74 seconds.
2025-04-19 07:22:37,377 - INFO - Received question: 'How do I apply for Snehasanthwanam?'
2025-04-19 07:22:37,378 - INFO - Retrieving top 3 context chunks for question: 'How do I apply for Snehasanthwanam?...'



💬 Answer: Indian nationals belonging to the General Category (Other than Schedule Caste, Schedule Tribe and Other Backward Classes).

❓ Question: How do I apply for Snehasanthwanam?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:22:37,419 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:22:37,419 - INFO - Retrieved 3 context chunks.
2025-04-19 07:22:37,420 - INFO - RAG prompt created.
2025-04-19 07:22:37,431 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:22:42,662 - INFO - Answer generated successfully.
2025-04-19 07:22:42,663 - INFO - Answer generated in 5.28 seconds.
2025-04-19 07:22:42,664 - INFO - Received question: 'What documents are needed for the Post-Matric scholarship for EBC students?'
2025-04-19 07:22:42,665 - INFO - Retrieving top 3 context chunks for question: 'What documents are needed for the Post-Matric scholarship for EBC students?...'



💬 Answer: The application duly filled out should be submitted to the Kasaragod District Collector Office

❓ Question: What documents are needed for the Post-Matric scholarship for EBC students?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:22:42,705 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:22:42,707 - INFO - Retrieved 3 context chunks.
2025-04-19 07:22:42,707 - INFO - RAG prompt created.
2025-04-19 07:22:42,716 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:22:50,469 - INFO - Answer generated successfully.
2025-04-19 07:22:50,469 - INFO - Answer generated in 7.80 seconds.
2025-04-19 07:22:50,471 - INFO - Received question: 'Is there a scheme related to pensions for old age?'
2025-04-19 07:22:50,471 - INFO - Retrieving top 3 context chunks for question: 'Is there a scheme related to pensions for old age?...'



💬 Answer: Income Certificate. Tuition Fee Receipt. Last academic qualification certificate. Bank Details of the applicant or of the Parent/Guardian

❓ Question: Is there a scheme related to pensions for old age?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:22:50,512 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:22:50,513 - INFO - Retrieved 3 context chunks.
2025-04-19 07:22:50,513 - INFO - RAG prompt created.
2025-04-19 07:22:50,521 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:22:52,143 - INFO - Answer generated successfully.
2025-04-19 07:22:52,144 - INFO - Answer generated in 1.67 seconds.
2025-04-19 07:22:52,146 - INFO - Received question: 'What is the objective of the PM-KISAN scheme?'
2025-04-19 07:22:52,147 - INFO - Retrieving top 3 context chunks for question: 'What is the objective of the PM-KISAN scheme?...'



💬 Answer: Yes

❓ Question: What is the objective of the PM-KISAN scheme?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:22:52,190 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:22:52,190 - INFO - Retrieved 3 context chunks.
2025-04-19 07:22:52,191 - INFO - RAG prompt created.
2025-04-19 07:22:52,200 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:23:05,218 - INFO - Answer generated successfully.
2025-04-19 07:23:05,219 - INFO - Answer generated in 13.07 seconds.
2025-04-19 07:23:05,220 - INFO - Received question: 'What is the capital of India?'
2025-04-19 07:23:05,221 - INFO - Retrieving top 3 context chunks for question: 'What is the capital of India?...'



💬 Answer: To promote women's entrepreneurship and provide them with financial assistance to start or expand their businesses

❓ Question: What is the capital of India?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-04-19 07:23:05,265 - INFO - Retrieved 3 chunks from index.
2025-04-19 07:23:05,266 - INFO - Retrieved 3 context chunks.
2025-04-19 07:23:05,267 - INFO - RAG prompt created.
2025-04-19 07:23:05,279 - INFO - Generating answer (using greedy decoding)...
2025-04-19 07:23:08,531 - INFO - Answer generated successfully.
2025-04-19 07:23:08,532 - INFO - Answer generated in 3.31 seconds.
2025-04-19 07:23:08,533 - INFO - --- Finished End-to-End QA Test ---



💬 Answer: Cannot answer


In [None]:
# Cell 11: Interactive Q&A Loop (Suppressing INFO Logs)

# Ensure the start_qa_session_simple function is defined from the previous step
# Ensure logger object exists from Cell 1 setup
# Ensure logging module is imported

import logging

# --- Function Definition (Keep from previous step) ---
def start_qa_session_simple():
    """
    Run an interactive console loop that forwards questions to ``answer_question``.

    Prints a banner, then repeatedly reads a line from ``input()``. Blank lines
    are ignored; 'quit' or 'exit' (case-insensitive, surrounding whitespace
    ignored), EOF, or Ctrl-C end the loop. Any other exception raised while
    answering is printed (and logged if ``logger`` exists) and the loop goes on.

    Returns immediately, after printing an error, when ``answer_question`` is
    not defined in the module namespace.
    """
    for banner_line in (
        "\n--- MyScheme QA Bot ---",
        "Ask questions about the scraped government schemes.",
        "Type 'quit' or 'exit' anytime to stop.",
        "-" * 25,
    ):
        print(banner_line)

    namespace = globals()

    # Hard requirement: without the pipeline entry point there is nothing to run.
    if 'answer_question' not in namespace:
        print("ERROR: The 'answer_question' function is not defined.")
        print("Please ensure Cell 10 was run successfully.")
        return

    # Soft requirement: warn (but continue) if any pipeline component is absent or None.
    pipeline_components = ('embedding_model', 'faiss_index', 'chunk_texts',
                           'generator_model', 'tokenizer')
    if any(namespace.get(component) is None for component in pipeline_components):
        print("WARNING: Some necessary components (models, index, data) might be missing.")

    while True:
        try:
            user_question = input("\n❓ Your Question: ")
            normalized = user_question.strip()

            if normalized.lower() in ('quit', 'exit'):
                print("\nExiting QA Bot. Goodbye!")
                break

            if normalized:
                print("🧠 Thinking...")
                # The original (unstripped) text is what gets sent to the pipeline.
                model_answer = answer_question(user_question)

                print("\n💬 Model Answer:")
                print(model_answer)
                print("-" * 25)

        except EOFError:
            print("\nExiting QA Bot (EOF detected).")
            break
        except KeyboardInterrupt:
            print("\nExiting QA Bot (Interrupted by user).")
            break
        except Exception as e:
            # Keep the session alive on unexpected errors; report and move on.
            print(f"\nAn error occurred: {e}")
            if 'logger' in namespace:
                logger.error(f"Error during QA loop: {e}", exc_info=True)


# --- Launch the interactive session with INFO logging muted ---

print("Starting interactive session (suppressing INFO logs)...")

# logging.getLogger() with no name returns the root logger.
qabot_logger = logging.getLogger()
# Remember the current level so it can be restored afterwards.
original_level = qabot_logger.level

# While the session runs, only WARNING and above pass the root logger's level.
qabot_logger.setLevel(logging.WARNING)

try:
    start_qa_session_simple()
finally:
    # Runs whether the loop ended normally or with an exception.
    print(f"\nResetting logging level to {logging.getLevelName(original_level)}.")
    qabot_logger.setLevel(original_level)

Starting interactive session (suppressing INFO logs)...

--- MyScheme QA Bot ---
Ask questions about the scraped government schemes.
Type 'quit' or 'exit' anytime to stop.
-------------------------

❓ Your Question: hi
🧠 Thinking...

💬 Model Answer:
Can't answer
-------------------------

❓ Your Question: how are you
🧠 Thinking...

💬 Model Answer:
Cannot answer
-------------------------

❓ Your Question: scheme
🧠 Thinking...

💬 Model Answer:
Assistance Scheme for Handicraft Artisans
-------------------------

❓ Your Question: exit

Exiting QA Bot. Goodbye!

Resetting logging level to INFO.
