
# Task 1 - RAG Model for QA Bot  
**Intern Name:** VINO K

This notebook demonstrates a Retrieval Augmented Generation (RAG) QA Bot for a business using:
- OpenAI API for embeddings and generation
- Pinecone DB for vector search
- LangChain for orchestrating RAG

Ensure your `.env` file contains the following keys:
- `OPENAI_API_KEY`
- `PINECONE_API_KEY`
- `PINECONE_ENVIRONMENT`


In [3]:
# -*- coding: utf-8 -*-
"""
Optimized RAG Model for QA Bot (Colab Notebook)

This Colab notebook demonstrates an optimized Retrieval Augmented Generation (RAG) model
for a Question-Answering (QA) bot, incorporating Contextual Compression using Relevance Filtering.
It leverages:
- OpenAI API for text embeddings and language model generation.
- Pinecone as a vector database for efficient similarity search.
- LangChain for orchestrating the RAG workflow (text splitting, document handling, summarization).

Before running:
1. Ensure you have an OpenAI API Key and a Pinecone API Key and Environment.
2. Set these as environment variables (e.g., in a `.env` file or Colab secrets).
"""

# ✅ 1. Setup and Installation
# Install necessary libraries. The -qU flags ensure quiet installation and upgrade.
!pip install -qU openai pinecone-client langchain langchain-openai langchain-pinecone tiktoken python-dotenv

import os
from dotenv import load_dotenv
from pinecone import Pinecone, Index
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain.chains.summarize import load_summarize_chain # New import for summarization
import time

print("✅ Libraries installed and imported successfully by VINO K.")

# ✅ 2. Load Environment Variables
# It's recommended to store API keys in a .env file for security.
load_dotenv()

# Retrieve API keys from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_environment = os.getenv("PINECONE_ENVIRONMENT")

# Basic validation for API keys
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY missing.")
if not pinecone_api_key or not pinecone_environment:
    raise ValueError("PINECONE_API_KEY or PINECONE_ENVIRONMENT missing.")

print("✅ Environment variables loaded.")

# ✅ 3. Initialize OpenAI and Pinecone Clients
# Initialize Pinecone
try:
    pinecone = Pinecone(api_key=pinecone_api_key, environment=pinecone_environment)
    print("✅ Pinecone initialized.")
except Exception as e:
    print(f"Error initializing Pinecone: {e}")
    raise

# Initialize OpenAI Embeddings model
embeddings_model = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-ada-002")
print("✅ OpenAI Embeddings model initialized.")

# Initialize OpenAI Chat model for generation (main LLM for answering)
llm = ChatOpenAI(openai_api_key=openai_api_key, model_name="gpt-3.5-turbo", temperature=0.7)
print("✅ OpenAI Chat model initialized.")

# Initialize a lightweight LLM for summarization (for Contextual Compression)
# Using gpt-3.5-turbo-instruct for summarization as suggested in the Canvas.
llm_for_summarization = ChatOpenAI(openai_api_key=openai_api_key, model_name="gpt-3.5-turbo-instruct", temperature=0.3)
print("✅ Lightweight LLM for summarization initialized.")

# ✅ 4. Sample Business Data
business_data_content = """
Prepared by VINO K

Sample Product:
- Name: AI Assistant
- Price: $500
- Features: Natural language understanding, Context awareness, Multi-turn dialogue

Return Policy:
- Returns accepted within 14 days if unopened.
- For opened products, returns are not accepted due to software licensing.
- Customers must provide original receipt for any return.
- Refunds are processed within 5-7 business days.

Customer Support:
- Email: support@example.com
- Phone: 1-800-AI-HELP
- Hours: Monday - Friday, 9 AM - 5 PM EST

Shipping Information:
- Standard shipping: 3-5 business days.
- Express shipping: 1-2 business days, additional cost.
- Free standard shipping on orders over $1000.
"""

with open("business_data.txt", "w") as f:
    f.write(business_data_content)

print("✅ Sample business data loaded.")

# ✅ 5. Text Splitting
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, # Max characters per chunk
    chunk_overlap=100, # Overlap between chunks to maintain context
    length_function=len # Use character length
)

documents = [Document(page_content=business_data_content)]
chunks = text_splitter.split_documents(documents)

print(f"✅ Document split into {len(chunks)} chunks.")

# ✅ 6. Pinecone Index Setup
index_name = "vino-task1-index" # Using the same index name from your original code

# Check if index already exists and delete if it does (for fresh start in Colab)
if index_name in pinecone.list_indexes():
    print(f"Deleting existing index: {index_name}")
    pinecone.delete_index(index_name)
    time.sleep(1) # Give a moment for index deletion

# Create Pinecone index
# The dimension must match the output dimension of your embedding model (1536 for text-embedding-ada-002)
pinecone.create_index(index_name, dimension=1536, metric="cosine")
index = pinecone.Index(index_name)

print(f"✅ Pinecone index '{index_name}' created and connected.")

# ✅ 7. Embedding and Upserting
vectors_to_upsert = []
for i, chunk in enumerate(chunks):
    chunk_id = f"id_{i}"
    # Embed the text content of the chunk
    embedding = embeddings_model.embed_query(chunk.page_content)
    # Store the original text content in metadata for retrieval
    metadata = {"text": chunk.page_content} # Store original text in metadata
    vectors_to_upsert.append((chunk_id, embedding, metadata))

# Upsert embeddings to Pinecone in batches
batch_size = 100 # Adjust batch size based on memory and network
for i in range(0, len(vectors_to_upsert), batch_size):
    batch = vectors_to_upsert[i:i + batch_size]
    index.upsert(vectors=batch)
    # print(f"Upserted batch {i//batch_size + 1}/{(len(vectors_to_upsert) + batch_size - 1) // batch_size}")

print("✅ Embeddings upserted to Pinecone.")
print(f"Pinecone index description: {index.describe_index_stats()}")


# ✅ 8. RAG QA Function with Contextual Compression
def rag_qa_bot(query: str, top_k: int = 5, relevance_threshold: float = 0.7):
    """
    Performs Retrieval Augmented Generation with Contextual Compression.

    Args:
        query (str): The user's question.
        top_k (int): Number of top documents to retrieve initially from Pinecone.
        relevance_threshold (float): Cosine similarity score threshold for filtering.

    Returns:
        str: The answer generated by the LLM.
    """
    print(f"🔎 Query: {query}")
    query_embedding = embeddings_model.embed_query(query)

    # Retrieve documents from Pinecone
    results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)

    # 1. Relevance Filtering
    # Filter documents based on a relevance score threshold
    filtered_docs_for_summarization = []
    print(f"Filtering retrieved documents with relevance threshold > {relevance_threshold}...")
    for match in results.matches:
        if match.score > relevance_threshold:
            # Create a LangChain Document object for summarization chain
            filtered_docs_for_summarization.append(Document(page_content=match.metadata['text'], metadata={"score": match.score}))
            print(f"  - Kept document (score: {match.score:.2f}): {match.metadata['text'][:70]}...")
        else:
            print(f"  - Discarded document (score: {match.score:.2f}): {match.metadata['text'][:70]}...")

    if not filtered_docs_for_summarization:
        print("No relevant documents found after filtering.")
        return "No relevant data found in my knowledge base to answer that question."

    # 2. Contextual Compression using LangChain's summarize chain
    # Using 'map_reduce' chain type for summarization over multiple documents
    summary_chain = load_summarize_chain(llm_for_summarization, chain_type="map_reduce")
    print(f"Compressing {len(filtered_docs_for_summarization)} relevant chunks...")
    try:
        compressed_context = summary_chain.run(filtered_docs_for_summarization)
        print("Context compressed successfully.")
        # print(f"Compressed context:\n{compressed_context[:200]}...") # Uncomment to see compressed context
    except Exception as e:
        print(f"Error during contextual compression: {e}")
        # Fallback to using uncompressed context if summarization fails
        compressed_context = "\n\n".join([doc.page_content for doc in filtered_docs_for_summarization])
        print("Falling back to uncompressed context.")


    # Augment prompt with the compressed context
    prompt = f"""
    You are a helpful QA assistant for Acme Innovations Inc.
    Answer the following question based ONLY on the provided context.
    If the answer is not found in the context, clearly state that you don't know or that the information is not available.

    Context:
    {compressed_context}

    Question: {query}

    Answer:
    """
    # print(f"Full prompt sent to main LLM:\n{prompt}") # Uncomment to see the full prompt

    # LLM Generation
    response = llm.invoke(prompt)
    return response.content

# ✅ 9. Test QA Bot
queries = [
    "What is the price of AI Assistant?",
    "What is the return policy for opened products?",
    "How long does standard shipping take?",
    "Who is VINO K?", # This info is in the data (prepared by VINO K)
    "What is the capital of France?" # This info is NOT in the data
]

for q in queries:
    answer = rag_qa_bot(q)
    print(f"🗨️ {q}\n💡 {answer}\n{'-'*30}")
    time.sleep(1) # Add a small delay for readability and to avoid hitting rate limits

print("\n✅ Task 1 completed professionally by VINO K with Contextual Compression!")

# Clean up: Delete the Pinecone index when done (optional, but good for managing resources)
# print(f"\nDeleting Pinecone index '{index_name}'...")
# pinecone.delete_index(index_name)
# print("Index deleted.")


ImportError: cannot import name 'Index' from 'pinecone' (I:\Anaconda\Lib\site-packages\pinecone\__init__.py)