In [2]:
import os
from google import genai
from pinecone import Pinecone
from dotenv import load_dotenv
import re
load_dotenv()

# API credentials, read from the process environment (the load_dotenv() call above
# has already run by this point). Either value is None when the variable is unset.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')

# Names of the Pinecone index and namespace shared by the cells below.
INDEX_NAME = "qa-chatbot"
NAMESPACE = "bizz-docs-sample"

# Pinecone client instance used to create and open the index.
pc = Pinecone(api_key=PINECONE_API_KEY)

In [3]:
# Dummy data: a small sample corpus of product blurbs (OpenAI, Meta, Amazon, Pinecone)
# that gets uploaded to the vector index.
# Every record carries the same four keys:
#   _id          - identifier of the record (distinct for each entry below)
#   content      - the descriptive text; this is the field mapped to the embedding
#                  input ("field_map") when the index is created
#   category     - "product_info" for every record in this sample
#   product_name - name of the product the record describes
business_documents = [
    # OPENAI DOCS
    {
        "_id": "openai_doc1",
        "content": "ChatGPT is OpenAI's conversational AI assistant, capable of engaging in natural, context-aware dialogue across a wide range of topics. It is used for tasks like writing assistance, tutoring, customer support, and ideation. ChatGPT supports multimodal input (text and images), code generation, and advanced memory features for personalized experiences.",
        "category": "product_info",
        "product_name": "ChatGPT"
    },
    {
        "_id": "openai_doc2",
        "content": "OpenAI Codex is the engine behind GitHub Copilot and is designed to translate natural language into code. It supports over a dozen programming languages and enables developers to build applications faster by generating code snippets, completing functions, and even writing entire modules based on descriptive input.",
        "category": "product_info",
        "product_name": "Codex"
    },
    {
        "_id": "openai_doc3",
        "content": "DALL·E is a generative AI model by OpenAI that creates images from textual prompts. With support for inpainting, editing, and realistic rendering, DALL·E empowers artists, marketers, and developers to turn creative ideas into visual content. It's particularly popular for concept art, design mockups, and marketing illustrations.",
        "category": "product_info",
        "product_name": "DALL·E"
    },
    {
        "_id": "openai_doc4",
        "content": "Whisper is OpenAI's automatic speech recognition (ASR) system trained on a large, diverse dataset of multilingual and multitask audio. It delivers high-accuracy transcription, translation, and speech-to-text capabilities. Whisper is open-source and used in voice assistants, accessibility apps, and real-time transcription tools.",
        "category": "product_info",
        "product_name": "Whisper"
    },

    # META DOCS
    {
        "_id": "meta_doc1",
        "content": "Meta's Quest 3 is a mixed reality headset that offers a powerful VR and AR experience. Equipped with full-color passthrough, spatial audio, and Snapdragon XR2 Gen 2, it is ideal for gaming, collaboration, and immersive learning. It supports a wide range of apps from the Meta Quest Store and integrates with hand tracking for a controller-free experience.",
        "category": "product_info",
        "product_name": "Meta Quest 3"
    },
    {
        "_id": "meta_doc2",
        "content": "Threads is Meta's microblogging social platform designed for close-knit conversations and sharing real-time updates. Integrated with Instagram, Threads allows users to post text, images, and videos while following their interests. Its minimal interface and privacy-first design make it ideal for creators and casual users alike.",
        "category": "product_info",
        "product_name": "Threads"
    },
    {
        "_id": "meta_doc3",
        "content": "Llama 3 is Meta's latest open-source large language model (LLM), built for research and enterprise applications. It provides state-of-the-art performance on reasoning, coding, and multilingual benchmarks, and can be fine-tuned for various tasks such as summarization, chatbots, and content generation.",
        "category": "product_info",
        "product_name": "Llama 3"
    },

    # AMAZON DOCS
    {
        "_id": "amazon_doc1",
        "content": "Amazon Web Services (AWS) is the world's most comprehensive cloud computing platform, offering over 200 services including compute, storage, networking, machine learning, and security. Businesses of all sizes rely on AWS for scalable infrastructure, rapid deployment, and global reach.",
        "category": "product_info",
        "product_name": "AWS"
    },
    {
        "_id": "amazon_doc2",
        "content": "Alexa is Amazon's intelligent voice assistant that powers Echo devices and other smart home integrations. With Alexa, users can control smart devices, play music, get weather updates, and access a wide range of skills. Alexa's AI continues to evolve, providing more natural conversations and improved contextual understanding.",
        "category": "product_info",
        "product_name": "Alexa"
    },
    {
        "_id": "amazon_doc3",
        "content": "Amazon Prime is a premium membership that offers exclusive benefits like free one-day delivery, Prime Video streaming, Prime Music, and early access to deals. Prime is designed to provide convenience, entertainment, and savings for frequent Amazon shoppers.",
        "category": "product_info",
        "product_name": "Amazon Prime"
    },

    # PINECONE DOCS
    {
        "_id": "pinecone_doc1",
        "content": "Pinecone is a vector database service that enables high-speed, scalable similarity search for machine learning applications. It allows developers to store, update, and query vector embeddings in real time, making it ideal for applications like recommendation engines, semantic search, and fraud detection.",
        "category": "product_info",
        "product_name": "Pinecone Vector DB"
    },
    {
        "_id": "pinecone_doc2",
        "content": "Pinecone's hybrid search feature enables combined filtering and vector similarity search, allowing users to retrieve contextually relevant results while applying metadata constraints. This is particularly useful for e-commerce, knowledge retrieval, and personalized search solutions.",
        "category": "product_info",
        "product_name": "Hybrid Search"
    },
    {
        "_id": "pinecone_doc3",
        "content": "The Pinecone Serverless architecture eliminates the need to manage infrastructure, automatically scaling based on traffic and usage. With zero maintenance and built-in security, it simplifies deployment for AI-powered applications with minimal operational overhead.",
        "category": "product_info",
        "product_name": "Pinecone Serverless"
    }
]

In [None]:
def initialize_upload_db(documents,INDEX_NAME,NAMESPACE, batch_size=96):
    """Make sure the Pinecone index exists, upload ``documents`` to it, and return it.

    Args:
        documents: Iterable of record dicts. Each record needs an ``_id`` and a
            ``content`` field (``content`` is the text that gets embedded).
        INDEX_NAME: Name of the Pinecone index to create or reuse.
        NAMESPACE: Namespace inside the index that receives the records.
        batch_size: Maximum number of records sent per ``upsert_records`` call.
            The default of 96 is believed to match Pinecone's per-request cap for
            integrated-embedding indexes -- NOTE(review): confirm against the
            current Pinecone limits.

    Returns:
        The index handle returned by ``pc.Index``, ready for searching.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Create the index on first use. It relies on Pinecone's integrated embedding
    # model, so the service embeds both the uploaded documents and, at search
    # time, the user's question -- no client-side embedding step is needed.
    if not pc.has_index(INDEX_NAME):
        pc.create_index_for_model(
            name=INDEX_NAME,
            cloud="aws",
            region="us-east-1",
            embed={
                "model": "llama-text-embed-v2",
                "field_map": {"text": "content"},  # embed each record's "content" field
            },
        )

    # Look the host up from the index description rather than pasting a URL from
    # the Pinecone console: a hard-coded host only works for the one project it
    # was copied from and cannot be known in advance for a freshly created index.
    host = pc.describe_index(name=INDEX_NAME).host
    index = pc.Index(host=host)

    # Upsert in bounded batches so larger corpora do not exceed the per-request
    # limit; an empty corpus results in no upsert call at all.
    records = list(documents)
    for start in range(0, len(records), batch_size):
        index.upsert_records(NAMESPACE, records[start:start + batch_size])
    return index

In [5]:
def retrieve_data(user_message,index,NAMESPACE, top_k=5):
    """Run a semantic search for ``user_message`` and return the matching documents.

    Args:
        user_message: The user's question, sent to the index as the text query.
        index: Index handle exposing ``search(namespace=, query=, fields=)``.
        NAMESPACE: Namespace within the index to search.
        top_k: Maximum number of hits to request (defaults to 5, the value that
            was previously hard-coded).

    Returns:
        A list with one dict per hit, holding ``id``, ``category`` and
        ``llm_context`` (the hit's ``content`` text, intended for the LLM prompt).
    """
    results = index.search(
        namespace=NAMESPACE,
        query={
            "inputs": {"text": user_message},
            "top_k": top_k,
        },
        # Only these two stored fields are read below, so only they are requested.
        fields=["content","category"],
    )

    return [
        {
            "id": hit['_id'],
            "category": hit['fields']['category'],
            "llm_context": hit['fields']['content'],
        }
        for hit in results['result']['hits']
    ]


In [None]:
# Formats the Gemini response into clean plain text.
def format_response(gemini_output):
    """Strip light Markdown from a model reply and normalise its whitespace.

    Args:
        gemini_output: Raw reply text. May be ``None`` or empty when the model
            produced no text.

    Returns:
        The cleaned text: ``**bold**`` markers removed, list bullets (``*`` or
        ``-`` followed by whitespace) rewritten as ``- ``, runs of blank lines
        collapsed to a single blank line, and surrounding whitespace trimmed.
        Returns ``""`` for ``None`` or empty input.
    """
    if not gemini_output:
        return ""

    # Unwrap bold first, so a line such as "* **Name**: ..." is a plain bullet by
    # the time the bullet rule runs.
    clean_text = re.sub(r"\*\*(.*?)\*\*", r"\1", gemini_output)

    # Normalise bullets. Two deliberate restrictions:
    #  - leading whitespace is [ \t]*, not \s*, so the match cannot start on a
    #    preceding empty line and swallow the blank line in front of a list;
    #  - the marker must be followed by whitespace, so text such as "-5" or
    #    "*emphasis*" at the start of a line is left alone.
    clean_text = re.sub(r"^[ \t]*[*\-][ \t]+", "- ", clean_text, flags=re.MULTILINE)

    # Collapse three or more consecutive newlines into one blank line.
    clean_text = re.sub(r"\n{3,}", "\n\n", clean_text)
    return clean_text.strip()

In [None]:
def build_prompt(user_message, retrieved_docs_cache,session_history):
    """Assemble the full prompt text that is sent to the LLM.

    Args:
        user_message: The question being asked in this turn.
        retrieved_docs_cache: Retrieved documents; each item's ``llm_context``
            text is placed, one per line, in the "Context" section.
        session_history: Chat turns as dicts with ``role`` and ``content``; only
            the last three entries are included.

    Returns:
        The prompt string: instructions, context, recent conversation, and the
        current question, ending with an "Answer:" cue.
    """
    # Render the three most recent turns as "Role: content" lines.
    history_lines = []
    for turn in session_history[-3:]:
        speaker = turn['role'].capitalize()
        history_lines.append(f"{speaker}: {turn['content']}")
    history_block = "\n".join(history_lines)

    # One retrieved passage per line.
    context_block = "\n".join(doc['llm_context'] for doc in retrieved_docs_cache)

    return f"""
    You are a Business Domain QA Chatbot powered by RAG.
    Answer the user's question based on the context provided below.
    Do NOT engage in casual, personal, or off-topic conversations.
    If unsure, say \"I don't have enough information to answer that.\"

    Context:
    {context_block}

    Conversation so far:
    {history_block}

    Current Question:
    {user_message}

    Answer:
    """


def bizz_chat(prompt):
    """Send ``prompt`` to Gemini and return the cleaned-up reply text.

    Args:
        prompt: The complete prompt string (see ``build_prompt``).

    Returns:
        The model's reply passed through ``format_response``. When the response
        carries no text, the fallback sentence that the prompt itself instructs
        the model to use is returned instead.
    """
    # No api_key is passed here; the client is expected to pick the key up from
    # the environment -- NOTE(review): confirm which variable names it reads.
    llm = genai.Client()
    response = llm.models.generate_content(
        model='gemini-2.5-flash',
        contents=prompt
    )

    # response.text can be None/empty (e.g. no text parts came back); passing
    # that on to the formatter would fail, so answer with the fallback instead.
    answer_text = response.text
    if not answer_text:
        return "I don't have enough information to answer that."
    return format_response(answer_text)

In [None]:
def qa_chatbot():
    """Run the interactive question-answering loop on the console.

    Uploads ``business_documents`` to the index, then repeatedly reads a query,
    retrieves matching documents, builds a prompt, asks the LLM, and prints the
    reply. The loop ends when the user types ``exit`` or ``quit`` (any case,
    surrounding whitespace ignored) or closes/interrupts the input stream.
    """
    index = initialize_upload_db(business_documents, INDEX_NAME, NAMESPACE)
    if index is None:
        return

    session_history = []
    # Retrieved documents keyed by id. Keeping earlier hits lets follow-up
    # questions reuse them, while the keying stops the same document from being
    # added to the prompt again on every turn.
    retrieved_docs_cache = {}

    print("Welcome to the Business QA Chatbot. Type 'exit' to quit.")
    while True:
        try:
            user_message = input("\nEnter your query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
            print("\nGoodbye!")
            break

        if user_message.lower() in ('exit', 'quit'):
            print("Goodbye!")
            break
        if not user_message:
            # Nothing to search for; ask again instead of querying with empty text.
            continue

        for doc in retrieve_data(user_message, index, NAMESPACE):
            retrieved_docs_cache[doc['id']] = doc

        # The history passed here holds only *previous* turns. The current
        # question is recorded afterwards, so it is not repeated under both
        # "Conversation so far" and "Current Question" in the prompt.
        prompt = build_prompt(user_message, list(retrieved_docs_cache.values()), session_history)
        response = bizz_chat(prompt)
        print("\nResponse:\n",response)

        session_history.append({"role": "user", "content": user_message})
        session_history.append({"role": "bot", "content": response})

In [None]:
# Start the interactive chat session; this cell blocks on input() until the user
# types 'exit' or 'quit'.
qa_chatbot()