In [3]:
!pip install langchain langchain-community chromadb pypdf sentence-transformers openai -q


In [4]:
# ============================================================
# 📘 BPCL Annual Report Chatbot using LangChain + OpenRouter
# ============================================================


#  Import Dependencies ---
import os
import warnings
from google.colab import userdata
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import Document

# Silence every warning category for the rest of the session.
warnings.filterwarnings(action="ignore")

# --- Configure the OpenRouter API key ---
# The key is read from Colab's secret store and exported as an environment
# variable. Any failure while fetching or exporting it stops the run with
# instructions for the user.
try:
    os.environ["OPENROUTER_API_KEY"] = userdata.get("OPENROUTER_API_KEY")
except Exception as key_error:
    print(
        "❌ ERROR: Could not find the OPENROUTER_API_KEY secret.",
        "Please add your OpenRouter API key to Colab's secrets (🔑 icon on the left sidebar).",
        sep="\n",
    )
    raise SystemExit(key_error)

# --- Load and process the PDF document ---
# Pipeline: PDF pages -> text chunks -> embeddings -> Chroma vector store ->
# conversational retrieval chain -> interactive chat loop.
pdf_path = "/content/bpcl-annual-report-2024-25.pdf"

if not os.path.exists(pdf_path):
    print(f"❌ ERROR: The file '{pdf_path}' was not found.")
    print("Please upload the 'bpcl-annual-report-2024-25.pdf' file to your Colab session.")
else:
    print("📄 Loading and processing the BPCL Annual Report... please wait.")

    # Load one Document per PDF page. `load()` is used instead of
    # `load_and_split()` because the latter already runs a default text
    # splitter; chunking is done exactly once, explicitly, just below.
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()

    # Split the pages into overlapping chunks for better embedding quality.
    pdf_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        length_function=len
    )
    docs = pdf_splitter.split_documents(pages)

    # Rebuild the chunks while keeping their metadata, so retrieved passages
    # can be traced back to where they came from. Only scalar values are kept,
    # which keeps the metadata simple enough for the vector store.
    documents = [
        Document(
            page_content=doc.page_content,
            metadata={
                key: value
                for key, value in doc.metadata.items()
                if isinstance(value, (str, int, float, bool))
            },
        )
        for doc in docs
    ]

    # A PDF with no extractable text (e.g. scanned images) produces no chunks;
    # stop here with a clear message instead of failing inside the vector store.
    if not documents:
        raise SystemExit(
            f"❌ ERROR: No text could be extracted from '{pdf_path}'."
        )

    # --- Create text embeddings and the vector store ---
    print("⚙️ Creating text embeddings and vector store...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )

    vector_db = Chroma.from_documents(
        documents,
        embedding=embeddings
    )

    # --- Set up the conversational AI model ---
    print("🧠 Setting up the conversational AI model...")
    # The memory object stores the running conversation under "chat_history",
    # the key the retrieval chain reads to handle follow-up questions.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    # ChatOpenAI pointed at OpenRouter's OpenAI-compatible endpoint.
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.2,
        openai_api_key=os.environ["OPENROUTER_API_KEY"],
        openai_api_base="https://openrouter.ai/api/v1",
        max_tokens=500
    )

    # Combine retriever + model + memory.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(),
        memory=memory
    )

    print("\n Setup complete! The chatbot is ready to use.")
    print("💬 Ask me anything about the *BPCL Annual Report 2024-25*.")
    print("Type 'Exit' anytime to end the chat.")
    print("-" * 60)

    # --- Start the chat loop ---
    while True:
        # Reading input is kept separate from querying so that an interrupted
        # or closed input stream ends the chat cleanly.
        try:
            question = input("User: ")
        except (EOFError, KeyboardInterrupt):
            print("\nBot: 👋 Thank you for chatting. Goodbye!")
            break

        cleaned_question = question.strip()
        if cleaned_question.lower() == "exit":
            print("Bot: 👋 Thank you for chatting. Goodbye!")
            break
        if not cleaned_question:
            continue

        # Query the QA chain. A failed request (network hiccup, rate limit, ...)
        # is reported and the session stays alive so the user can retry or
        # type 'Exit'.
        try:
            answer = qa_chain.invoke({"question": question})
        except Exception as e:
            print(f"⚠️ An error occurred: {e}")
            continue

        print("Bot:", answer["answer"])


📄 Loading and processing the BPCL Annual Report... please wait.
⚙️ Creating text embeddings and vector store...
🧠 Setting up the conversational AI model...

 Setup complete! The chatbot is ready to use.
💬 Ask me anything about the *BPCL Annual Report 2024-25*.
Type 'Exit' anytime to end the chat.
------------------------------------------------------------
User: HI
Bot: Hello! How can I assist you today?
User: Can you tell me about BPCl
Bot: BPCL, Bharat Petroleum Corporation Limited, is a prominent Oil and Gas company that serves both retail and bulk customers. It has an extensive network of retail outlets and LPG distributorships. The company has internal committees at its regions and refineries to ensure confidentiality in handling complaints. BPCL conducts Life Cycle Perspective/Assessments for its products, with details available in their Annual Report for investors. Additionally, BPCL has a Customer Care System for feedback and addresses complaints through the CPGRAMS portal. The