In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the PDF loaded later in this notebook is read from /content/,
# not from /content/drive — confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install langchain langchain-community chromadb pypdf sentence-transformers openai -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m59.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.7/20.7 MB[0m [31m126.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.9/323.9 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m92.1 MB/s[0m eta [36m0:00:

In [None]:
# Key metrics (extracted from Birlasoft Annual Report 2024-25)
# Revenue_Cr: 5,375.2
# Revenue_USD_Mn: 635.4
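# PAT_Cr: 5,375.2  (NOTE(review): identical to Revenue_Cr above — looks like a copy error; verify the PAT figure against the report)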
# Headcount: 11,930

import os
from google.colab import userdata
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import Document

# --- Step 3: Set up the API Key and Environment ---
# IMPORTANT: Before running, you must add your OpenRouter API key to Colab's secrets.
# 1. Click the 'Key' icon on the left sidebar.
# 2. Add a new secret named "OPENROUTER_API_KEY" and paste your key as the value.
# Copy the OpenRouter key from Colab's secret store into the process environment.
# Any failure while fetching or storing the key aborts the run with guidance.
try:
    os.environ.update({"OPENROUTER_API_KEY": userdata.get("OPENROUTER_API_KEY")})
except Exception as secret_error:
    print(
        "ERROR: Could not find the OPENROUTER_API_KEY secret.",
        "Please add your OpenRouter API key to Colab's secrets (on the left sidebar) and try again.",
        sep="\n",
    )
    # Halt here instead of continuing without a key.
    raise SystemExit(secret_error)


# --- Step 4: Load and Process the PDF Document ---
# Location of the annual-report PDF inside the Colab session's filesystem.
pdf_path = "/content/birlasoft_annual-report-2024-25.pdf"
if not os.path.exists(pdf_path):
    print(f"ERROR: The file '{pdf_path}' was not found.")
    # Derive the file name from pdf_path so this hint cannot drift from the path above.
    print(f"Please upload the '{os.path.basename(pdf_path)}' file to your Colab session.")
else:
    print("Loading and processing the PDF... this may take a moment.")
    # Load the PDF page by page. Chunking happens exactly once, below, so the
    # chunk_size / chunk_overlap configured here are the only ones in effect
    # (load_and_split() would pre-split with the loader's default splitter first).
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()

    # Split the document into smaller chunks for processing
    pdf_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    docs = pdf_splitter.split_documents(pages)
    # Keep only the text of each chunk; loader metadata is intentionally not
    # carried into the vector store.
    documents = [Document(page_content=doc.page_content) for doc in docs]

    # --- Step 5: Create Text Embeddings and Vector Store ---
    # This converts the text chunks into numerical vectors for similarity searching.
    print("Creating text embeddings and vector store...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    vector_db = Chroma.from_documents(
        documents,
        embedding=embeddings
    )

    # --- Step 6: Set Up the Conversational AI Model ---
    # This configures the chatbot's "brain" and memory.
    print("Setting up the conversational AI...")
    # Set up conversational memory to remember the chat history
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    # Initialize the Language Model (LLM) through OpenRouter
    llm = ChatOpenAI(
        model="openai/gpt-3.5-turbo",
        temperature=0.2,
        openai_api_base="https://openrouter.ai/api/v1",
        max_tokens=500,
        openai_api_key=os.environ["OPENROUTER_API_KEY"]
    )

    # Combine the retriever (from the vector store) and the LLM into a conversational chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(),
        memory=memory
    )

    print("\n✅ Setup complete! The chatbot is ready.")
    print("You can now ask questions about the Birlasoft 2024-25 Annual Report.")
    print("Type 'Exit' to end the chat.")
    print("-" * 50)

    # --- Step 7: Start the Real-time Interaction Loop ---
    while True:
        # Reading input is handled separately from answering: a closed or
        # interrupted prompt ends the chat, while a failed answer does not.
        try:
            question = input("User: ")
        except (EOFError, KeyboardInterrupt):
            print("\nBot: Chat ended. Goodbye!")
            break

        if question.lower().strip() == "exit":
            print("Bot: Thank you for chatting. Goodbye!")
            break
        if not question.strip():
            continue

        try:
            # Get the answer from the QA chain (invoke() replaces the
            # deprecated direct call on the chain object).
            answer = qa_chain.invoke({"question": question})
            print("Bot:", answer["answer"])
        except Exception as e:
            # A single failed request should not end the whole session.
            print(f"An error occurred: {e}")
            print("You can try again, or type 'Exit' to end the chat.")


Loading and processing the PDF... this may take a moment.
Creating text embeddings and vector store...


  embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Setting up the conversational AI...


  memory = ConversationBufferMemory(
  llm = ChatOpenAI(



✅ Setup complete! The chatbot is ready.
You can now ask questions about the Reliance Industries 2024-25 Annual Report.
Type 'Exit' to end the chat.
--------------------------------------------------
User: what is birlasoft company is all about?


  answer = qa_chain({"question": question})


Bot: Birlasoft Limited is a global provider of IT services and solutions that drive digital and business transformation across various industries. The company offers services in areas such as Data & Analytics, Digital, Cloud services, Enterprise Resource Planning (ERP), and Infrastructure Management Services (IMS). Birlasoft focuses on emerging technologies like AI/GenAI to help businesses stay ahead in the digital landscape.
User: is birlasoft a good company?
Bot: Based on the information provided, Birlasoft appears to be a reputable company with a global presence and subsidiaries in various countries. They have a significant influence over other entities and provide information about their products and services on their official website. Additionally, they have mechanisms in place to inform consumers about safe and responsible product usage.
User: Thank you for chatting. Goodbye!
Bot: Based on the information provided from the Annual Report 2024-25, Birlasoft seems to be a well-perfo