In [1]:
# --- Environment Setup ---
# Install required packages
%%capture
!pip install ibm-watsonx-ai==0.2.6 \
             langchain==0.1.16 \
             langchain-ibm==0.1.4 \
             transformers==4.41.2 \
             huggingface-hub==0.23.4 \
             sentence-transformers==2.5.1 \
             chromadb \
             wget==3.2 \
             --upgrade torch --index-url https://download.pytorch.org/whl/cpu


In [None]:
# --- Imports and Warning Suppression ---

# Suppress unwanted warnings
import warnings
def warn(*args, **kwargs):
    pass
warnings.warn = warn
warnings.filterwarnings('ignore')

# LangChain and document processing
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

# IBM watsonx.ai and LangChain integration
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes, DecodingMethods
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM

# Utilities
import wget


In [None]:
# --- Load Private Document ---

# Example placeholder for a private text document.
# In actual use, replace 'data/private_document.txt' with your own file.
# `TextLoader` comes from the notebook's imports cell, so it is not re-imported here.

file_path = "data/private_document.txt"  # Local path (document not uploaded for confidentiality)
loader = TextLoader(file_path)

# `documents` is what the later splitting step consumes.
documents = loader.load()
print(f"Loaded {len(documents)} document(s) for processing.")


In [None]:
# --- (Optional) Inspect Document Content ---

# Read the raw file only to report its size. The text itself is deliberately
# never printed, so private content stays out of the notebook output.

with open(file_path, mode="r") as file:
    contents = file.read()

print(f"Document loaded successfully. Total length: {len(contents)} characters.")


In [None]:
# --- Split Document into Chunks ---

# `documents` was already loaded in the "Load Private Document" cell, so the file
# is not read a second time here.

# Split into chunks for embedding and retrieval (chunks do not overlap).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Fail here with a clear message rather than later, when the vector store is built
# from an empty list.
if not texts:
    raise ValueError(f"No text chunks were produced from '{file_path}'; is the document empty?")

print(f"Document split into {len(texts)} chunks.")


In [None]:
# --- Create Embeddings and Vector Store ---

# Embedding model with the library's default settings (no model name is passed).
embeddings = HuggingFaceEmbeddings()

# Embed every chunk and index the results in a Chroma vector store.
vector_store = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
)

print("Documents embedded and stored in ChromaDB successfully.")


In [None]:
# --- Select LLM Model ---

# Identifier of the IBM watsonx.ai foundation model that will generate the answers.
model_id = "ibm/granite-3-2b-instruct"

# Echo the choice so it is visible in the notebook output.
print(f"Selected model: {model_id}")


In [None]:
# --- Configure Model Generation Parameters ---

parameters = {
    # Greedy decoding picks the most likely next token at every step, so output
    # is deterministic. No TEMPERATURE entry is set: temperature only influences
    # sampling-based decoding, so it had no effect alongside greedy decoding.
    GenParams.DECODING_METHOD: DecodingMethods.GREEDY,
    # Keep the floor low so short answers are possible. A floor of 130 tokens
    # forces the model to keep generating, which conflicts with the custom
    # prompt's instruction to reply just "I don't know." when the context has
    # no answer.
    GenParams.MIN_NEW_TOKENS: 1,
    # Upper bound on the length of a generated response.
    GenParams.MAX_NEW_TOKENS: 256,
}

print("Model generation parameters configured.")


In [None]:
# --- IBM watsonx.ai Credentials and Project Configuration ---

import os

# NOTE: API credentials are intentionally omitted for security and privacy.
# Set WATSONX_URL, WATSONX_APIKEY and WATSONX_PROJECT_ID in the environment before
# starting the kernel so that secrets are never typed into (or saved with) the
# notebook. If a variable is unset, the original placeholder string is used.

credentials = {
    "url": os.environ.get("WATSONX_URL", "your_watsonx_instance_url"),
    # Dict-style credentials for the watsonx.ai SDK use the key "apikey"
    # (no underscore); an "api_key" entry is not picked up.
    "apikey": os.environ.get("WATSONX_APIKEY", "your_ibm_watsonx_api_key"),
}

project_id = os.environ.get("WATSONX_PROJECT_ID", "your_project_id")

print("Credentials and project configuration initialized (sensitive info hidden).")


In [None]:
# --- Initialize the watsonx.ai Model ---

# Bind the selected model id to the project, the credentials and the generation
# parameters defined in the preceding cells.
model = Model(
    project_id=project_id,
    credentials=credentials,
    params=parameters,
    model_id=model_id,
)

print("watsonx.ai model initialized successfully.")


In [None]:
# --- Integrate watsonx.ai Model with LangChain ---

# Wrap the watsonx.ai model object so the LangChain chains below can use it as `llm`.
llm = WatsonxLLM(
    model=model,
)
print("LangChain LLM wrapper initialized successfully.")


In [None]:
# --- Build RetrievalQA Chain and Query the Model ---

# "stuff" chain over the Chroma retriever, answered by the watsonx.ai LLM.
# Source documents are not included in the result.
qa = RetrievalQA.from_chain_type(
    retriever=vector_store.as_retriever(),
    chain_type="stuff",
    llm=llm,
    return_source_documents=False,
)

# Ask one example question; the generated answer is under the "result" key.
query = "What is the company's lunch policy?"
response = qa.invoke(query)

print("Query:", query)
print("\nResponse:\n", response["result"])


In [None]:
# --- Summarize the Entire Document ---

# The existing `qa` retrieval chain is reused with a summarization-style question.
# NOTE(review): the answer is built from whatever chunks the retriever returns, so
# this presumably summarizes a subset of the document rather than all of it — confirm.
summary_query = "Can you summarize the document for me?"
summary_response = qa.invoke(summary_query)

print("Summary Query:", summary_query)
print("\nSummary:\n", summary_response["result"])


In [None]:
# --- Define Custom Prompt Template ---

# Instruction text with two placeholders: {context} and {question}.
# It tells the model to answer only from the context and to admit when it cannot.
prompt_template = """
Use the information from the provided context to answer the question below.
If the answer cannot be found in the context, respond with "I don't know."
Do not attempt to fabricate an answer.

{context}

Question: {question}
"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Passed through to the chain constructor in the next cell.
chain_type_kwargs = dict(prompt=PROMPT)

print("Custom prompt template defined successfully.")


In [None]:
# --- Query with Custom Prompt Template ---

# Rebuild the retrieval chain, this time passing the custom prompt through
# `chain_type_kwargs`. Note that this rebinds the name `qa`.
qa = RetrievalQA.from_chain_type(
    retriever=vector_store.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    llm=llm,
    return_source_documents=False,
)

# Example question; the generated answer is under the "result" key.
custom_query = "Can I eat in company vehicles?"
custom_response = qa.invoke(custom_query)

print("Query:", custom_query)
print("\nResponse:\n", custom_response["result"])


In [None]:
# --- Build Conversational Retrieval Chain (with Memory) ---

# `ConversationalRetrievalChain` and `ConversationBufferMemory` come from the
# notebook's imports cell, so they are not re-imported here.

# Initialize conversation memory; it supplies `chat_history` to the chain on every call.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create a conversational RAG chain.
# No custom `get_chat_history` is passed: with `return_messages=True` the history is
# a list of message objects, and an identity formatter would put their raw reprs
# into the question-condensing prompt. The chain's default formatter renders them
# as readable dialogue turns instead.
conversational_qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
    memory=memory,
    return_source_documents=False
)

# Example conversation. Only the question is passed: the attached memory provides
# `chat_history`, so an explicit empty list would be ignored.
query = "What is the mobile policy?"
result = conversational_qa.invoke({"question": query})

print("User Query:", query)
print("\nAssistant Response:\n", result["answer"])


In [None]:
# --- Continue the Conversation ---

# Follow-up query that uses prior context from memory.
# Only the question is passed: the chain's memory supplies `chat_history`, so an
# explicit empty list would be overridden and only obscures where the context comes from.
follow_up_query = "Can you list the key points from it?"
follow_up_result = conversational_qa.invoke({"question": follow_up_query})

print("Follow-up Query:", follow_up_query)
print("\nAssistant Response:\n", follow_up_result["answer"])


In [None]:
## Wrap-up: Build the Intelligent Retrieval Agent

This final section defines an interactive **Document Q&A Agent** powered by Retrieval-Augmented Generation (RAG), LangChain, and IBM watsonx.ai.
The agent can retrieve information from your private documents, maintain conversation memory, and answer follow-up questions contextually — like a custom internal chatbot for document understanding.

Once initialized, simply type your queries below to interact with the agent.


In [None]:
# --- Interactive Q&A Chatbot ---

def start_chatbot(chain=None):
    """
    Run a simple interactive Q&A loop over the private documents.

    Args:
        chain: Optional pre-built conversational chain. It must expose
            ``invoke({"question": ...})`` and return a mapping with an
            ``"answer"`` key. When omitted, a new ``ConversationalRetrievalChain``
            with its own ``ConversationBufferMemory`` is built from the notebook's
            ``llm`` and ``vector_store``.

    Returns:
        None. Questions are read with ``input()`` and answers are printed.
        The loop ends on "quit", "exit" or "bye" (case-insensitive), on
        end-of-input, or when the kernel is interrupted.
    """
    if chain is None:
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        # The chain's default chat-history formatter is used: with
        # `return_messages=True` an identity formatter would put raw message
        # object reprs into the question-condensing prompt.
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(),
            memory=memory,
            return_source_documents=False
        )

    print("Private Document Q&A Chatbot (type 'exit' to quit)\n")

    while True:
        try:
            query = input("Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the chat cleanly.
            print("\nAnswer: Goodbye!")
            break

        if not query:
            # Don't spend an LLM call on an empty question.
            continue
        if query.lower() in ["quit", "exit", "bye"]:
            print("Answer: Goodbye!")
            break

        # Only the question is passed; the chain's memory supplies the chat history.
        result = chain.invoke({"question": query})
        print("Answer:", result["answer"])
        print("-" * 80)

# Run the chatbot
start_chatbot()
