In [None]:
!pip install \
  langchain \
  langchain-community \
  langchain-core \
  langchain-google-genai \
  langchain-pinecone \
  pinecone-client \
  google-search-results \
  wikipedia \
  sentence-transformers \
  python-dotenv \
  tqdm \
  requests


In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")

# Initialize Gemini Flash LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # This internally uses Flash for fast responses
    google_api_key=GEMINI_API_KEY,
    convert_system_message_to_human=True,
    temperature=0.2
)

# Test it
response = llm.invoke("Explain the difference between Python lists and tuples in simple terms.")
print(response.content)




Imagine you have two containers for storing items:

*   **List:** Like a shopping list you can change. You can add items, remove items, or change the order of items.  It's **mutable** (changeable).  You write it using square brackets: `[apple, banana, cherry]`

*   **Tuple:** Like a set of coordinates (x, y). Once you define them, you can't change them. You can't add, remove, or change the order. It's **immutable** (unchangeable). You write it using parentheses: `(10, 20)`

**In short:**

*   **List:** Changeable, like a shopping list.
*   **Tuple:** Unchangeable, like coordinates.

**Why use tuples?**

*   **Safety:**  Guarantees data won't be accidentally modified.
*   **Speed:**  Tuples are generally a bit faster than lists because Python knows they won't change.
*   **Keys in Dictionaries:**  Tuples can be used as keys in dictionaries, while lists cannot (because dictionary keys must be immutable).


In [None]:
!pip install langchain_experimental


In [2]:
from langchain.agents import Tool
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import SerpAPIWrapper
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
import os
load_dotenv()

# Initialize the Wikipedia tool
wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

# Initialize the Python REPL tool
python_repl = PythonREPLTool()

# Initialize the SERP API tool
search = SerpAPIWrapper(serpapi_api_key=os.getenv("SERPAPI_API_KEY"))

# Bundle the tools
tools = [
    Tool(
        name="Python_REPL",
        func=python_repl.run,
        description="Useful for executing Python code for tasks such as math, logic, and plotting.",
    ),
    Tool(
        name="Wikipedia",
        func=wiki.run,
        description="Useful for retrieving factual and historical information from Wikipedia.",
    ),
    Tool(
        name="Web_Search",
        func=search.run,
        description="Useful for answering current event or factual queries using the SERP API.",
    ),
]


In [None]:
!pip install pypdf

In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
import uuid
import os

def create_pinecone_retriever_from_pdf(pdf_path: str, index_name="doc-chatbot") -> tuple:
    """
    Ingest a PDF file, upload to Pinecone under a unique namespace, and return a retriever + namespace.
    """
    # Init Pinecone
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

    # Load and split PDF
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    # Embedding model
    embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Unique namespace
    namespace = f"session-{uuid.uuid4()}"

    # Upload to Pinecone
    vectorstore = PineconeVectorStore.from_documents(
        documents=chunks,
        embedding=embed_model,
        index_name=index_name,
        namespace=namespace
    )

    retriever = vectorstore.as_retriever()
    return retriever, namespace


  from tqdm.autonotebook import tqdm


In [4]:
pdf_path = "/Users/praddy/Documents/CODE/learning_langchain_folder/chatbot-docqa/data/attention.pdf"  # You’ll dynamically pass this later via Streamlit
retriever, namespace = create_pinecone_retriever_from_pdf(pdf_path)

# Wrap as LangChain tool
rag_tool = Tool(
    name="Document_Retrieval",
    func=lambda q: retriever.get_relevant_documents(q),
    description=(
        "Use this tool to answer any questions related to the user's uploaded PDF document. "
        "It contains detailed information the user may reference or ask questions about."
    )
)


tools.append(rag_tool)


  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [8]:
query = "What is the main idea discussed in the uploaded document?"
docs = retriever.get_relevant_documents(query)

# Then send that as context to the LLM
from langchain.schema import HumanMessage

response = llm.invoke([
    HumanMessage(content=f"Based on the following context, answer the question:\n\n{docs}\n\nQuestion: {query}")
])
print(response.content)


  docs = retriever.get_relevant_documents(query)


Based on the provided documents, the main idea discussed is the **attention mechanism**, specifically within the context of the Transformer model architecture. The documents cover:

*   The Transformer model architecture using stacked self-attention.
*   The encoder and decoder stacks within the Transformer, including multi-head self-attention.
*   Visualizations of the attention mechanism, showing how it follows long-distance dependencies and is involved in anaphora resolution.


In [21]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)


In [22]:
from langchain.agents import initialize_agent, AgentType

# Initialize the agent
agent = initialize_agent(
    tools=tools,                     # All your tools, including RAG
    llm=llm,                         # Gemini Flash
    agent="conversational-react-description",
    memory=memory,
    verbose=True,
    handle_parsing_errors=True
)


In [23]:
while True:
    query = input("You: ")
    if query.lower() in {"exit", "quit"}:
        break
    response = agent.invoke(query)
    print("Bot:", response["output"] if isinstance(response, dict) else response)




[1m> Entering new AgentExecutor chain...[0m




[32;1m[1;3mThought: Do I need to use a tool? No
AI: Hello John from Sydney! It's nice to meet you.[0m

[1m> Finished chain.[0m
Bot: Hello John from Sydney! It's nice to meet you.


[1m> Entering new AgentExecutor chain...[0m




[32;1m[1;3mThought: Do I need to use a tool? No
AI: Your name is John.[0m

[1m> Finished chain.[0m
Bot: Your name is John.


[1m> Entering new AgentExecutor chain...[0m




[32;1m[1;3m```
Thought: Do I need to use a tool? Yes
Action: Document_Retrieval
Action Input: What does the document say about self-attention?[0m
Observation: [36;1m[1;3m[Document(id='ff0348ff-a629-4ddf-9962-62667bd4d569', metadata={'author': '', 'creationdate': '2024-04-10T21:11:43+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'page': 1.0, 'page_label': '2', 'producer': 'pdfTeX-1.40.25', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'source': '/Users/praddy/Documents/CODE/learning_langchain_folder/chatbot-docqa/data/attention.pdf', 'subject': '', 'title': '', 'total_pages': 15.0, 'trapped': '/False'}, page_content='described in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof a single sequence in order to compute a representation of the sequence. Self-attention has been\nused successfully in a v



[32;1m[1;3mDo I need to use a tool? No
AI: The uploaded document describes self-attention (sometimes called intra-attention) as an attention mechanism that relates different positions of a single sequence to compute a representation of the sequence. It has been used successfully in tasks like reading comprehension, abstractive summarization, textual entailment, and learning task-independent sentence representations. The document also mentions that self-attention could yield more interpretable models and that individual attention heads appear to exhibit behavior related to the syntactic and semantic structure of sentences.[0m

[1m> Finished chain.[0m
Bot: The uploaded document describes self-attention (sometimes called intra-attention) as an attention mechanism that relates different positions of a single sequence to compute a representation of the sequence. It has been used successfully in tasks like reading comprehension, abstractive summarization, textual entailment, and learnin



[32;1m[1;3m```
Thought: Do I need to use a tool? No
AI: You are from Sydney.
```[0m

[1m> Finished chain.[0m
Bot: You are from Sydney.
```
