In [24]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/stock-market-performance-2024/Stock_Market_Performance_2024.pdf


In [25]:
!pip install langchain==0.2.3
!pip install langchain-core==0.2.5
!pip install langchain-openai==0.2.0
!pip install python-dotenv
!pip install langgraph
!pip install langchain-google-genai
!pip install langchain-community
!pip install langchain-chroma
!pip install chromadb

Collecting langchain==0.2.3
  Using cached langchain-0.2.3-py3-none-any.whl.metadata (6.9 kB)
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain==0.2.3)
  Using cached langchain_core-0.2.43-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain==0.2.3)
  Using cached langchain_text_splitters-0.2.4-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain==0.2.3)
  Using cached langsmith-0.1.147-py3-none-any.whl.metadata (14 kB)
Using cached langchain-0.2.3-py3-none-any.whl (974 kB)
Using cached langchain_core-0.2.43-py3-none-any.whl (397 kB)
Using cached langchain_text_splitters-0.2.4-py3-none-any.whl (25 kB)
Using cached langsmith-0.1.147-py3-none-any.whl (311 kB)
Installing collected packages: langsmith, langchain-core, langchain-text-splitters, langchain
  Attempting uninstall: langsmith
    Found existing installation: langsmith 0.3.45
    Uninstalling langsmith-0.3.45:
      Successfully uninstalled la

In [26]:
import os
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage, ToolMessage
from operator import add as add_messages
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI

In [27]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("GEMINI_KEY")

In [28]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.7, google_api_key=secret_value_0)

In [29]:

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=secret_value_0)

In [30]:
pdf_path = "/kaggle/input/stock-market-performance-2024/Stock_Market_Performance_2024.pdf"

In [31]:
if not os.path.exists(pdf_path):
    raise FileNotFoundError(f"PDF not found: {pdf_path}")

In [32]:
pdf_loader = PyPDFLoader(pdf_path)

In [33]:
try:
    pages = pdf_loader.load()
    print(f"PDF has beeen loaded and has {len(pages)} pages")

except Exception as e:
    print(f"Error loading PDF: {e}")
    raise

PDF has beeen loaded and has 9 pages


In [34]:
#chuncking process
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
#chunk size : how many characters will be taken
#chunk overlap : how many characters will be overlapping between chunks

In [35]:
pages_split = text_splitter.split_documents(pages)

In [36]:
persist_directory = "./chroma_db"
collection_name = "stock_market"

if not os.path.exists(persist_directory):
    os.makedirs(persist_directory)

In [37]:
try:
    # creating the chroma database using embeddings model
    vectorstore = Chroma.from_documents(
        documents=pages_split,
        embedding=embeddings,
        persist_directory=persist_directory,
        collection_name=collection_name
    )
    print(f"Created ChromaDB vector store!")
except Exception as e:
    print(f"Error setting up ChromaDB: {str(e)}")
    raise

Created ChromaDB vector store!


In [38]:
# creating our retriever
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={'k':5} # k is the amount of chunks to return
)

In [39]:
@tool
def retriever_tool(query: str)-> str:
    """
    This tool searches and returns the information from the Stock Market Performance 2024 document.
    """
    docs = retriever.invoke(query)
    if not docs:
        return "I found no relevant information in the Stock Market Performance 2024 document"

    results = []
    for i, doc in enumerate(docs):
        results.append(f"Document {i+1}:\n{doc.page_content}")
    return "\n\n".join(results)

In [40]:
tools = [retriever_tool]

# Try binding tools
try:
    llm = llm.bind_tools(tools)
    print("Tools successfully bound!")
except NotImplementedError:
    print("bind_tools not supported, proceeding with alternative...")

class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], add_messages]

def should_continue(state: AgentState):
    """Check if the last message contains tool call"""
    result = state["messages"][-1]
    return hasattr(result, 'tool_calls') and len(result.tool_calls) > 0

Tools successfully bound!


In [41]:
system_prompt = """
You are an intelligent AI assistant who answers questions about Stock Market Performance in 2024 based on the PDF document loaded into your knowledge base.
Use the retriever tool available to answer questions about the stock market performance data. You can make multiple calls if needed.
If you need to look up some information before asking a follow up question, you are allowed to do that!
Please always cite the specific parts of the documents you use in your answers.
"""

In [42]:
tools_dict = {our_tool.name: our_tool for our_tool in tools}

#LLM Agent
def call_llm(state: AgentState)-> AgentState:
    """Function to call the LLM with the current state"""
    messages = list(state['messages'])
    messages = [SystemMessage(content=system_prompt)] + messages
    message = llm.invoke(messages)
    return {'messages': [message]}

In [43]:
# Retriever Agent
def take_action(state: AgentState) -> AgentState:
    """Execute tool calls from the LLM's response."""

    tool_calls = state['messages'][-1].tool_calls
    results = []
    for t in tool_calls:
        print(f"Calling Tool: {t['name']} with query: {t['args'].get('query', 'No query provided')}")
        
        if not t['name'] in tools_dict: # Checks if a valid tool is present
            print(f"\nTool: {t['name']} does not exist.")
            result = "Incorrect Tool Name, Please Retry and Select tool from List of Available tools."
        
        else:
            result = tools_dict[t['name']].invoke(t['args'].get('query', ''))
            print(f"Result length: {len(str(result))}")
            

        # Appends the Tool Message
        results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))

    print("Tools Execution Complete. Back to the model!")
    return {'messages': results}

In [44]:
graph = StateGraph(AgentState)
graph.add_node("llm", call_llm)
graph.add_node("retriever_agent", take_action)

graph.add_conditional_edges(
    "llm",
    should_continue,
    {True: "retriever_agent", False: END}
)
graph.add_edge("retriever_agent", "llm")
graph.set_entry_point("llm")

rag_agent = graph.compile()

In [45]:
def running_agent():
    print("\n=== RAG AGENT===")
    
    while True:
        user_input = input("\nWhat is your question: ")
        if user_input.lower() in ['exit', 'quit']:
            break
            
        messages = [HumanMessage(content=user_input)] # converts back to a HumanMessage type

        result = rag_agent.invoke({"messages": messages})
        
        print("\n=== ANSWER ===")
        print(result['messages'][-1].content)


running_agent()


=== RAG AGENT===



What is your question:  how was the SMP500 performing in 2024



=== ANSWER ===
I need to access the document to answer your question.  Please provide the document.



What is your question:  quit
