In [6]:
# Install LangChain version 0.3.13
# LangChain connects LLMs to external tools like vector databases, document loaders, and APIs
!pip install langchain==0.3.13

# Install LangGraph version 0.2.60
# LangGraph lets you create agent workflows (graphs) on top of LangChain
!pip install langgraph==0.2.60

# Install OpenAI Python client version 1.58.1
# This library allows your Python code to interact with OpenAI models like GPT-4
!pip install openai==1.58.1



In [8]:
# Import libraries for working with environment variables and securely entering API keys
import os
import getpass

# Required: Set your OpenAI API key so that LangChain can access GPT models
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Optional: Set your LangChain API key if you're using LangSmith for tracing/debugging (not required for this project)
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("Enter your LangChain API key (if you have one): ")

# Optional but recommended: Turn on LangChain tracing for better debugging and visualization
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Optional: Set your Tavily API key if you plan to use web search (not required for this Fidelity project)
os.environ["TAVILY_API_KEY"] = getpass.getpass("Enter your Tavily API key (if using): ")

Enter your OpenAI API key:  ········
Enter your LangChain API key (if you have one):  ········
Enter your Tavily API key (if using):  ········


In [12]:
# ---------------------------------------
# Import Required Libraries and Set Up Environment
# ---------------------------------------

# LangChain components for models, embeddings, prompting, document loading, and vector storage
from langchain_openai import ChatOpenAI, OpenAIEmbeddings  # For OpenAI LLM and embedding generation
from langchain.prompts import ChatPromptTemplate  # For creating prompt templates
from langchain_community.document_loaders import WebBaseLoader  # For loading content from URLs
from langchain_community.vectorstores import Chroma  # For storing text embeddings in a vector store

# LangGraph core for building AI agent workflows as graphs
import langgraph
from langgraph.graph import StateGraph, END  # StateGraph builds the flow, END marks the final node

# Python standard libraries
from typing import TypedDict, Annotated  # For defining custom types (used in LangGraph state)
import os  # For setting environment variables

# ---------------------------------------
# Set Headers for Web Page Access
# ---------------------------------------

# Some websites (like Fidelity) require a real User-Agent header to allow scraping
# Set a User-Agent string to mimic a browser and avoid getting blocked
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
os.environ["USER_AGENT"] = user_agent  # Useful for consistent access

In [14]:
# Load the Fidelity "How Much Do I Need to Retire?" webpage using WebBaseLoader

# Create a loader for the Fidelity retirement article
loader = WebBaseLoader("https://www.fidelity.com/viewpoints/retirement/how-much-do-i-need-to-retire")

# Load the webpage content into a list of documents
documents = loader.load()

# Optional: print out how many documents were loaded and a sample of the content
print(f"Loaded {len(documents)} document(s).")
print(documents[0].page_content[:500])  # Show first 500 characters of the first document to see what we get

Loaded 1 document(s).









How much do I need to retire? | Fidelity











































































Skip to Main Content.
Site navigation

Fidelity.com Home
Fidelity.com Home
 

Customer Service

Profile


Open an Account

Fidelity Assistant
Log In

Customer Service

Profile


Open an Account

Fidelity Assistant
Log Out
 


 
 



Accounts & Trade


Portfolio Log In Required
Portfolio
AccountPositions Log In Required
AccountPositions
Trade Log In Required
Trade
Trading D


In [26]:
# Store the loaded document into a Chroma vector store

# What is a Chroma vector store?
# ---------------------------------------
# Chroma is a database specifically designed to store "embeddings."
# An embedding is a numerical representation (vector) of a chunk of text,
# where similar texts are mapped to nearby points in a mathematical space.
# 
# Why do we use it?
# It lets the AI agent perform "semantic search" — meaning it can
# find relevant information based on the meaning of a user's question,
# not just exact keyword matching.

# Step 1: Create an embeddings object using OpenAIEmbeddings
embedding_function = OpenAIEmbeddings()

# Step 2: Create the Chroma vector store and insert the loaded documents
vectorstore = Chroma.from_documents(
    documents,             # List of documents we loaded from Fidelity
    embedding_function     # Function that turns documents into vectors
)

# Now your document is stored inside Chroma and ready for semantic search!
print("Document successfully stored in Chroma vector store.")

Document successfully stored in Chroma vector store.


In [28]:
# ---------------------------------------
# Create a retriever tool from the Chroma vector store (improved version)
# ---------------------------------------

# What is a retriever?
# ---------------------
# A retriever is like a search engine for your vector store (Chroma).
# It takes a user's question, finds the most relevant chunks of information
# from the documents, and returns them.

# Step 1: Create a retriever from the Chroma vectorstore
retriever = vectorstore.as_retriever()

# Step 2: Customize retriever settings
# Automatically set 'k' to be no more than the number of documents loaded
num_docs_loaded = len(documents)
retriever.search_kwargs = {"k": min(3, num_docs_loaded)}  # Fetch up to 3 documents, or fewer if less available

print(f"Retriever tool successfully created and ready. (Fetching up to {retriever.search_kwargs['k']} documents)")

Retriever tool successfully created and ready. (Fetching up to 1 documents)


In [30]:
# Import Tool class
from langchain_core.tools import Tool

# Create a retriever tool
retriever_tool = Tool(
    name="fidelity_retriever",  # Name for the tool
    description="Use this tool to search for retirement information from Fidelity documents.",
    func=retriever.invoke  # This links the retriever's "search" ability to the tool
)

print("Retriever tool wrapped and ready for use by the AI agent.")

Retriever tool wrapped and ready for use by the AI agent.


In [32]:
# ---------------------------------------
# Create the PromptTemplate and Relevance Checking Chain
# ---------------------------------------

from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Step 1: Define a prompt template that tells the LLM how to judge relevance
prompt = PromptTemplate(
    template="""You are a financial advisor specializing in retirement planning.

Here is the retrieved document:
{context}

Here is the user's question:
{question}

Please do the following:
- State "YES" if the document provides information that could directly or indirectly answer the user's question.
- State "NO" if the document is unrelated or off-topic.
- Briefly explain in exactly one sentence why you scored it YES or NO.
- Highlight a key phrase or idea from the document that supports your decision.
Respond very concisely.""",
    input_variables=["context", "question"]
)

# Step 2: Initialize the OpenAI LLM model you will use
# Corrected: Using GPT-4o-mini (new model)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Step 3: Chain the prompt directly into the LLM using the modern RunnableSequence style
relevance_chain = prompt | llm

print("Relevance-checking chain (RunnableSequence) created successfully.")

Relevance-checking chain (RunnableSequence) created successfully.


In [34]:
# -----------------------------
# Block 1: Node Definitions
# -----------------------------

from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda
from typing import TypedDict

# Define the structure of shared state passed between nodes
class AgentState(TypedDict):
    messages: list          # Chat history or user questions
    context: str            # Retrieved document content
    relevance_result: str   # 'yes' or 'no' from the relevance checker
    answer: str             # Final response from the agent

# Node 1: Retrieve relevant documents from Chroma
def retrieve_node(state: dict):
    question = state["messages"][-1][1]
    docs = retriever.invoke(question)
    context = "\n\n".join([doc.page_content for doc in docs])
    return {"context": context}

# Node 2: Use the relevance chain to check if context is useful
def relevance_check_node(state: dict):
    question = state["messages"][-1][1]
    context = state["context"]
    result = relevance_chain.invoke({"context": context, "question": question})
    return {"relevance_result": result.content.strip().lower()}

# Node 3: Generate a final answer using the context
# NOTE: Updated to match tutorial's expected structure

def generate_answer_node(state: dict):
    question = state["messages"][-1][1]
    context = state["context"]
    prompt = f"""You are a Fidelity financial advisor. Use the context below to answer the user's question.

Context:
{context}

User Question:
{question}
"""
    response = llm.invoke(prompt)
    return {"answer": {"messages": [response.content.strip()]}}

# Node 4: Fallback if nothing relevant was found

def no_answer_node(state: dict):
    return {"answer": {"messages": ["I'm sorry, I couldn't find relevant information to answer your question."]}}


In [36]:
# ---------------------------------------
# Build the LangGraph Agent (continued)
# ---------------------------------------

from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda

# Initialize the LangGraph using the defined AgentState type
graph = StateGraph(AgentState)

# Register each node (function) in the agent workflow
# Each node is a step in the AI agent's reasoning pipeline
graph.add_node("retrieve", RunnableLambda(retrieve_node))
graph.add_node("check_relevance", RunnableLambda(relevance_check_node))
graph.add_node("generate_answer", RunnableLambda(generate_answer_node))
graph.add_node("no_answer", RunnableLambda(no_answer_node))

# Set the starting point of the graph — always begins with document retrieval
graph.set_entry_point("retrieve")

# Define how the graph flows between nodes
# After retrieving, check if the retrieved docs are relevant
graph.add_edge("retrieve", "check_relevance")

# Conditional routing logic — send to answer node only if relevant
# Otherwise go to fallback response

def route_based_on_relevance(state: AgentState):
    return "generate_answer" if "yes" in state["relevance_result"] else "no_answer"

graph.add_conditional_edges("check_relevance", route_based_on_relevance)

# Define terminal nodes — the end of the line for each branch
graph.add_edge("generate_answer", END)
graph.add_edge("no_answer", END)

# Compile the defined state graph into an executable AI agent
agent = graph.compile()

print("LangGraph agent compiled successfully.")


LangGraph agent compiled successfully.


In [43]:
# ---------------------------------------
# Send a Sample Prompt to Your Graph and Print the Result
# ---------------------------------------

# Step 1: Define the input — simulating a user asking a retirement planning question
inputs = {
    "messages": [
        ("user", "What are the steps that I should take to determine how much I need to save for retirement?")
    ]
}

# Step 2: Run the compiled LangGraph agent using that input
result = agent.invoke(inputs)

# Step 3: Show the final response generated by the agent
print("=== Final Answer ===")
print(result["answer"])

=== Final Answer ===
{'messages': ["To determine how much you need to save for retirement, follow these steps:\n\n1. **Assess Your Current Financial Situation**: Start by evaluating your current income, expenses, savings, and investments. This will give you a clear picture of where you stand financially.\n\n2. **Define Your Retirement Goals**: Consider when you want to retire and what kind of lifestyle you envision. Will your expenses decrease, remain the same, or increase in retirement? This will impact how much you need to save.\n\n3. **Use Fidelity's Guidelines**: Aim to save at least:\n   - 1x your salary by age 30\n   - 3x by age 40\n   - 6x by age 50\n   - 8x by age 60\n   - 10x by age 67\n\n   These milestones can serve as goalposts to help you plan your savings.\n\n4. **Calculate Your Income Needs**: Estimate how much income you will need in retirement. A common guideline is to plan for your savings to replace about 45% of your pretax, preretirement income. Adjust this percenta

In [45]:
inputs = {
    "messages": [
        ("user", "How much should I have saved by the time I’m 33 if I want to retire comfortably?")
    ]
}

# Run the agent
result = agent.invoke(inputs)

# Show the result
print("=== Final Answer ===")
print(result["answer"])

=== Final Answer ===
{'messages': ["According to Fidelity's guidelines, by age 33, you should aim to have saved at least 1x your annual salary for retirement. This means if you earn $50,000 a year, you should have approximately $50,000 saved by age 33. \n\nKeep in mind that this is a general guideline, and your personal savings goal may vary based on factors such as your desired retirement lifestyle and when you plan to retire. If you have specific retirement goals or lifestyle expectations, you may want to adjust your savings target accordingly."]}


In [47]:
inputs = {
    "messages": [
        ("user", "Who won the NBA championship in 2023?")
    ]
}

# Run the agent
result = agent.invoke(inputs)

# Show the result
print("=== Final Answer ===")
print(result["answer"])

=== Final Answer ===
{'messages': ["I'm sorry, I couldn't find relevant information to answer your question."]}


In [49]:
# Install PyMuPDF if you haven't already
!pip install pymupdf

# Import required libraries
import fitz  # PyMuPDF
from langchain_core.documents import Document

# Path to your PDF file
pdf_path = '/Users/daniel/Documents/Northwestern/MSDS-442 AI Agent Design & Development/Assignment_1/FFFGX.pdf'

# Load the PDF and extract text
def load_pdf_with_pymupdf(pdf_path):
    doc = fitz.open(pdf_path)
    documents = []
    for page_num, page in enumerate(doc, start=1):
        text = page.get_text()
        if text.strip():  # skip empty pages
            documents.append(Document(page_content=text, metadata={"page": page_num}))
    doc.close()
    return documents

# Load the PDF
pdf_documents = load_pdf_with_pymupdf(pdf_path)

# Check how many pages were loaded
print(f"Loaded {len(pdf_documents)} pages from PDF.")
print(pdf_documents[0].page_content[:500])  # Print first 500 chars of page 1

Collecting pymupdf
  Downloading pymupdf-1.25.5-cp39-abi3-macosx_11_0_arm64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.5-cp39-abi3-macosx_11_0_arm64.whl (18.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.6/18.6 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.25.5
Loaded 6 pages from PDF.
Allocation
Fidelity Freedom® 2045 Fund (FFFGX)
  No Transaction Fee4 
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
9000.00
12000.00
15000.00
18000.00
21000.00
24000.00
27000.00
30000.00
33000.00
36000.00
9.00K
12.00K
15.00K
18.00K
21.00K
24.00K
27.00K
30.00K
33.00K
36.00K
Average Annual Total Returns
Hypothetical Growth of $10,0006,7
AS OF 11/30/2024 ;  Target-Date 2045
 FFFGX : $23,898     
 S&P 500 Index : $35,002     
 Target-Date 2045 : 


In [51]:
# ---------------------------------------
# Create a New Vector Store from the Loaded PDF
# ---------------------------------------

# Step 1: Create an embeddings object
embedding_function = OpenAIEmbeddings()

# Step 2: Create a Chroma vector store with the PDF pages
pdf_vectorstore = Chroma.from_documents(
    pdf_documents,           # documents loaded from the Fidelity Freedom 2045 PDF
    embedding_function       # embedding function
)

# Step 3: Create a retriever from the PDF vectorstore
pdf_retriever = pdf_vectorstore.as_retriever()
pdf_retriever.search_kwargs = {"k": 3}  # fetch top 3 relevant chunks

print("PDF document successfully stored in Chroma vector store and retriever is ready.")

PDF document successfully stored in Chroma vector store and retriever is ready.


In [53]:
# ---------------------------------------
# Set Up Chat Model
# ---------------------------------------

from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

# Use a model (you already know GPT-4o works fine)
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# ---------------------------------------
# Define a Function to Ask Questions Based on PDF
# ---------------------------------------

def ask_pdf_question(question):
    # Retrieve relevant pages
    docs = pdf_retriever.invoke(question)
    
    # Combine content from retrieved documents
    context = "\n\n".join([doc.page_content for doc in docs])
    
    # Format the message
    message = HumanMessage(content=f"""You are analyzing a Fidelity fund document.
Use the below context to answer the question:

Context:
{context}

Question:
{question}
""")
    
    # Get the model's answer
    response = llm.invoke([message])
    
    # Print the answer
    print("\n=== Answer ===")
    print(response.content)

# ---------------------------------------
# Now you can call ask_pdf_question() with each prompt
# ---------------------------------------

# Example questions the tutorial gave you:

ask_pdf_question("What is the name of this fund?")
ask_pdf_question("Who is the fund manager?")
ask_pdf_question("What is the calendar year return for 2022 for this fund and S&P 500?")
ask_pdf_question("What is the Portfolio Net Assets?")
ask_pdf_question("What is the Morningstar rating for this fund? How many funds used to rate this fund?")


=== Answer ===
The name of the fund is Fidelity Freedom® 2045 Fund.

=== Answer ===
The document does not explicitly mention the name of the fund manager. However, it states that Fidelity Management & Research Company LLC is the Adviser responsible for managing the fund.

=== Answer ===
The calendar year return for 2022 for the Fidelity Freedom® 2045 Fund was -18.26%, and for the S&P 500, it was -18.11%.

=== Answer ===
The document does not explicitly state the exact value of the Portfolio Net Assets. However, Portfolio Net Assets are defined as the difference between a portfolio's total assets and liabilities, including all share classes of the fund. To find the specific value, you would typically need to refer to the fund's financial statements or a detailed report that includes this information.

=== Answer ===
The Morningstar rating for this fund is not explicitly stated in the provided context. However, the context does mention that the fund is rated within the Morningstar Categ