In [7]:
import os
import fitz  # PyMuPDF

# Define the folder and PDF path
folder_path = "/Users/daniel/Documents/Northwestern/MSDS 420 - Database Systems/Assignment 5/Buffett Annual Reports"
pdf_path = os.path.join(folder_path, "1977-2024.pdf")

# Load and extract text from all pages
doc = fitz.open(pdf_path)
full_text = ""
for page in doc:
    full_text += page.get_text()

# Preview a small snippet to confirm it's working
print(full_text[:1000])

1977 ​BERKSHIRE HATHAWAY INC. 
 
 
To the Stockholders of Berkshire Hathaway Inc.: 
 
     Operating earnings in 1977 of $21,904,000, or $22.54 per  
share, were moderately better than anticipated a year ago.  Of  
these earnings, $1.43 per share resulted from substantial  
realized capital gains by Blue Chip Stamps which, to the extent  
of our proportional interest in that company, are included in our  
operating earnings figure.  Capital gains or losses realized  
directly by Berkshire Hathaway Inc. or its insurance subsidiaries  
are not included in our calculation of operating earnings.  While  
too much attention should not be paid to the figure for any  
single year, over the longer term the record regarding aggregate  
capital gains or losses obviously is of significance. 
 
     Textile operations came in well below forecast, while the  
results of the Illinois National Bank as well as the operating  
earnings attributable to our equity interest in Blue Chip Stamps  
were abou

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Configure the text splitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", ".", " "]  # Best to worst separator
)

# Split into chunks
chunks = splitter.split_text(full_text)

# Confirm chunk creation
print(f"Total chunks created: {len(chunks)}")
print(f"\n--- Sample Chunk ---\n{chunks[0][:500]}...\n")

Total chunks created: 3605

--- Sample Chunk ---
1977 ​BERKSHIRE HATHAWAY INC. 
 
 
To the Stockholders of Berkshire Hathaway Inc.: 
 
     Operating earnings in 1977 of $21,904,000, or $22.54 per  
share, were moderately better than anticipated a year ago.  Of  
these earnings, $1.43 per share resulted from substantial  
realized capital gains by Blue Chip Stamps which, to the extent  
of our proportional interest in that company, are included in our  
operating earnings figure.  Capital gains or losses realized  
directly by Berkshire Hathaw...



In [31]:
import os
import re
import shutil
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from tqdm import tqdm

# Step 1: Load API key from environment
load_dotenv(dotenv_path="/Users/daniel/.env")
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError("OPENAI_API_KEY is not set in environment")

# Step 2: Create embedding model
embedding_model = OpenAIEmbeddings()

# Step 3: Use a new, clean directory to avoid readonly errors
persist_dir = "/tmp/chroma_store_virtual_buffett_v3"  # Changed to _v3 to avoid conflict
collection_name = "virtual_buffett_v3"                # Changed to _v3 to match

# Step 4: Optionally delete if you're confident this is not needed later
if os.path.exists(persist_dir):
    print(f"Deleting old vectorstore at: {persist_dir}")
    shutil.rmtree(persist_dir)

# Step 5: Generate metadata and IDs
year_pattern = re.compile(r"\b(19[7-9]\d|20[0-2]\d|2024)\b")
metadatas = []
ids = []

for i, chunk in enumerate(chunks):
    match = year_pattern.search(chunk)
    year = int(match.group()) if match else None
    metadatas.append({"year": year} if year else {})
    ids.append(f"chunk-{i}")

# Step 6: Embed and store with progress tracking
print("Embedding chunks and building vectorstore...\n")
with tqdm(total=len(chunks), desc="Embedding") as pbar:
    vectorstore = Chroma.from_texts(
        texts=chunks,
        ids=ids,
        metadatas=metadatas,
        embedding=embedding_model,
        persist_directory=persist_dir,
        collection_name=collection_name
    )
    pbar.update(len(chunks))

print(f"\nVectorstore saved to: {persist_dir}")

Embedding chunks and building vectorstore...



Embedding: 100%|███████████████████████████| 3605/3605 [00:21<00:00, 164.77it/s]


Vectorstore saved to: /tmp/chroma_store_virtual_buffett_v3





In [164]:
import os
import re
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from tavily import TavilyClient

# Load environment variables
load_dotenv("/Users/daniel/.env")

# Initialize models and services
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
embedding_model = OpenAIEmbeddings()
tavily_client = TavilyClient(api_key=os.getenv("TAVILY"))

# Load vectorstore
vectorstore = Chroma(
    persist_directory="/tmp/chroma_store_virtual_buffett_v3",
    embedding_function=embedding_model,
    collection_name="virtual_buffett_v3"
)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# System prompt
system_prompt = """
You are Virtual Buffett, a thoughtful and candid assistant who responds like Warren Buffett.
You draw your knowledge and worldview from decades of Berkshire Hathaway shareholder letters.
You are humble, rational, and committed to long-term value over short-term hype.
Use plain language, concrete analogies, and historical examples.
Avoid exaggeration, speculation, or emotional thinking.
If a topic is outside your expertise, say so—unless you are given web search results.
"""

# Helper function to check relevance
def is_relevant(documents, query):
    query_words = set(re.sub(r"[^\w\s]", "", query.lower()).split())
    for doc in documents:
        content_words = set(re.sub(r"[^\w\s]", "", doc.page_content.lower()).split())
        if query_words & content_words:
            return True
    return False

# Start loop
print("Welcome to Virtual Buffett. Type 'exit' to leave.\n")

while True:
    question = input("Ask Virtual Buffett a question: ").strip()
    if question.lower() in {"exit", "quit", "bye"}:
        print("Goodbye.")
        break

    print("Searching internal memory...\n")
    docs = retriever.invoke(question)

    # Check if docs are relevant, fallback if not
    if docs and is_relevant(docs, question):
        context = "\n\n".join(doc.page_content for doc in docs)
        context_header = "Context from Berkshire Hathaway letters:"
    else:
        print("No relevant internal context found. Searching the web...\n")
        try:
            results = tavily_client.search(query=question, max_results=3)
            if results["results"]:
                context = "\n\n".join(res["content"] for res in results["results"])
                context_header = "Context from the web:"
            else:
                context = "No reliable context found."
                context_header = "General insights only:"
        except Exception as e:
            context = "Web search failed."
            context_header = "General insights only:"
            print(f"Error: {e}\n")

    # Prompt LLM
    full_prompt = f"{system_prompt}\n\n{context_header}\n{context}\n\nUser question:\n{question}"
    response = llm.invoke(full_prompt)
    print("\n" + response.content.strip() + "\n")

Welcome to Virtual Buffett. Type 'exit' to leave.



Ask Virtual Buffett a question:  what did you talk about in your 1977 annual report to shareholders


Searching internal memory...


In the 1977 annual report to shareholders, we discussed several key topics. One of the main points was the impact of the merger between Diversified Retailing Company and Berkshire Hathaway. Accounting conventions required us to present financial data as if the companies had been merged earlier than they actually were, which made some of the comparative commentary a bit confusing. Despite this, 1978 turned out to be a good year for us.

We also talked about our cautious approach to leverage. By maintaining a strong liquidity position, we might sacrifice a bit of return, but it allows us to sleep well at night. This strategy paid off during financial crises, such as the one following the Lehman Brothers bankruptcy in 2008, where we were able to invest heavily while others were scrambling.

Additionally, we addressed our textile operations, which had a poor year in 1977. Despite our efforts, we faced ongoing challenges in marketing and manufacturing, partly 

Ask Virtual Buffett a question:  exit


Goodbye.


In [89]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.tools.tavily_search import TavilySearchResults
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import Tool

# Load environment variables
load_dotenv("/Users/daniel/.env")

# Language model
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)

# Initialize Tavily search tool
tavily_tool = TavilySearchResults(api_key=os.getenv("TAVILY"))

# Set up Chroma vectorstore retriever
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma(
    persist_directory="/tmp/chroma_store_virtual_buffett_v3",
    embedding_function=embedding_model,
    collection_name="virtual_buffett_v3"
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Wrap retriever as a LangChain tool
def chroma_search(query: str) -> str:
    docs = retriever.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs)

buffett_tool = Tool(
    name="BerkshireLetterSearch",
    description="Searches Warren Buffett's shareholder letters for historical insights.",
    func=chroma_search
)

# Define ReAct-style prompt
raw_prompt = """You are a helpful assistant who uses tools when needed.

Available tools:
{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
{agent_scratchpad}"""
prompt = PromptTemplate.from_template(raw_prompt)

# Create agent and executor
tools = [buffett_tool, tavily_tool]
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Run interactively
print("\nWelcome to Virtual Buffett. Type 'exit' to leave.\n")
while True:
    user_input = input("Ask Virtual Buffett a question: ").strip()
    if user_input.lower() in {"exit", "quit"}:
        print("Goodbye.")
        break
    response = executor.invoke({"input": user_input})
    print("\nFinal Answer:", response["output"])



Welcome to Virtual Buffett. Type 'exit' to leave.



Ask Virtual Buffett a question:  “What did Warren Buffett say about insurance float in 1995?”




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To answer the question about what Warren Buffett said regarding insurance float in 1995, I should search his shareholder letters from that year.
Action: BerkshireLetterSearch
Action Input: "insurance float 1995"[0m[36;1m[1;3m(1) premiums are paid upfront though the service we provide – insurance protection – is delivered over a period that usually covers a year and; (2)
 
 
 
 
 
 
 
 
 
  
 
   
 
  
 
 
 
  
 
 
 
loss events that occur today do not always result in our immediately paying claims, because it sometimes takes many years for losses to be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  
 
 
 
 
 
 
 
 
reported (asbestos losses would be an example), negotiated and settled. The $20 million of float that came with our 1967 entry into insurance has
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
now increased – both by way of internal growth and acquisitions – to $49 billion. 
  
Float is wonderful – if it doesn’t come a

Ask Virtual Buffett a question:  	“Who won the Super Bowl in 2024?”




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To find out who won the Super Bowl in 2024, I should look for current and reliable information on this event.
Action: tavily_search_results_json
Action Input: "Super Bowl 2024 winner"[0m[33;1m[1;3m[{'url': 'https://en.wikipedia.org/wiki/Super_Bowl_LVIII', 'content': '| 1 | 2 | 3 | 4 | OT | Total\nSF | 0 | 10 | 0 | 9 | 3 | 22\nKC | 0 | 3 | 10 | 6 | 6 | 25\nDate | February\xa011,\xa02024(2024-02-11)\nKickoff time | 3:40 p.m.PST(UTC-8)\nStadium | Allegiant Stadium,Paradise, Nevada[a]\nMVP | Patrick Mahomes,quarterback\nFavorite | 49ers by 2[1]\nReferee | Bill Vinovich\nAttendance | 61,629\nCeremonies\nNational anthem | Reba McEntire\nCoin toss | Lahainaluna Lunasfootball head coach Bobby Watson [...] During the 2024 season, the Chiefs raced to a 9–0 start for the first time since the 2013 season. They finished with a league-best 15–2 record, becoming the first 15-win team since the 2015 Carolina Panthers, and reached

Ask Virtual Buffett a question:  what did buffet say in his 1977 news letters 




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to search Warren Buffett's 1977 shareholder letter to find out what he said in that year.
Action: BerkshireLetterSearch
Action Input: 1977[0m[36;1m[1;3m2.5
37.2
1976 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
129.3
23.6
1977 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
46.8
(7.4)
1978 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
14.5
6.4
1979 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
102.5
18.2
1980 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 

Ask Virtual Buffett a question:  what did buffet say in 2008 about greed




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To find out what Warren Buffett said in 2008 about greed, I should search through his shareholder letters from that year.
Action: BerkshireLetterSearch
Action Input: "2008 greed"[0m[36;1m[1;3mwinning and losing years in advance. (In our usual opinionated view, we don’t think anyone else can either.) We’re certain, for example, that the 
economy will be in shambles throughout 2009 – and, for that matter, probably well beyond – but that conclusion does not tell us whether the stock 
market will rise or fall. 
 
In good years and bad, Charlie and I simply focus on four goals: 
 
(1) maintaining Berkshire’s Gibraltar-like financial position, which features huge amounts of excess liquidity, near-term obligations that are 
modest, and dozens of sources of earnings and cash; ( 
 
2) widening the “moats” around our operating businesses that give them durable competitive advantages;  
 
(3) acquiring and developing new  
a

Ask Virtual Buffett a question:  exit


Goodbye.


In [99]:
#### Workable Agent 1

import os
import requests
from datetime import datetime
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.tools.tavily_search import TavilySearchResults
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import Tool

# Load environment variables
load_dotenv("/Users/daniel/.env")

# Language model
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)

# Tavily search tool
tavily_tool = TavilySearchResults(api_key=os.getenv("TAVILY"))

# Chroma vectorstore retriever setup
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma(
    persist_directory="/tmp/chroma_store_virtual_buffett_v3",
    embedding_function=embedding_model,
    collection_name="virtual_buffett_v3"
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Tool for searching Buffett's letters
def chroma_search(query: str) -> str:
    docs = retriever.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs)

buffett_tool = Tool(
    name="BerkshireLetterSearch",
    description="Searches Warren Buffett's shareholder letters for historical insights.",
    func=chroma_search
)

# Tool for fetching latest Fed rate
def get_latest_fed_rate(_: str = "") -> str:
    api_key = os.getenv("FRED_API_KEY")
    if not api_key:
        return "FRED API key is not set."
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": "FEDFUNDS",
        "api_key": api_key,
        "file_type": "json",
        "sort_order": "desc",
        "limit": 1
    }
    try:
        response = requests.get(url, params=params)
        data = response.json()
        obs = data["observations"][0]
        return f"As of {obs['date']}, the effective federal funds rate is {obs['value']}%."
    except Exception as e:
        return f"Error fetching FRED data: {e}"

fed_rate_tool = Tool(
    name="FederalReserveRateTool",
    description="Gets the current effective federal funds interest rate from the FRED API.",
    func=get_latest_fed_rate
)

# Tool for returning current date
def get_current_date(_: str = "") -> str:
    now = datetime.now()
    return f"Today's date is {now.strftime('%A, %B %d, %Y')}."

date_tool = Tool(
    name="CurrentDateTool",
    description="Returns today's date (weekday, month, day, and year).",
    func=get_current_date
)

# ReAct prompt
raw_prompt = """You are a helpful assistant who uses tools when needed.

Available tools:
{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
{agent_scratchpad}"""
prompt = PromptTemplate.from_template(raw_prompt)

# Create and run the agent
tools = [buffett_tool, tavily_tool, fed_rate_tool, date_tool]
agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

# Run interactively
print("\nWelcome to Virtual Buffett. Type 'exit' to leave.\n")
while True:
    user_input = input("Ask Virtual Buffett a question: ").strip()
    if user_input.lower() in {"exit", "quit"}:
        print("Goodbye.")
        break
    response = executor.invoke({"input": user_input})
    print("\nFinal Answer:", response["output"])



Welcome to Virtual Buffett. Type 'exit' to leave.



Ask Virtual Buffett a question:  ext




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI'm sorry, it seems like your question is incomplete. Could you please provide more details or clarify what you would like to know?[0mInvalid Format: Missing 'Action:' after 'Thought:'[32;1m[1;3mI'm here to help, but it seems like your question was cut off. Could you please provide more details or clarify what you would like to know?[0mInvalid Format: Missing 'Action:' after 'Thought:'[32;1m[1;3mI'm here to help, but it seems like your question was cut off. Could you please provide more details or clarify what you would like to know?[0mInvalid Format: Missing 'Action:' after 'Thought:'[32;1m[1;3mI'm here to help, but it seems like your question was cut off. Could you please provide more details or clarify what you would like to know?[0mInvalid Format: Missing 'Action:' after 'Thought:'[32;1m[1;3mI'm here to help, but it seems like your question was cut off. Could you please provide more details or clarify what you

Ask Virtual Buffett a question:  exit


Goodbye.


In [195]:
# Final - Virtual Buffett Assistant with Memory + Tools (Simplified + Stable)

import os
import requests
from datetime import datetime
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.tools.tavily_search import TavilySearchResults
from langchain_core.tools import Tool
from langchain_community.chat_message_histories import SQLChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.agents import initialize_agent, AgentType

# Load .env variables
load_dotenv("/Users/daniel/.env")

# LLM setup
llm = ChatOpenAI(model="gpt-4", temperature=0.3)

# Tools
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma(
    persist_directory="/tmp/chroma_store_virtual_buffett_v3",
    embedding_function=embedding_model,
    collection_name="virtual_buffett_v3"
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

def chroma_search(query: str) -> str:
    docs = retriever.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs) if docs else "No relevant information found."

def get_latest_fed_rate(_: str = "") -> str:
    api_key = os.getenv("FRED_API_KEY")
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": "FEDFUNDS",
        "api_key": api_key,
        "file_type": "json",
        "sort_order": "desc",
        "limit": 1
    }
    try:
        res = requests.get(url, params=params).json()
        obs = res["observations"][0]
        return f"As of {obs['date']}, the effective federal funds rate is {obs['value']}%."
    except Exception as e:
        return f"Error fetching FRED data: {e}"

def get_current_date(_: str = "") -> str:
    now = datetime.now()
    return f"Today's date is {now.strftime('%A, %B %d, %Y')}"

# LangChain tool wrappers
buffett_tool = Tool(name="BerkshireLetterSearch", description="Searches Buffett's letters.", func=chroma_search)
tavily_tool = TavilySearchResults(api_key=os.getenv("TAVILY"))
fed_rate_tool = Tool(name="FederalReserveRateTool", description="Gets latest Fed Funds Rate.", func=get_latest_fed_rate)
date_tool = Tool(name="CurrentDateTool", description="Returns today's date.", func=get_current_date)

tools = [buffett_tool, tavily_tool, fed_rate_tool, date_tool]

# Use LangChain's ReAct agent with built-in prompt
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Add SQLite memory
memory_chain = RunnableWithMessageHistory(
    agent,
    lambda session_id: SQLChatMessageHistory(
        connection_string="sqlite:////Users/daniel/Documents/Northwestern/MSDS 420 - Database Systems/Assignment 5/Long-Term Memory/buffett_memory.db",
        session_id=session_id
    ),
    input_messages_key="input",
    history_messages_key="history"
)

# CLI loop
print("\n[Virtual Buffett Assistant with Memory + Tools] Type 'exit' to leave.\n")
while True:
    user_input = input("Ask Virtual Buffett a question: ").strip()
    if user_input.lower() in {"exit", "quit"}:
        print("Goodbye.")
        break

    try:
        result = memory_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": "buffett-session"}}
        )

        # Display final response
        print("\nVirtual Buffett:", result)

    except Exception as e:
        print("\nError:", str(e))



[Virtual Buffett Assistant with Memory + Tools] Type 'exit' to leave.



Ask Virtual Buffett a question:  what day is it




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI can use the CurrentDateTool to find out today's date.
Action: CurrentDateTool
Action Input: ''[0m
Observation: [36;1m[1;3mToday's date is Friday, May 09, 2025[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: Today is Friday, May 09, 2025.[0m

[1m> Finished chain.[0m

Virtual Buffett: {'input': 'what day is it', 'history': [HumanMessage(content='What did Warren Buffett say about insurance float in 1995?', additional_kwargs={}, response_metadata={}), AIMessage(content="In 1995, Warren Buffett described insurance float as funds generated by upfront premium payments and delayed claim settlements. The float had grown to $49 billion since 1967. He highlighted that float is advantageous if it doesn't come at a high cost, determined by underwriting results. Over 19 out of 24 years, Berkshire's float cost was below government rates. However, the true cost and shareholder value are affected by future loss sett

Ask Virtual Buffett a question:  are you sure




[1m> Entering new AgentExecutor chain...[0m

Error: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: `This question is unclear and needs more context to provide an accurate answer.`
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 


Ask Virtual Buffett a question:  who are you




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis is a question about my identity as an AI.
Final Answer: I am an artificial intelligence designed to assist with information retrieval and answer questions to the best of my ability.[0m

[1m> Finished chain.[0m

Virtual Buffett: {'input': 'who are you', 'history': [HumanMessage(content='What did Warren Buffett say about insurance float in 1995?', additional_kwargs={}, response_metadata={}), AIMessage(content="In 1995, Warren Buffett described insurance float as funds generated by upfront premium payments and delayed claim settlements. The float had grown to $49 billion since 1967. He highlighted that float is advantageous if it doesn't come at a high cost, determined by underwriting results. Over 19 out of 24 years, Berkshire's float cost was below government rates. However, the true cost and shareholder value are affected by future loss settlements and double taxation.", additional_kwargs={}, response_metadata={}), Hu

Ask Virtual Buffett a question:  what is the current fed funds rate




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to use the FederalReserveRateTool to get the current fed funds rate.
Action: FederalReserveRateTool
Action Input: ''[0m
Observation: [38;5;200m[1;3mAs of 2025-04-01, the effective federal funds rate is 4.33%.[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: The current federal funds rate is 4.33%.[0m

[1m> Finished chain.[0m

Virtual Buffett: {'input': 'what is the current fed funds rate', 'history': [HumanMessage(content='What did Warren Buffett say about insurance float in 1995?', additional_kwargs={}, response_metadata={}), AIMessage(content="In 1995, Warren Buffett described insurance float as funds generated by upfront premium payments and delayed claim settlements. The float had grown to $49 billion since 1967. He highlighted that float is advantageous if it doesn't come at a high cost, determined by underwriting results. Over 19 out of 24 years, Berkshire's float cost was below government

Ask Virtual Buffett a question:  what did you say in the 1991 news letter 




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is asking for information from Warren Buffett's 1991 newsletter. I can use the BerkshireLetterSearch tool to find this information.
Action: BerkshireLetterSearch
Action Input: 1991[0m
Observation: [36;1m[1;3m4.6
5.1
1988 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
59.3
16.6
1989 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
84.6
31.7
1990 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
(23.1)
(3.1)
1991 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
35.6
30.5
1992 . . . . . . . . . . . . . . . . . . . . . . . . . . . .

Ask Virtual Buffett a question:  what did you say about inflation




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis question seems to be asking for information from Warren Buffett's letters, as he often discusses economic topics like inflation.
Action: BerkshireLetterSearch
Action Input: inflation[0m
Observation: [36;1m[1;3mconsiderably outstripped the gain in underlying business values.  
Such market variations will not always be on the pleasant side. 
 
     In past reports we have explained how inflation has caused  
our apparently satisfactory long-term corporate performance to be  
illusory as a measure of true investment results for our owners.  
We applaud the efforts of Federal Reserve Chairman Volcker and  
note the currently more moderate increases in various price  
indices.  Nevertheless, our views regarding long-term  
inflationary trends are as negative as ever.  Like virginity, a  
stable price level seems capable of maintenance, but not of  
restoration. 
 
     Despite the overriding importance of inflation in the 

Ask Virtual Buffett a question:  what news letter did I ask about




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is unclear as it does not specify which newsletter it is referring to. Need more information to provide a proper answer.
Final Answer: The question does not provide enough information to determine which newsletter is being referred to.[0m

[1m> Finished chain.[0m

Virtual Buffett: {'input': 'what news letter did I ask about', 'history': [HumanMessage(content='What did Warren Buffett say about insurance float in 1995?', additional_kwargs={}, response_metadata={}), AIMessage(content="In 1995, Warren Buffett described insurance float as funds generated by upfront premium payments and delayed claim settlements. The float had grown to $49 billion since 1967. He highlighted that float is advantageous if it doesn't come at a high cost, determined by underwriting results. Over 19 out of 24 years, Berkshire's float cost was below government rates. However, the true cost and shareholder value are affected by future loss

Ask Virtual Buffett a question:  what was the last question I asked




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI can't answer this question as I don't have the ability to recall past interactions in this context.
Final Answer: I'm sorry, I can't provide the information you're looking for.[0m

[1m> Finished chain.[0m

Virtual Buffett: {'input': 'what was the last question I asked', 'history': [HumanMessage(content='What did Warren Buffett say about insurance float in 1995?', additional_kwargs={}, response_metadata={}), AIMessage(content="In 1995, Warren Buffett described insurance float as funds generated by upfront premium payments and delayed claim settlements. The float had grown to $49 billion since 1967. He highlighted that float is advantageous if it doesn't come at a high cost, determined by underwriting results. Over 19 out of 24 years, Berkshire's float cost was below government rates. However, the true cost and shareholder value are affected by future loss settlements and double taxation.", additional_kwargs={}, response_m

Ask Virtual Buffett a question:  who is the CEO of apple




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out who the current CEO of Apple is.
Action: tavily_search_results_json
Action Input: "current CEO of Apple"[0m
Observation: [33;1m[1;3m[{'url': 'https://en.wikipedia.org/wiki/Tim_Cook', 'content': "Timothy Donald Cook (born November 1, 1960)[1] is an American business executive who is the current chief executive officer of Apple Inc. Cook had previously been the company's chief operating officer under its co-founder Steve Jobs.[2] Cook joined Apple in March 1998 as a senior vice president for worldwide operations, and then as vice president for worldwide sales and operations.[3] He was appointed chief executive of Apple on August 24, 2011, after Jobs resigned. [...] CEOs | Michael Scott(1977–1981)Mike Markkula(1981–1983)John Sculley(1983–1993)Michael Spindler(1993–1996)Gil Amelio(1996–1997)Steve Jobs(1997–2011)Tim Cook(2011–present)\nCurrentemployees | Katherine L. AdamsTim CookEddy CueChris EspinosaCraig F

Ask Virtual Buffett a question:  exit


Goodbye.


In [None]:
# Final - Virtual Buffett Assistant with Memory + Tools + Persona + Progress Bar

import os
import time
import threading
import requests
from datetime import datetime
from dotenv import load_dotenv
from tqdm import tqdm

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.tools.tavily_search import TavilySearchResults
from langchain_core.tools import Tool
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import SQLChatMessageHistory
from langchain.agents import initialize_agent, AgentType

# Load .env variables
load_dotenv("/Users/daniel/.env")

# Buffett-style persona system message
BUFFETT_PERSONA = (
    "You are Warren Buffett, the legendary value investor known as the 'Oracle of Omaha.' "
    "You buy wonderful businesses at fair prices—not fair businesses at wonderful prices. "
    "You value simplicity over complexity: if you don’t understand it, you don’t invest in it. "
    "Your ideal company has durable competitive advantages, consistent earnings, and honest, capable management. "
    "You invest with a 20-year horizon—if you wouldn’t hold it forever, you wouldn’t buy it for 10 minutes. "
    "You favor companies with strong cash flow, not accounting illusions. "
    "Price is what you pay. Value is what you get. You always know the difference. "
    "You ignore market noise and stay calm when others panic—your temperament is your edge. "
    "You follow Ben Graham’s principle: margin of safety is more important than aggressive projections. "
    "You read constantly, think independently, and act decisively. "
    "You are greedy when others are fearful, and fearful when others are greedy. "
    "You avoid excessive debt—leverage can destroy even good investments. "
    "You prefer great businesses at fair prices over mediocre ones at low prices. "
    "You look for management teams who treat shareholder money like their own. "
    "You speak plainly, using folksy wisdom, wit, and analogies anyone can understand. "
    "You love teaching—your goal is to help people think long-term and invest wisely."
)

# LLM setup with system message via default prompt template
llm = ChatOpenAI(model="gpt-4", temperature=0.3)

# Embedding and Vectorstore setup
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma(
    persist_directory="/tmp/chroma_store_virtual_buffett_v3",
    embedding_function=embedding_model,
    collection_name="virtual_buffett_v3"
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Tool functions
def chroma_search(query: str) -> str:
    docs = retriever.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs) if docs else "No relevant information found."

def get_latest_fed_rate(_: str = "") -> str:
    api_key = os.getenv("FRED_API_KEY")
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": "FEDFUNDS",
        "api_key": api_key,
        "file_type": "json",
        "sort_order": "desc",
        "limit": 1
    }
    try:
        res = requests.get(url, params=params).json()
        obs = res["observations"][0]
        return f"As of {obs['date']}, the effective federal funds rate is {obs['value']}%."
    except Exception as e:
        return f"Error fetching FRED data: {e}"

def get_current_date(_: str = "") -> str:
    now = datetime.now()
    return f"Today's date is {now.strftime('%A, %B %d, %Y')}"

# LangChain tool wrappers
buffett_tool = Tool(name="BerkshireLetterSearch", description="Searches Buffett's letters.", func=chroma_search)
tavily_tool = TavilySearchResults(api_key=os.getenv("TAVILY"))
fed_rate_tool = Tool(name="FederalReserveRateTool", description="Gets latest Fed Funds Rate.", func=get_latest_fed_rate)
date_tool = Tool(name="CurrentDateTool", description="Returns today's date.", func=get_current_date)

tools = [buffett_tool, tavily_tool, fed_rate_tool, date_tool]

# Agent with ReAct and tools
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
    agent_kwargs={"system_message": BUFFETT_PERSONA}
)

# Long-term memory via SQLite
memory_chain = RunnableWithMessageHistory(
    agent,
    lambda session_id: SQLChatMessageHistory(
        connection_string="sqlite:////Users/daniel/Documents/Northwestern/MSDS 420 - Database Systems/Assignment 5/Long-Term Memory/buffett_memory.db",
        session_id=session_id
    ),
    input_messages_key="input",
    history_messages_key="history"
)

# Progress bar for "thinking" animation
def show_progress_bar(stop_event):
    with tqdm(bar_format="Thinking like Buffett... {bar} [elapsed: {elapsed}]", leave=False) as pbar:
        while not stop_event.is_set():
            pbar.update(1)
            time.sleep(0.1)

# CLI loop
print("\n[Virtual Buffett Assistant with Memory + Tools + Persona] Type 'exit', 'goodbye', 'bye' to leave.\n")
while True:
    user_input = input("Ask Virtual Buffett a question: ").strip()
    if user_input.lower() in {"exit", "quit"}:
        print("Goodbye.")
        break

    stop_event = threading.Event()
    progress_thread = threading.Thread(target=show_progress_bar, args=(stop_event,))
    progress_thread.start()

    try:
        result = memory_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": "buffett-session"}}
        )
        stop_event.set()
        progress_thread.join()
        print("\nVirtual Buffett:", result.get("output", result))

    except Exception as e:
        stop_event.set()
        progress_thread.join()
        print("\nError:", str(e))

  agent = initialize_agent(



[Virtual Buffett Assistant with Memory + Tools + Persona] Type 'exit', 'goodbye', 'bye' to leave.



Ask Virtual Buffett a question:  hi


  message_history = self.get_session_history(
                                                                                


Virtual Buffett: Hello! How can I assist you today?


Ask Virtual Buffett a question:  what day is today


                                                                                


Virtual Buffett: Today is Thursday, May 29, 2025.


Ask Virtual Buffett a question:  what did you tell shareholders in 1977 in your annual 


                                                                                


Virtual Buffett: I'm sorry, but I couldn't find the letter to shareholders from 1977.


Ask Virtual Buffett a question:  annual news letter to shareholders 1977


                                                                                


Virtual Buffett: The 1977 annual letter to shareholders can be found in several sources online. The letter provides insights into Warren Buffett's investment philosophy and the performance of Berkshire Hathaway in that year.
