# Generate Reports

1. Vanilla LLM
2. Naive RAG
3. Kruppe Model

In [1]:
import asyncio
import os
import pathlib
import re

import pandas as pd

from kruppe.llm import OpenAILLM

# Root directory under which every report sub-directory is created. It defaults to the
# original author's checkout, but can be redirected on another machine by setting the
# FIN_RAG_EXPERIMENT_DIR environment variable before the kernel starts.
experiment_dir = pathlib.Path(
    os.environ.get("FIN_RAG_EXPERIMENT_DIR", "/Users/danielliu/Workspace/fin-rag/experiments")
)

In [None]:
# Load the report manifest. Later cells read its `question` column and add
# report-location columns to it.
# index_col=False tells pandas not to use the first CSV column as the index.
df = pd.read_csv("./reports.csv", index_col=False)
df

In [2]:
# Module-level LLM client; generate_title below reads this global when it runs.
llm = OpenAILLM()

async def generate_title(report: str) -> str:
    """Ask the LLM for a short title for ``report`` and make it safe to use as a file name.

    Callers in this notebook build the output path as ``f"{title}.txt"``, so the
    returned string must never contain path separators or other characters that
    file systems reject. The prompt asks for that, but the model's answer is not
    guaranteed to comply, so the answer is sanitized before it is returned.

    Args:
        report: Full text of the report to be titled.

    Returns:
        A non-empty title of at most 200 characters with no path separators,
        reserved punctuation or control characters, and no leading or trailing
        spaces, dots or underscores. ``"untitled_report"`` is returned when
        nothing usable is left after sanitizing.
    """
    user_message = f"Given a report, create a short and concise title. Respond only with the title. Make sure every character is file name compatible. \n\nReport: {report}\n\nOutput:"
    messages = [
        {"role": "system", "content": "You are a helpful assistant that summarizes reports."},
        {"role": "user", "content": user_message},
    ]

    llm_response = await llm.async_generate(messages)

    # Collapse every run of characters that is unsafe in a file name (path separators,
    # Windows-reserved punctuation, control characters such as newlines) into one "_".
    title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", llm_response.text or "")
    # Leading dots would create hidden files and trailing dots/spaces are rejected on
    # Windows; the length cap keeps "<title>.txt" below common file-name limits.
    title = title.strip(" ._")[:200].rstrip(" ._")
    return title or "untitled_report"

## Vanilla LLM

In [None]:
# Re-creates the module-level `llm` client. An identical assignment already exists in
# the generate_title cell, so this only matters when that cell has not been run.
llm = OpenAILLM()

In [None]:
from kruppe.prompts.experiments import VANILLA_QA_SYSTEM
async def vanilla_llm_response(question: str):
    """Answer ``question`` with the bare LLM, using only the vanilla QA system prompt.

    The conversation sent to the module-level ``llm`` has exactly two turns: the
    ``VANILLA_QA_SYSTEM`` system message followed by the question as the user
    message. The ``text`` attribute of the LLM response is returned.
    """
    system_turn = dict(role="system", content=VANILLA_QA_SYSTEM)
    user_turn = dict(role="user", content=question)
    completion = await llm.async_generate([system_turn, user_turn])
    return completion.text

# Example: try the helper on the question at index label 0 before running the
# full batch in the next cell.
await vanilla_llm_response(df.question[0])

In [None]:
vanilla_reports = await asyncio.gather(*[vanilla_llm_response(question) for question in df.question])
vanilla_reports

In [None]:
vanilla_titles = await asyncio.gather(*[generate_title(report) for report in vanilla_reports])
vanilla_titles

In [None]:
# Write each vanilla report to "<experiment_dir>/vanilla_report/<title>.txt" and record
# the resulting paths in the manifest's `vanilla_report_loc` column.
vanilla_report_dir = experiment_dir / "vanilla_report"
# parents=True so the cell also works when experiment_dir itself does not exist yet.
vanilla_report_dir.mkdir(parents=True, exist_ok=True)

file_paths = []
# vanilla_titles was produced from vanilla_reports in order, so the two pair up positionally.
for title, report in zip(vanilla_titles, vanilla_reports):
    file_path = vanilla_report_dir / f"{title}.txt"
    # Explicit UTF-8: the platform default encoding may be unable to represent
    # characters that appear in LLM output (curly quotes, dashes, ...).
    file_path.write_text(report, encoding="utf-8")
    file_paths.append(file_path.as_posix())
    print(f"Saved report {title}")

df["vanilla_report_loc"] = file_paths
df

In [None]:
# Write the manifest back in place so the vanilla_report_loc column added in the
# previous cell is persisted; index=False keeps the row index out of the file.
df.to_csv("./reports.csv", index=False)

## Naive RAG Response

In [None]:
# Reload the manifest from disk for the naive-RAG section.
# NOTE(review): no naive-RAG generation code is visible in this section — only this reload.
df = pd.read_csv("./reports.csv", index_col=False)
df

## Kruppe Response

In [3]:
# Reload the manifest from disk so this section starts from whatever earlier
# sections or runs saved (e.g. the vanilla_report_loc column).
df = pd.read_csv("./reports.csv", index_col=False)
df

Unnamed: 0,category,human_report_loc,question,vanilla_report_loc,kruppe_report_loc
0,Ad Agency Industry,/Users/danielliu/Workspace/fin-rag/experiments...,What are the key developments and financial pr...,/Users/danielliu/Workspace/fin-rag/experiments...,/Users/danielliu/Workspace/fin-rag/experiments...
1,Ad Agency Industry,/Users/danielliu/Workspace/fin-rag/experiments...,What impact would a potential ban on pharmaceu...,/Users/danielliu/Workspace/fin-rag/experiments...,/Users/danielliu/Workspace/fin-rag/experiments...
2,Ad Agency Industry,/Users/danielliu/Workspace/fin-rag/experiments...,What are the implications of the expected slow...,/Users/danielliu/Workspace/fin-rag/experiments...,
3,Ad Agency Industry,/Users/danielliu/Workspace/fin-rag/experiments...,How is Amazon's demand-side platform (DSP) imp...,/Users/danielliu/Workspace/fin-rag/experiments...,
4,Ad Agency Industry,/Users/danielliu/Workspace/fin-rag/experiments...,What is the outlook for organic sales growth i...,/Users/danielliu/Workspace/fin-rag/experiments...,
5,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,What are the expectations and key topics for N...,/Users/danielliu/Workspace/fin-rag/experiments...,
6,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,What are the current trends and future project...,/Users/danielliu/Workspace/fin-rag/experiments...,
7,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,What are the financial prospects and investmen...,/Users/danielliu/Workspace/fin-rag/experiments...,
8,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,What is the current state and outlook of the A...,/Users/danielliu/Workspace/fin-rag/experiments...,
9,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,What are the key factors influencing NVIDIA Co...,/Users/danielliu/Workspace/fin-rag/experiments...,


In [4]:
from kruppe.algorithm.agents import Librarian
from kruppe.data_source.news.nyt import NewYorkTimesData

# LLM client shared by the Librarian and by the helpers that read the module-level `llm`.
llm = OpenAILLM()
# NOTE(review): headers_path is an absolute, machine-specific path; presumably the JSON
# file holds the request headers used to reach the New York Times — confirm.
nyt = NewYorkTimesData(headers_path="/Users/danielliu/Workspace/fin-rag/.nyt-headers.json")

# The Librarian is given the LLM and the NYT news source; background_report hands it
# to BackgroundResearcher.
librarian = Librarian(llm=llm, news_source=nyt)

In [5]:
from kruppe.algorithm.background_module.research import BackgroundResearcher

async def background_report(question: str):
    """Run a fresh ``BackgroundResearcher`` for ``question`` and return what it produces.

    A new researcher is built on every call from the module-level ``llm`` and
    ``librarian``; the awaited result of its ``execute()`` is returned as is.
    """
    report = await BackgroundResearcher(
        llm=llm,
        librarian=librarian,
        query=question,
    ).execute()
    return report

# example:
# report = await background_report(df.question[0])
# report

In [6]:
from tqdm.asyncio import tqdm

kruppe_dir = experiment_dir / "kruppe_report"
kruppe_dir.mkdir(exist_ok=True)

kruppe_reports = []

if "kruppe_report_loc" not in df.columns:
    df["kruppe_report_loc"] = [None] * df.shape[0]
async for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    if index < 2:
        continue
    
    question = row["question"]
    print("QUESTION:", question)

    report = await background_report(question)
    kruppe_reports.append(report)

    title = await generate_title(report)
    file_path = kruppe_dir / f"{title}.txt"
    file_path.write_text(report)
    df.at[index, 'kruppe_report_loc'] = file_path.as_posix()
    df.to_csv("./reports.csv", index=False)
    print(f"Saved report {title}")



kruppe_reports


  0%|          | 0/10 [00:00<?, ?it/s]

QUESTION: What are the implications of the expected slowdown in global advertising spending for major advertising agencies in 2025?
Creating info requests...
Created 5 info requests


<html><head>
<title>301 Moved Permanently</title>
</head><body>
<h1>Moved Permanently</h1>
<p>The document has moved <a href="https://archive.nytimes.com/bits.blogs.nytimes.com/2009/03/02/the-real-high-tech-immigrant-problem-theyre-leaving/">here</a>.</p>
</body></html>

<html><head>
<title>301 Moved Permanently</title>
</head><body>
<h1>Moved Permanently</h1>
<p>The document has moved <a href="https://archive.nytimes.com/bits.blogs.nytimes.com/2009/03/02/fresh-potential-targets-added-to-the-wheel-of-cisco/">here</a>.</p>
</body></html>

<html><head>
<title>301 Moved Permanently</title>
</head><body>
<h1>Moved Permanently</h1>
<p>The document has moved <a href="https://archive.nytimes.com/bits.blogs.nytimes.com/2009/03/02/nyc-seed-nurtures-homegrown-start-ups/">here</a>.</p>
</body></html>

<html><head>
<title>301 Moved Permanently</title>
</head><body>
<h1>Moved Permanently</h1>
<p>The document has moved <a href="https://archive.nytimes.com/gadgetwise.blogs.nytimes.com/2009/03/02/new-

Compiling report...


 40%|████      | 4/10 [01:26<02:10, 21.74s/it]

Saved report Implications_of_Global_Advertising_Spending_Slowdown_2025_Report
QUESTION: How is Amazon's demand-side platform (DSP) impacting The Trade Desk's (TTD) growth and competitiveness in the connected-TV advertising market?
Creating info requests...
Created 4 info requests
Compiling report...


 50%|█████     | 5/10 [02:39<02:55, 35.02s/it]

Saved report Amazon DSP vs TTD in Connected TV Advertising Market
QUESTION: What is the outlook for organic sales growth in the advertising industry amid economic uncertainty?
Creating info requests...
Created 5 info requests


<html lang="en"  xmlns:og="http://opengraphprotocol.org/schema/">
  <head>
    
    
    <meta charset="utf-8" />
    <title data-rh="true">Server Error - The New York Times</title>
      
    
    
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta property="fb:app_id" content="9869919170" />
<meta name="twitter:site" content="@nytimes" />
<meta name="slack-app-id" content="A0121HXPPTQ" />
    <script>(function $t(e){if(new URL(window.location).searchParams.get("sentryOverride")||Math.floor(100*Math.random())<=e){var n=['<script src="https://js.sentry-cdn.com/7bc8bccf5c254286a99b11c68f6bf4ce.min.js" crossorigin="anonymous">',"<","/script>"].join("");document.write(n)}})(1);</script><script>(function ei(e){var n=e.release,t=e.env,i=e.routeName,a=window.vi&&window.vi.webviewEnvironment||{},o=a.isPreloaded,r=void 0!==o&&o,l="IOS"===a.deviceType;if(window.Sentry){var s=function(){window.Sentry.init({maxBreadcrumbs:c,release:n,environment:t,allowUrls:u,ignoreError

Compiling report...


 60%|██████    | 6/10 [04:21<03:33, 53.34s/it]

Saved report Outlook for Organic Sales Growth in Advertising Amid Economic Uncertainty
QUESTION: What are the expectations and key topics for NVIDIA's annual GTC conference in 2025, particularly regarding AI and product innovations?
Creating info requests...
Created 5 info requests
Compiling report...


 70%|███████   | 7/10 [05:30<02:53, 57.73s/it]

Saved report NVIDIA_GTC_2025_Expectations_Report
QUESTION: What are the current trends and future projections for capital expenditures in the semiconductor industry, particularly regarding GPU demand and the impact of tariffs?
Creating info requests...
Created 4 info requests
Compiling report...


 80%|████████  | 8/10 [06:57<02:12, 66.29s/it]

Saved report Semiconductor_Capital_Expenditures_2023_Trends_Projections
QUESTION: What are the financial prospects and investment potential of NVIDIA Corporation in light of its recent performance and market dynamics?
Creating info requests...
Created 5 info requests
Compiling report...


 90%|█████████ | 9/10 [08:28<01:13, 73.41s/it]

Saved report NVIDIA_Financial_Prospects_Investment_Potential_Report
QUESTION: What is the current state and outlook of the AI semiconductor and hardware market amidst valuation changes and external economic pressures?
Creating info requests...
Created 5 info requests
Compiling report...


100%|██████████| 10/10 [10:06<00:00, 80.47s/it]

Saved report AI Semiconductor Market Outlook Amid Geopolitical Tensions and Economic Pressures
QUESTION: What are the key factors influencing NVIDIA Corporation's stock performance and investment outlook ahead of its GTC 2025 event?
Creating info requests...
Created 6 info requests


ERROR:kruppe.data_source.news.nyt:New York Times HTTP Error 429: New York Times query limit reached


retrieve_and_analyze took too long; proceeding with partial results...
Compiling report...


100%|██████████| 10/10 [16:26<00:00, 98.62s/it]

Saved report NVIDIA_Stock_Performance_Outlook_GTC_2025





["### Background Report on the Implications of Expected Slowdown in Global Advertising Spending for Major Advertising Agencies in 2025\n\nAs the advertising industry prepares for a potential slowdown in global advertising spending in 2025, multiple factors and trends are emerging that may significantly impact major advertising agencies. This report synthesizes insights from recent analyses, identifying critical dynamics in the industry, shifting consumer behavior, technological advancements, and economic pressures that could shape the landscape.\n\n#### 1. Economic Climate and Advertising Revenue Trends\n\nThe advertising sector is increasingly influenced by broader economic conditions, with many agencies already feeling the impact of client budget cuts due to economic downturns. For instance, substantial declines in advertising revenues, particularly within traditional broadcast networks, have been reported, with reductions estimated between 8% and 18% compared to previous years. This