In [13]:
# Imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Step 1: Get main FOMC page
url = "https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm"
# A timeout keeps the cell from hanging indefinitely if the server stalls,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the calendar.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')

# Step 2: Extract all HTML statement links
# Keep only anchors whose visible text contains "HTML" and whose href points
# into the monetary press-release path; the hrefs are stored as found on the
# page (the host is prepended later).
press_links = []
for link in soup.find_all('a', href=True):
    href = link['href']
    text = link.text.strip()
    if "/newsevents/pressreleases/monetary" in href and "HTML" in text:
        press_links.append(href)

# Step 3: Pick the most recent one
def extract_date_from_href(href):
    """Return the 8-digit date embedded in a statement link, as an int.

    Searches ``href`` for ``monetary<8 digits>a.htm``. When the pattern is
    not present the function returns 0, so such links sort below every
    dated link when this is used as a ``max``/``sorted`` key.
    """
    found = re.search(r'monetary(\d{8})a\.htm', href)
    if not found:
        return 0
    return int(found.group(1))

# Fail with a clear message instead of max()'s generic "empty sequence"
# error when the calendar page yielded no matching links.
if not press_links:
    raise ValueError(
        "No FOMC statement links found on the calendar page; "
        "the page layout or link pattern may have changed."
    )

latest_link = max(press_links, key=extract_date_from_href)
latest_url = "https://www.federalreserve.gov" + latest_link
print("Latest URL:", latest_url)

# Step 4: Extract statement text
# Same timeout / status check as any other request: never hang forever and
# never parse an error page as if it were the statement.
r = requests.get(latest_url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')
div = soup.find('div', class_='col-xs-12 col-sm-8 col-md-8')
if div is None:
    # Keep the original best-effort fallback (empty text) but make it visible,
    # since every downstream value would otherwise be silently empty.
    print("Warning: statement container div not found; statement_text is empty")
statement_text = div.get_text(separator=' ', strip=True) if div else ""

# Step 5: Lowercase
clean_text = statement_text.lower()

# Step 6: Remove final paragraph (voting and implementation note)
def remove_final_paragraph(text):
    """Cut the trailing voting / implementation-note section from ``text``.

    Everything from the first "voting for the monetary policy action" that is
    later followed by "implementation note issued" through the end of the
    string is removed, and the remainder is whitespace-stripped. The match is
    case-sensitive (lowercase), and ``.`` spans newlines. If the two phrases
    do not both occur in that order, only the strip is applied.
    """
    tail_section = re.compile(
        r"voting for the monetary policy action.*?implementation note issued.*?$",
        re.DOTALL,
    )
    trimmed = tail_section.sub('', text)
    return trimmed.strip()

# Strip the trailing voting / implementation-note section. The pattern in
# remove_final_paragraph is lowercase and case-sensitive, so this must run
# on the already-lowercased text from Step 5.
clean_text = remove_final_paragraph(clean_text)

# Step 7: Extract rate decision
def extract_rate_decision(text):
    """Classify a statement as "raise", "lower", "maintain" or "unknown".

    The phrases are checked in a fixed priority order (raise, then lower,
    then maintain) with a case-sensitive substring test; the label of the
    first phrase found is returned. "unknown" is returned when none occur.
    """
    phrase_labels = (
        ("decided to raise", "raise"),
        ("decided to lower", "lower"),
        ("decided to maintain", "maintain"),
    )
    for phrase, label in phrase_labels:
        if phrase in text:
            return label
    return "unknown"

rate_decision = extract_rate_decision(clean_text)

# Step 8: Date from URL
# Guard the match: a link that lacks the "monetary<8 digits>a.htm" pattern
# would otherwise fail with an opaque AttributeError on None.
date_match = re.search(r'monetary(\d{8})a\.htm', latest_url)
if date_match is None:
    raise ValueError(f"No YYYYMMDD date found in statement URL: {latest_url}")
date_str = date_match.group(1)

# Step 9: Prepare dataframe (one row describing the latest statement)
df = pd.DataFrame([{
    "date": date_str,
    "statement_text": statement_text,
    "clean_text": clean_text,
    "rate_decision": rate_decision,
    "forecast_rate_decision": "",  # to be filled later
    "speech_length": len(clean_text.split())  # whitespace-delimited word count
}])

# Step 10: Save to CSV
df.to_csv("latest_fomc_speech.csv", index=False)
print("✅ latest_fomc_speech.csv created")

# Load the latest speech CSV
df = pd.read_csv("latest_fomc_speech.csv")

# Prompt user for forecast decision, re-asking until the answer is valid.
# NOTE(review): the saved result depends on interactive input, so this cell
# is not reproducible from code alone.
valid_decisions = ("raise", "lower", "maintain")
manual_forecast = input("Enter forecast_rate_decision (raise / lower / maintain): ").strip().lower()
while manual_forecast not in valid_decisions:
    manual_forecast = input("❌ Invalid. Please enter 'raise', 'lower', or 'maintain': ").strip().lower()

# Drop any existing forecast column first so that re-adding it appends it as
# the last column; errors="ignore" makes the drop a no-op when it is absent.
df = df.drop(columns=["forecast_rate_decision"], errors="ignore")
df["forecast_rate_decision"] = manual_forecast

# Save updated CSV
df.to_csv("latest_fomc_speech.csv", index=False)

print(f"✅ Forecast rate decision added as: {manual_forecast}")

Latest URL: https://www.federalreserve.gov/newsevents/pressreleases/monetary20250507a.htm
✅ latest_fomc_speech.csv created


Enter forecast_rate_decision (raise / lower / maintain):  lol
❌ Invalid. Please enter 'raise', 'lower', or 'maintain':  maintain


✅ Forecast rate decision added as: maintain


In [9]:
# NOTE(review): `meeting_blocks` is not defined in any cell visible here, and
# this cell's execution count (9) is lower than the cell above it (13) --
# confirm this still runs after Restart Kernel -> Run All.
print(meeting_blocks)

[<div "="" class="row fomc-meeting">
<div class="fomc-meeting__month col-xs-5 col-sm-3 col-md-2"><strong>January</strong></div>
<div class="fomc-meeting__date col-xs-4 col-sm-9 col-md-10 col-lg-1">28-29</div>
<div class="col-xs-12 col-md-4 col-lg-2">
<strong>Statement:</strong><br/>
<a href="/monetarypolicy/files/monetary20250129a1.pdf">PDF</a> | <a href="/newsevents/pressreleases/monetary20250129a.htm">HTML</a><br/>
<a href="/newsevents/pressreleases/monetary20250129a1.htm">Implementation Note</a>
</div>
<div class="col-xs-12 col-md-4 col-lg-3">
<a href="/monetarypolicy/fomcpresconf20250129.htm">Press Conference</a><br/>
<br/>
</div>
<div class="col-xs-12 col-md-4 col-lg-4 fomc-meeting__minutes">
<strong>Minutes:</strong><br/>
<a href="/monetarypolicy/files/fomcminutes20250129.pdf">PDF</a> | <a href="/monetarypolicy/fomcminutes20250129.htm">HTML</a>
<br/> (Released February 19, 2025)
                    	
                    	
                    	</div>
</div>, <div "="" class="row f