# FOMC Statement Scraper
This notebook downloads FOMC statements from the Federal Reserve website for the release dates listed in `data/dates.sort.txt` (one `YYYYMMDD` date per line). Each statement is saved as its own text file under `statements/statements.raw/`.

In [1]:
# Importing required libraries
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup
from urllib.request import urlopen
from time import sleep
import re, csv, os

In [2]:
# Folder that receives the raw statement files. It is created up front (with
# no error if it already exists) so later writes never hit a missing directory.
outdir = os.path.join(*['statements', 'statements.raw'])
os.makedirs(name=outdir, exist_ok=True)

### Function to get URL of FOMC statement based on date

In [3]:
def FOMCstatementURL(date):
    """Build the federalreserve.gov URL of the FOMC statement for a release date.

    The URL layout depends on the date range the release falls in, so the
    date is compared numerically against the boundaries below.

    Parameters
    ----------
    date : str
        Release date as an 8-digit ``YYYYMMDD`` string.

    Returns
    -------
    str
        URL of the statement page for that date.

    Raises
    ------
    ValueError
        If ``date`` is not numeric, or if it falls outside every supported
        range (before 19990501, or from 20020331 up to but excluding
        20020501), for which no URL layout is defined here.
    """
    year = date[0:4]
    dateInt = int(date)
    boarddocs = 'http://www.federalreserve.gov/boarddocs/press/'
    newsevents = 'http://www.federalreserve.gov/newsevents/press/monetary/'

    if dateInt == 20081216:
        # This one release uses the 'b' suffix instead of the usual 'a'.
        urlout = newsevents + date + 'b.htm'
    elif 19990501 <= dateInt < 20020331:
        urlout = boarddocs + 'general/' + year + '/' + date + '/'
    elif 20020501 <= dateInt < 20030000:
        urlout = boarddocs + 'monetary/' + year + '/' + date + '/'
    elif 20030000 <= dateInt < 20060000:
        urlout = boarddocs + 'monetary/' + year + '/' + date + '/default.htm'
    elif dateInt >= 20060000:
        # 2005 dates are already handled by the previous branch, so this
        # layout effectively starts in 2006.
        urlout = newsevents + date + 'a.htm'
    else:
        # Fail with a clear message instead of an UnboundLocalError on `urlout`.
        raise ValueError('No FOMC statement URL pattern is defined for date ' + repr(date))
    return urlout

### Function to get and extract the statement from the webpage

In [4]:
def getStatement(mtgDate, timeout=30):
    """Download the FOMC statement page for one date and return its text.

    The page is fetched from the URL given by ``FOMCstatementURL``, reduced
    to plain text, and trimmed to the span that starts at "For immediate
    release" and ends just before the first "<4 digits> Monetary Policy"
    marker. Non-ASCII characters are dropped from the result.

    Parameters
    ----------
    mtgDate : str
        Release date as a ``YYYYMMDD`` string.
    timeout : float, optional
        Seconds to wait on the HTTP request before giving up, so a stalled
        connection cannot hang the whole scrape.

    Returns
    -------
    str
        The ASCII statement text. An empty string is returned when the
        start marker is not found. When the end marker is not found,
        everything after the start marker is returned.

    Raises
    ------
    Whatever ``FOMCstatementURL`` or ``urlopen`` raise (e.g. an HTTP or
    network error); these are not caught here.
    """
    print(' Pulling:', mtgDate)

    url = FOMCstatementURL(mtgDate)
    # A browser-like User-Agent is sent; presumably the server rejects the
    # default urllib one — TODO confirm.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()

    soup = BeautifulSoup(html, 'html.parser')
    allText = soup.get_text(" ")

    # Locate where the statement begins; without it there is nothing to keep.
    start_match = re.search(r"[Ff]or\s[Ii]mmediate\s[Rr]elease", allText)
    if not start_match:
        print(f" Start pattern not found for {mtgDate}")
        return ""

    statementText = allText[start_match.start():]

    # Cut the text at the end marker when it is present.
    end_match = re.search(r"[0-9]{4}\s[Mm]onetary\s[Pp]olicy", statementText)
    if end_match:
        statementText = statementText[:end_match.start()]
    else:
        print(f" End pattern not found for {mtgDate}, saving full content after start")

    # Drop every non-ASCII character (they are removed, not transliterated).
    statementText = statementText.encode('ascii', 'ignore').decode('ascii')

    return statementText


### Main logic to read dates and save statements

In [5]:
def main():
    """Fetch every statement listed in the date file and write each to disk.

    Reads release dates (one per line) from ``data/dates.sort.txt``, calls
    ``getStatement`` for each, and writes the returned text to
    ``statement.fomc.<date>.txt`` inside ``outdir``. Pauses two seconds
    after each download.
    """
    # Read the date list. The context manager closes the file, and blank
    # lines are skipped because an empty date would fail integer parsing
    # downstream.
    date_path = os.path.join('data', 'dates.sort.txt')
    with open(date_path, 'r') as date_file:
        releaseDates = [line.strip() for line in date_file if line.strip()]

    for releaseDate in releaseDates:
        data = getStatement(releaseDate)
        sleep(2)  # Be polite to the server

        # Special case: the page is fetched using 20070618 but saved under
        # 20070628. Presumably the URL date differs from the actual release
        # date — TODO confirm.
        if "20070618" in releaseDate:
            releaseDate = "20070628"

        filename = f"statement.fomc.{releaseDate}.txt"
        filepath = os.path.join(outdir, filename)
        with open(filepath, 'w') as f:
            f.write(data)

### Run the main function

In [6]:
# Run the full scrape: downloads every listed date (with a 2-second pause
# after each) and writes one text file per statement.
main()

 Pulling: 19990518
 Pulling: 19990630
 Pulling: 19990824
 Pulling: 19991005
 Pulling: 19991116
 Pulling: 19991221
 Pulling: 20000202
 Pulling: 20000321
 Pulling: 20000516
 Pulling: 20000628
 Pulling: 20000822
 Pulling: 20001003
 Pulling: 20001115
 Pulling: 20001219
 Pulling: 20010131
 Pulling: 20010320
 Pulling: 20010515
 Pulling: 20010627
 Pulling: 20010821
 Pulling: 20011002
 Pulling: 20011106
 Pulling: 20011211
 Pulling: 20020130
 Pulling: 20020319
 Pulling: 20020507
 Pulling: 20020626
 Pulling: 20020813
 Pulling: 20020924
 Pulling: 20021106
 Pulling: 20021210
 Pulling: 20030129
 Pulling: 20030318
 Pulling: 20030506
 Pulling: 20030625
 Pulling: 20030812
 Pulling: 20030916
 Pulling: 20031028
 Pulling: 20031209
 Pulling: 20040128
 Pulling: 20040316
 Pulling: 20040504
 Pulling: 20040630
 Pulling: 20040810
 Pulling: 20040921
 Pulling: 20041110
 Pulling: 20041214
 Pulling: 20050202
 Pulling: 20050322
 Pulling: 20050503
 Pulling: 20050630
 Pulling: 20050809
 Pulling: 20050920
 Pulling: 20