# Federal Reserve Beige Book Text Analysis

In [1]:
import pandas as pd
import re
from requests import get
from collections import Counter
from time import sleep

import scrapy
from bs4 import BeautifulSoup
from scrapy import Selector
from scrapy.http import HtmlResponse
import nltk

## Acquire 

In [2]:
# Link to a single Beige Book release (the 2020-01 page, per the URL)
url = 'https://www.federalreserve.gov/monetarypolicy/beigebook202001.htm'

# Request headers identifying this client to the server.
# The standard HTTP header name is hyphenated: "User-Agent".
headers = {'User-Agent': 'Promeos'}

# `requests.get(url, params=None, **kwargs)`: the second positional argument is
# `params` (the query string), so the headers have to be passed by keyword.
response = get(url, headers=headers)

In [3]:
# Display the HTTP status code of the most recent request (200 means success).
response.status_code

200

## Prepare

In [4]:
# Strip line-break tags before parsing so a paragraph's text is not split around
# them. Both spellings are removed, matching the clean-up applied when every
# Beige Book is downloaded in bulk.
html = response.text.replace('<br />', '').replace('<br>', '')

# Parse the cleaned markup so it can be queried with XPath.
sel = Selector(text=html, type='html')

# Text of the page <title>; used further down as a DataFrame column label.
title = sel.xpath('//title/text()').get()

In [5]:
# Text of every <h4> heading on the page. These are paired with the economic
# summaries when printed, so they are expected to be the district bank names.
fed_district_banks = sel.xpath('//h4/text()').getall()

In [6]:
# Text of every <strong> element (the section headings inside each report).
# The strings are not stripped, so headings that differ only by trailing
# whitespace are counted separately.
headings = sel.xpath('.//strong/text()').extract()

# Tally how often each heading occurs.
headings_frequency = Counter()
headings_frequency.update(headings)

# Five most frequent headings.
headings_frequency.most_common(5)

[('Summary of Economic Activity', 12),
 ('Employment and Wages ', 10),
 ('Prices ', 9),
 ('Manufacturing', 8),
 ('Real Estate and Construction', 6)]

In [7]:
# For every <p> that is a following sibling of an <h4> and mentions
# "Summary of Economic Activity", collect its direct text nodes.
summary_xpath = './/h4/following-sibling::p[contains(., "Summary of Economic Activity")]/text()'
economic_summary_list = sel.xpath(summary_xpath).getall()

# Remove embedded newlines and trim surrounding whitespace from each text node.
economic_summaries = []
for raw_text in economic_summary_list:
    economic_summaries.append(raw_text.replace('\n', '').strip())

In [8]:
# Print each bank name followed by its summary, with a blank line between entries.
for bank_name, summary_text in zip(fed_district_banks, economic_summaries):
    print(bank_name, summary_text, sep='\n', end='\n\n')

Federal Reserve Bank of Boston
Economic activity continued to expand in the First District at the close of 2019. Retailers and manufacturers cited modest to moderate revenue increases from a year earlier, while results for software and information technology services firms were stronger. Respondents in these three sectors also reported continued tight labor markets, but none mentioned increasing wages (other than scheduled minimum wage rises in some states). Price increases continued to be modest according to contacts. Inventory shortages persisted in New England residential real estate markets, and median sales prices for single-family homes and condos rose across the region in November. Greater Boston's commercial real estate market continued to be robust, while commercial activity in the Providence area was moderate. Business outlooks ranged from very positive to cautiously optimistic, mostly reflecting expectations that recent trends will continue into 2020.

Federal Reserve Bank o

In [9]:
# Single-column frame: the page title labels the column, one row per summary.
a = pd.DataFrame(economic_summaries, columns=[title])

In [10]:
# Build the two-column frame from scratch instead of joining onto the existing
# `a`, so the cell gives the same result every time it is run. The previous
# `a = a.join(...)` raised on a second run because the '<title>01' column was
# already present (overlapping columns without a suffix).
a = pd.DataFrame({title: economic_summaries}).join(
    pd.DataFrame({title + '01': economic_summaries})
)

In [11]:
# Index page for 2020; the following cells pull the per-release HTML links from it.
url_2020 = 'https://www.federalreserve.gov/monetarypolicy/beigebook2020.htm'

In [12]:
# Pass the headers by keyword: the second positional argument of `requests.get`
# is `params`, which would put them in the query string instead.
response = get(url_2020, headers=headers)

In [13]:
# Parse the 2020 index page so its links can be queried with XPath.
toc_html = Selector(text=response.text)

In [14]:
# href of every anchor whose text contains "HTML" (relative paths to each release).
toc_html.xpath('//a[contains(., "HTML")]/@href').getall()

['/monetarypolicy/beigebook202001.htm',
 '/monetarypolicy/beigebook202003.htm',
 '/monetarypolicy/beigebook202004.htm',
 '/monetarypolicy/beigebook202005.htm',
 '/monetarypolicy/beigebook202007.htm',
 '/monetarypolicy/beigebook202009.htm',
 '/monetarypolicy/beigebook202010.htm',
 '/monetarypolicy/beigebook202012.htm']

In [15]:
# Years to scrape, as strings: 2017 through 2020 inclusive.
years = [str(year) for year in range(2017, 2021)]

# Pieces of an annual index URL: <root_url><beige_book_path><year><end_point>
root_url = 'https://www.federalreserve.gov'
beige_book_path = '/monetarypolicy/beigebook'
end_point = '.htm'

In [16]:
# One index-page URL per year.
annual_beige_book = [f'{root_url}{beige_book_path}{year}{end_point}' for year in years]

# Show the URLs, one per line.
for annual_url in annual_beige_book:
    print(annual_url)

https://www.federalreserve.gov/monetarypolicy/beigebook2017.htm
https://www.federalreserve.gov/monetarypolicy/beigebook2018.htm
https://www.federalreserve.gov/monetarypolicy/beigebook2019.htm
https://www.federalreserve.gov/monetarypolicy/beigebook2020.htm


In [17]:
# Collect the absolute URL of every individual Beige Book release, year by year.
beige_book_links = []

for link in annual_beige_book:
    # Headers must be passed by keyword; positionally they would be sent as
    # query-string params (second positional argument of `requests.get`).
    response = get(link, headers=headers)
    # Fail loudly on an HTTP error instead of silently collecting no links for
    # that year.
    response.raise_for_status()
    # Pause between requests to stay polite to the server.
    sleep(5)

    main_page = Selector(text=response.text)
    # Relative hrefs of the anchors whose text contains "HTML".
    beige_book_paths = main_page.xpath('//a[contains(., "HTML")]/@href').extract()

    # Prefix the site root to turn each relative path into a full URL.
    links = [root_url + path for path in beige_book_paths]

    beige_book_links.extend(links)

In [18]:
# Inspect the collected release URLs.
beige_book_links

['https://www.federalreserve.gov/monetarypolicy/beigebook201701.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201703.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201704.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201705.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201707.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201709.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201710.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201711.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201801.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201803.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201804.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201805.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201807.htm',
 'https://www.federalreserve.gov/monetarypolicy/beigebook201809.htm',
 'https://www.federa

In [19]:
# Join the URLs so a single regex pass can pull out the digit run in each one
# (the YYYYMM stamp in the file name).
link_string = ' '.join(beige_book_links)

# Raw string: '\d' inside a normal string literal is an invalid escape sequence,
# which Python warns about and plans to reject in a future version.
publish_dates = re.findall(r'\d+', link_string)

In [31]:
# Economic-summary text for every release: one list of strings per Beige Book,
# in the same order as `beige_book_links`.
beige_books = []

for link in beige_book_links:
    # Headers must be passed by keyword; positionally they would be sent as
    # query-string params (second positional argument of `requests.get`).
    response = get(link, headers=headers)
    # Fail loudly on an HTTP error instead of silently appending an empty list
    # for that release.
    response.raise_for_status()
    # Pause between requests to stay polite to the server.
    sleep(5)

    # Strip both spellings of the line-break tag before parsing.
    html = response.text.replace('<br />', '').replace('<br>', '')
    sel = Selector(text=html, type='html')

    title = sel.xpath('//title/text()').get()

    # Direct text nodes of every <p> that follows an <h4> and mentions
    # "Summary of Economic Activity".
    # NOTE(review): `/text()` yields one entry per text node, so a paragraph that
    # also holds other inline elements presumably contributes several entries.
    # That would explain releases ending up with more than 12 items. Confirm.
    economic_summary_list = sel.xpath('.//h4/following-sibling::p[contains(., "Summary of Economic Activity")]/text()').extract()
    # Remove embedded newlines and trim surrounding whitespace.
    economic_summaries = [summary.replace('\n', '').strip() for summary in economic_summary_list]

    beige_books.append(economic_summaries)

In [34]:
# Sanity check: for each publish date, report whether exactly 12 summary
# entries were extracted from that release.
for summaries, publish_date in zip(beige_books, publish_dates):
    has_twelve = len(summaries) == 12
    print(has_twelve, publish_date)

True 201701
True 201703
True 201704
True 201705
True 201707
True 201709
True 201710
True 201711
False 201801
True 201803
True 201804
True 201805
False 201807
False 201809
False 201810
True 201812
True 201901
True 201903
False 201904
False 201906
True 201907
False 201909
True 201910
False 201911
True 202001
True 202003
True 202004
True 202005
True 202007
False 202009
True 202010
True 202012


In [30]:
# Tabulate the summaries: one row per release, one column per extracted text
# entry. Rows with fewer entries than the widest row are padded with missing values.
pd.DataFrame(beige_books)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,Business activity continued to expand in the F...,Economic activity in the Second District has h...,Aggregate business activity in the Third Distr...,Economic activity grew slightly on balance acr...,Fifth District economic activity grew at a mod...,Sixth District business contacts indicated tha...,Growth in economic activity in the Seventh Dis...,Information from contacts suggests that econom...,The Ninth District economy grew modestly overa...,Economic activity in the Tenth District increa...,Economic activity in the Eleventh District exp...,Economic activity in the Twelfth District cont...,,,,
1,First District businesses contacted in early F...,Economic activity in the Second District has p...,Aggregate business activity in the Third Distr...,Economic activity grew moderately on balance a...,Economic activity in the Fifth District grew m...,Business contacts indicated that economic acti...,The labor market remained tight and wage press...,Growth in economic activity in the Seventh Dis...,Economic conditions have continued to expand a...,The Ninth District economy grew modestly overa...,District economic activity continued to expand...,Economic activity in the Eleventh District exp...,Economic activity in the Twelfth District cont...,,,
2,Business activity continued to expand in the F...,Economic activity in the Second District has e...,Aggregate business activity in the Third Distr...,Economic activity grew moderately on balance a...,The Fifth District economy expanded at a sligh...,Sixth District business contacts indicated tha...,Growth in economic activity in the Seventh Dis...,Reports from contacts suggest that economic ac...,The Ninth District economy grew modestly overa...,Economic activity in the Tenth District increa...,Economic activity in the Eleventh District exp...,Economic activity in the Twelfth District cont...,,,,
3,Reports from business contacts in the First Di...,Economic activity in the Second District has b...,Aggregate business activity in the Third Distr...,Aggregate business activity expanded at a mode...,The Fifth District economy expanded at a modes...,Sixth District business contacts reported econ...,Growth in economic activity in the Seventh Dis...,Economic conditions have continued to expand a...,The Ninth District economy grew modestly overa...,Economic activity in the Tenth District contin...,The Eleventh District economy continued to exp...,Economic activity in the Twelfth District cont...,,,,
4,Business activity in the First District expand...,Economic activity in the Second District has e...,Aggregate business activity in the Third Distr...,Growth in economic activity across the Fourth ...,"The Fifth District economy expanded modestly, ...","On balance, reports from Sixth District busine...",Growth in economic activity in the Seventh Dis...,Reports from contacts suggest economic conditi...,"Since the last reporting period, the Ninth Dis...",Economic activity in the Tenth District expand...,The Eleventh District economy continued to exp...,Economic activity in the Twelfth District cont...,,,,
5,Business contacts indicated that economic acti...,Economic activity in the Second District has p...,Aggregate business activity in the Third Distr...,Aggregate business activity grew at a moderate...,The Fifth District economy continued to expand...,According to reports from businesses across th...,Growth in economic activity in the Seventh Dis...,Economic conditions have improved at a modest ...,The Ninth District economy grew modestly overa...,Economic activity in the Tenth District increa...,The Eleventh District economy continued to exp...,Economic activity in the Twelfth District cont...,,,,
6,Most business contacts in the First District r...,Economic activity in the Second District conti...,Aggregate business activity in the Third Distr...,Economic activity in the Fourth District expan...,The Fifth District economy grew at a moderate ...,"Aside from hurricane effects, Sixth District b...",Growth in economic activity in the Seventh Dis...,Reports from contacts suggest economic conditi...,The Ninth District economy grew modestly since...,Economic activity in the Tenth District contin...,The Eleventh District economy continued to exp...,Economic activity in the Twelfth District cont...,,,,
7,Business activity in the First District contin...,Economic activity in the Second District conti...,Aggregate business activity in the Third Distr...,Business activity grew at a moderate pace in t...,The Fifth District economy grew at a moderate ...,Reports from Sixth District business contacts ...,Economic activity in the Seventh District incr...,Economic conditions in the District have impro...,The Ninth District economy grew modestly overa...,Economic activity in the Tenth District increa...,The Eleventh District economy continued to exp...,Economic activity in the Twelfth District cont...,,,,
8,Business activity expanded modestly in the Fir...,Economic activity in the Second District conti...,Aggregate business activity in the Third Distr...,Business activity in the Fourth District grew ...,"Since our previous report, the Fifth District ...",Sixth District business contacts remained larg...,Growth in economic activity in the Seventh Dis...,Reports from contacts indicate that economic c...,Ninth District economic activity increased mod...,Economic activity and employment in the Tenth ...,Real estate activity in the District was mixed...,Banking contacts reported steady overall loan ...,Low crop prices continued to weigh on District...,The Eleventh District economy expanded at a ro...,Economic activity in the Twelfth District cont...,
9,Economic activity expanded at a moderate pace ...,Economic activity in the Second District grew ...,Aggregate business activity in the Third Distr...,Business activity in the Fourth District expan...,The Fifth District economy expanded at a moder...,Business contacts indicated that economic acti...,Growth in economic activity in the Seventh Dis...,Economic conditions have improved at a modest ...,The Ninth District economy grew moderately ove...,Economic activity in the Tenth District contin...,The Eleventh District economy expanded at a mo...,Economic activity in the Twelfth District cont...,,,,
