In [1]:
import datetime
import json
import urllib
import urllib.request

import bs4
import lxml
import pandas

In [2]:
# Year in which the notebook is run, taken from today's date.
current_year = datetime.datetime.today().year

# Page used to get the release dates of this year's beige books.
current_dates_link = 'https://www.federalreserve.gov/monetarypolicy/beige-book-default.htm'


def past_dates_link(year):
    """Return the URL of the page used to get the beige book release dates of ``year``."""
    return 'https://www.federalreserve.gov/monetarypolicy/beigebook%d.htm' % year

# Return the link of the beige book (HTML) released on a given date. There are three URL formats (2010 and earlier, 2011-2016, after 2016)
def a_book_link_federalreserve(date):
    """Build the federalreserve.gov URL of the beige book released on ``date``.

    Parameters
    ----------
    date
        Release date in any form ``pandas.to_datetime`` understands,
        e.g. ``'20190116'``.

    Returns
    -------
    str
        The URL in the layout that matches the release year: 2010 and
        earlier, 2011-2016, or after 2016.

    Raises
    ------
    ValueError
        If ``date`` is missing (``None`` or a value that parses to ``NaT``).
    """
    date_time = pandas.to_datetime(date)
    # to_datetime passes None through and maps missing values to NaT. Neither
    # has a usable year, so fail here with a clear message instead of an
    # unrelated AttributeError / UnboundLocalError further down.
    if date_time is None or date_time is pandas.NaT:
        raise ValueError('cannot build a beige book link from date %r' % (date,))
    date_year = date_time.year
    if date_year <= 2010:
        return 'https://www.federalreserve.gov/fomc/beigebook/%s/%s/FullReport.htm' % (
            date_year, date_time.strftime('%Y%m%d'))
    if date_year <= 2016:
        return 'https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook%s.htm' % (
            date_time.strftime('%Y%m'))
    return 'https://www.federalreserve.gov/monetarypolicy/beigebook%s.htm' % (
        date_time.strftime('%Y%m'))
def read_beigebook_federalreserve(date):
    """Download the beige book released on ``date`` and extract its text.

    Parameters
    ----------
    date
        Release date, passed unchanged to ``a_book_link_federalreserve``
        (e.g. ``'20190116'``).

    Returns
    -------
    dict
        ``'html_words'``: the page body exactly as read from the response;
        ``'soup'``: the ``bs4.BeautifulSoup`` tree built with the lxml parser;
        ``'paragraph'``: the page text as one cleaned-up string;
        ``'contents'``: ``paragraph`` split on ``'. '``.

    Raises
    ------
    urllib.error.URLError
        Raised by ``urllib.request.urlopen`` when the page cannot be fetched.
    """
    # Build the link once so the printed URL is exactly the one that is fetched.
    link = a_book_link_federalreserve(date)
    print('link of beige book on %s is %s\n' % (date, link))
    # The with-block closes the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(link) as response:
        html_words = response.read()
    # use bs4 to organize and sort the content of website
    soup = bs4.BeautifulSoup(html_words, 'lxml')
    # NOTE(review): soup.text also carries the text of <script> elements (see the
    # JavaScript at the start of the sample output) - consider stripping them.
    paragraph = soup.text
    # eliminate leftover markup strings and newlines
    for item in ['<p>', '</p>', '<strong>', '</strong>', '<em>', '<br/>', '\n']:
        paragraph = paragraph.replace(item, '')
    paragraph = paragraph.replace('\r', ' ').replace('    ', ' ')
    contents = paragraph.split('. ')
    return {
        'html_words': html_words,
        'soup': soup,
        'paragraph': paragraph,
        'contents': contents,
    }

In [3]:
def _to_yyyymmdd(year, table_dates):
    """Prefix each table entry with ``year``, parse it and format it as 'YYYYMMDD'."""
    # presumably the table cells hold 'Month day' text; verify against the page
    labelled = ['%d %s' % (year, entry) for entry in table_dates]
    return [pandas.to_datetime(label).strftime('%Y%m%d') for label in labelled]


# Dates of this year's beige books (first table on the page, rows with NaN dropped).
current_dates = pandas.read_html(current_dates_link)[0].dropna()
total_current_dates = _to_yyyymmdd(current_year, current_dates[str(current_year)].tolist())

# Start from the current year, then append every earlier year back to 1996.
total_dates = list(total_current_dates)
for year in range(1996, current_year):
    past_years_dates = pandas.read_html(past_dates_link(year))[0]
    past_years_dates = _to_yyyymmdd(year, past_years_dates[str(year)].tolist())
    total_dates.extend(past_years_dates)
# 'YYYYMMDD' strings sort chronologically.
total_dates.sort()

In [4]:
# Report how many release dates were collected, followed by the dates themselves.
date_count = len(total_dates)
print('total date num %d\n%s' % (date_count, total_dates))

total date num 178
['19961030', '19961204', '19970122', '19970312', '19970507', '19970618', '19970806', '19970917', '19971029', '19971203', '19980121', '19980318', '19980506', '19980617', '19980805', '19980916', '19981104', '19981209', '19990120', '19990317', '19990505', '19990616', '19990811', '19990922', '19991103', '19991208', '20000119', '20000308', '20000503', '20000614', '20000809', '20000920', '20001101', '20001206', '20010117', '20010307', '20010502', '20010613', '20010808', '20010919', '20011024', '20011128', '20020116', '20020306', '20020424', '20020612', '20020731', '20020911', '20021023', '20021127', '20030115', '20030305', '20030423', '20030611', '20030730', '20031015', '20031126', '20040114', '20040303', '20040421', '20040616', '20040728', '20040908', '20041027', '20041201', '20050119', '20050309', '20050420', '20050615', '20050727', '20050907', '20051019', '20051130', '20060118', '20060315', '20060426', '20060614', '20060726', '20060906', '20061012', '20061129', '2007011

In [5]:
# Try the reader on the last (most recent) date of the sorted list.
latest_date = total_dates[-1]
returns_1 = read_beigebook_federalreserve(latest_date)
returns_1

link of beige book on 20190116 is https://www.federalreserve.gov/monetarypolicy/beigebook201901.htm



{'contents': ['The Fed - Beige Book - January 16, 2019  grunticon(["/css/icons.data.svg.css", "/css/icons.data.png.css", "/css/icons.fallback.css"]);   (function(i,s,o,g,r,a,m){i[\'GoogleAnalyticsObject\']=r;i[r]=i[r]||function(){  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)  })(window,document,\'script\',\'//www.google-analytics.com/analytics.js\',\'ga\');  ga(\'create\', \'UA-35121701-1\', \'federalreserve.gov\', {\'cookieExpires\': 0});  ga(\'set\', \'anonymizeIp\', true);  ga(\'send\', \'pageview\');Skip to main contentBack to HomeBoard of Governors of the Federal Reserve SystemStay ConnectedLink to Federal Reserve Facebook PageLink to Federal Reserve Twitter PageLink to Federal Reserve YouTube PageLink to Federal Reserve Flickr PageFederal Reserve LinkedIn PageSubscribe to RSSSubscribe to EmailRecent PostingsCalendarPublicationsSite MapA-Z indexCareersFAQsVideosConta

In [6]:
# Download every beige book, stored under the key 'BeigeBook<date>'.
BeigeBook = {}
# Dates that could not be read, mapped to the error that was raised, so the
# gap between len(BeigeBook) and len(total_dates) can be inspected afterwards.
BeigeBook_failed = {}
for date in total_dates:
    try:
        BeigeBook['BeigeBook%s' % date] = read_beigebook_federalreserve(date)
    except Exception as e:
        # Best effort: one unreachable page must not abort the whole download,
        # but say which date failed instead of printing a bare error.
        BeigeBook_failed[date] = repr(e)
        print('could not read beige book on %s: %r' % (date, e))

link of beige book on 19961030 is https://www.federalreserve.gov/fomc/beigebook/1996/19961030/FullReport.htm

link of beige book on 19961204 is https://www.federalreserve.gov/fomc/beigebook/1996/19961204/FullReport.htm

link of beige book on 19970122 is https://www.federalreserve.gov/fomc/beigebook/1997/19970122/FullReport.htm

link of beige book on 19970312 is https://www.federalreserve.gov/fomc/beigebook/1997/19970312/FullReport.htm

link of beige book on 19970507 is https://www.federalreserve.gov/fomc/beigebook/1997/19970507/FullReport.htm

link of beige book on 19970618 is https://www.federalreserve.gov/fomc/beigebook/1997/19970618/FullReport.htm

link of beige book on 19970806 is https://www.federalreserve.gov/fomc/beigebook/1997/19970806/FullReport.htm

link of beige book on 19970917 is https://www.federalreserve.gov/fomc/beigebook/1997/19970917/FullReport.htm

link of beige book on 19971029 is https://www.federalreserve.gov/fomc/beigebook/1997/19971029/FullReport.htm

link of be

link of beige book on 20060426 is https://www.federalreserve.gov/fomc/beigebook/2006/20060426/FullReport.htm

link of beige book on 20060614 is https://www.federalreserve.gov/fomc/beigebook/2006/20060614/FullReport.htm

link of beige book on 20060726 is https://www.federalreserve.gov/fomc/beigebook/2006/20060726/FullReport.htm

link of beige book on 20060906 is https://www.federalreserve.gov/fomc/beigebook/2006/20060906/FullReport.htm

link of beige book on 20061012 is https://www.federalreserve.gov/fomc/beigebook/2006/20061012/FullReport.htm

link of beige book on 20061129 is https://www.federalreserve.gov/fomc/beigebook/2006/20061129/FullReport.htm

link of beige book on 20070117 is https://www.federalreserve.gov/fomc/beigebook/2007/20070117/FullReport.htm

link of beige book on 20070307 is https://www.federalreserve.gov/fomc/beigebook/2007/20070307/FullReport.htm

link of beige book on 20070425 is https://www.federalreserve.gov/fomc/beigebook/2007/20070425/FullReport.htm

link of be

link of beige book on 20150715 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201507.htm

link of beige book on 20150902 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201509.htm

link of beige book on 20151014 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201510.htm

link of beige book on 20151202 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201512.htm

link of beige book on 20160113 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201601.htm

link of beige book on 20160302 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201603.htm

link of beige book on 20160413 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201604.htm

link of beige book on 20160601 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201606.htm

link of beige book on 20160713 is https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook201607.htm

l

In [None]:
# Preview the first few keys instead of echoing the whole dictionary: every entry
# holds the raw HTML, the parsed soup and the extracted text of one book, so the
# full repr would flood the notebook output.
sorted(BeigeBook)[:5]

In [7]:
# Number of books that were downloaded, then number of release dates that were found.
print(len(BeigeBook))
print(len(total_dates))

176
178


In [None]:
# References on saving a dictionary to a file:
# https://pythonspot.com/save-a-dictionary-to-a-file/
# https://stackabuse.com/saving-text-json-and-csv-to-a-file-in-python/
# Save the whole dataset (dictionary) as a single txt file.
# The code below is wrapped in a string literal, so it is not executed.
'''
f = open('/Users/tina/Documents/2019/AA 2019 Spring/LionBase/Week 4/BeigeBook/BeigeBook.txt','w')
f.write(str(BeigeBook))
f.close()
'''

In [None]:
# Save each beige book to its own file, named after its key -- json
for key in BeigeBook:
    # The with-block closes the file even if serialisation fails.
    with open("/Users/tina/Documents/2019/AA 2019 Spring/LionBase/Week 4/IndividualBB_json/%s.json"%key,'w') as f:
        # Write only this key's entry (not the whole dataset) as real JSON.
        # Values json cannot encode natively (the raw page body and the
        # BeautifulSoup tree) are stored through str().
        json.dump(BeigeBook[key], f, default=str)

In [10]:
# Save each beige book to its own file, named after its key -- txt
for key in BeigeBook:
    # The with-block closes the file even if the write fails.
    with open("/Users/tina/Documents/2019/AA 2019 Spring/LionBase/Week 4/IndividualBB_txt/%s.txt"%key,'w') as f:
        # Write only this key's entry; str(BeigeBook) would put the whole
        # dataset into every single file.
        f.write(str(BeigeBook[key]))