# Final Coding

In [234]:
import pandas as pd
from bs4 import BeautifulSoup
import requests as rq
import os
import time

def Scrape_FED():
    # fomc meeting website
    url = 'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'
    response = rq.get(url)
    # web crawling
    if response.status_code == 200:
            dom = BeautifulSoup(response.text, 'html.parser')
            elements_month = dom.select('.panel .fomc-meeting .fomc-meeting__month>strong')
            elements_day =  dom.select('.panel .fomc-meeting .fomc-meeting__date')
            elements_link = dom.select('.panel .fomc-meeting .col-xs-12>a')

            result_month =[
                {
                  'Month' : element_month.text.strip()  
                }
                for element_month in elements_month
            ]

            result_day = [
                {
                    'Day' : element_day.text.strip()
                }
                for element_day in elements_day
            ]


            result_link = [
                {
                    'Link' : 'https://www.federalreserve.gov'+ element_link.attrs['href'].strip()
                }
                for element_link in elements_link if element_link.find(string='Implementation Note')
            ]

    
            Fed_month = pd.DataFrame(result_month)
            Fed_day = pd.DataFrame(result_day)
            Fed_link = pd.DataFrame(result_link)

            Fed_month.reset_index(drop=True, inplace=True)
            Fed_day.reset_index(drop=True, inplace=True)
            Fed_link.reset_index(drop=True, inplace=True)
            Fed_final = pd.concat([Fed_month, Fed_day, Fed_link], axis =1 )
            # save as excel
            now=time.strftime('%Y%m%d')
            filepath = os.path.join( os.getcwd(), f'FED_announcements{now}.xlsx' )
            Fed_final.to_excel(filepath, sheet_name= '2020-2015 FOMC', index=False)
            
            return Fed_final.head(10)

In [235]:
Scrape_FED()

Unnamed: 0,Month,Day,Link
0,January,28-29,https://www.federalreserve.gov/newsevents/pres...
1,March,3 (unscheduled),https://www.federalreserve.gov/newsevents/pres...
2,March,15 (unscheduled),https://www.federalreserve.gov/newsevents/pres...
3,March,17-18* (cancelled),https://www.federalreserve.gov/newsevents/pres...
4,March,19 (notation vote),https://www.federalreserve.gov/newsevents/pres...
5,March,23 (notation vote),https://www.federalreserve.gov/newsevents/pres...
6,March,31 (notation vote),https://www.federalreserve.gov/newsevents/pres...
7,April,28-29,https://www.federalreserve.gov/newsevents/pres...
8,June,9-10*,https://www.federalreserve.gov/newsevents/pres...
9,July,28-29,https://www.federalreserve.gov/newsevents/pres...


# Coding Procedures

In [127]:
# get url & html
url = 'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'
response = rq.get(url)
dom = BeautifulSoup(response.text, 'html.parser')
dom

ï»¿<!DOCTYPE html>

<html class="no-js" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible">
<meta content="width=device-width, initial-scale=1.0, minimum-scale=1.0 maximum-scale=1.6, user-scalable=1" name="viewport"/>
<meta content="Board of Governors of the Federal Reserve System, Federal Reserve Board of Governors, Federal Reserve Board, Federal Reserve" name="keywords"/>
<meta content="The Federal Reserve Board of Governors in Washington DC." name="description"/>
<meta content="Board of Governors of the Federal Reserve System" property="og:site_name"/>
<meta content="article" property="og:type"/>
<meta content="" property="og:image"/>
<meta content="summary" name="twitter:card"/>
<meta content="" name="twitter:image"/>
<title>The Fed - Meeting calendars and information</title>
<link href="/css/bootstrap.css" rel="stylesheet" type="text/css"/>
<link href="/css/bluesteel-theme.css" rel="stylesheet" type="text/css"/>
<script src="/js/moderniz

In [128]:
# month of annoucement
a = dom.select('.panel .fomc-meeting .fomc-meeting__month>strong')
for a in dom.select('.panel .fomc-meeting .fomc-meeting__month>strong'):
    print(a.text.strip())

January
March
March
March
March
March
March
April
June
July
August
September
November
December
January
March
April/May
June
July
September
October
October
December
January
March
May
June
Jul/Aug
September
November
December
Jan/Feb
March
May
June
July
September
Oct/Nov
December
January
March
April
June
July
September
November
December
January
March
April
June
July
September
October
December
January
March
April
June
July
September
November
December


In [237]:
# day of announcement
b = dom.select('.panel .fomc-meeting .fomc-meeting__date')
for b in dom.select('.panel .fomc-meeting .fomc-meeting__date'):
    print(b.text.strip())

28-29
3 (unscheduled)
15 (unscheduled)
17-18* (cancelled)
19 (notation vote)
23 (notation vote)
31 (notation vote)
28-29
9-10*
28-29
27 (notation vote)
15-16*
4-5
15-16*
29-30
19-20*
30-1
18-19*
30-31
17-18*
4 (unscheduled)
29-30
10-11*
30-31
20-21*
1-2
12-13*
31-1
25-26*
7-8
18-19*
31-1
14-15*
2-3
13-14*
25-26
19-20*
31-1
12-13*
26-27
15-16*
26-27
14-15*
26-27
20-21*
1-2
13-14*
27-28
17-18*
28-29
16-17*
28-29
16-17*
27-28
15-16*
26-27
16-17*
27-28
15-16*
27-28
21-22*
2-3
14-15*


In [179]:
# Link for Implementation note
d = []
for c in dom.select('.panel .fomc-meeting .col-xs-12>a'):
    if c.find(string='Implementation Note'):
        d.append('https://www.federalreserve.gov'+c.attrs['href'].strip())
d

['https://www.federalreserve.gov/newsevents/pressreleases/monetary20200129a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20200303a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20200315a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20200429a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20200610a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20200729a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20200916a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20190130a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20190320a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20190501a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20190619a1.htm',
 'https://www.federalreserve.gov/newsevents/pressreleases/monetary20190731a1.htm',
 'ht

In [183]:
# list comprehension
elements_month = dom.select('.panel .fomc-meeting .fomc-meeting__month>strong')
elements_day =  dom.select('.panel .fomc-meeting .fomc-meeting__date')
elements_link = dom.select('.panel .fomc-meeting .col-xs-12>a')

result_month =[
    {
      'Month' : element_month.text.strip()  
    }
    for element_month in elements_month
]

result_day = [
    {
        'Day' : element_day.text.strip()
    }
    for element_day in elements_day
]


result_link = [
    {
        'Link' : 'https://www.federalreserve.gov'+ element_link.attrs['href'].strip()
    }
    for element_link in elements_link if element_link.find(string='Implementation Note')
]

    
Fed_month = pd.DataFrame(result_month)
Fed_day = pd.DataFrame(result_day)
Fed_link = pd.DataFrame(result_link)

Fed_month.reset_index(drop=True, inplace=True)
Fed_day.reset_index(drop=True, inplace=True)
Fed_link.reset_index(drop=True, inplace=True)
Fed_final = pd.concat([Fed_month, Fed_day, Fed_link], axis =1 )
Fed_final


Unnamed: 0,Month,Day,Link
0,January,28-29,https://www.federalreserve.gov/newsevents/pres...
1,March,3 (unscheduled),https://www.federalreserve.gov/newsevents/pres...
2,March,15 (unscheduled),https://www.federalreserve.gov/newsevents/pres...
3,March,17-18* (cancelled),https://www.federalreserve.gov/newsevents/pres...
4,March,19 (notation vote),https://www.federalreserve.gov/newsevents/pres...
...,...,...,...
58,June,15-16*,
59,July,27-28,
60,September,21-22*,
61,November,2-3,
