In [130]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


In [131]:
def fetch_page_content(url, timeout=30):
    """Download a page and return its body text, or ``None`` on failure.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled connection.

    Returns:
        ``response.text`` when the server answers with HTTP 200; ``None`` for
        any other status code or when the request itself fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network-level failures are reported like a non-200 reply so callers
        # only ever have to handle ``None``.
        return None
    if response.status_code == 200:
        return response.text
    return None
def find_speech_links(html_content):
    """Collect absolute URLs of speech pages linked from an HTML document.

    Args:
        html_content: HTML markup to scan.

    Returns:
        A list of absolute URLs, in document order and without repeats, for
        every ``<a href=...>`` whose href contains ``'newsevents/speech'``.
    """
    site_root = "https://www.federalreserve.gov"
    soup = BeautifulSoup(html_content, 'html.parser')
    speech_links = []
    seen = set()
    for link in soup.find_all('a', href=True):
        href = link['href']
        if 'newsevents/speech' not in href:
            continue
        # urljoin resolves site-relative hrefs against the site root and leaves
        # already-absolute hrefs intact; plain concatenation would corrupt them.
        full_url = urljoin(site_root, href)
        if full_url not in seen:
            seen.add(full_url)
            speech_links.append(full_url)
    return speech_links
def fetch_yearly_speech_links(url, timeout=30):
    """Fetch a yearly listing page and return the speech URLs it links to.

    Args:
        url: Address of the yearly listing page.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of absolute URLs, in page order and without repeats, for every
        ``<a href=...>`` whose href contains ``'/newsevents/speech/'``. An
        empty list is returned when the server does not answer with HTTP 200,
        so an error page is never mined for links.

    Raises:
        Whatever ``requests.get`` raises for network-level failures; a missing
        listing page is not silently skipped.
    """
    site_root = "https://www.federalreserve.gov"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    speech_links = []
    seen = set()
    for a in soup.find_all('a', href=True):
        href = a['href']
        if '/newsevents/speech/' not in href:
            continue
        # urljoin keeps absolute hrefs intact and resolves relative ones.
        full_url = urljoin(site_root, href)
        if full_url not in seen:
            seen.add(full_url)
            speech_links.append(full_url)

    return speech_links
def fetch_speech_links(url):
    """Return the speech links found on the page at ``url``.

    The page is downloaded with ``fetch_page_content`` and scanned with
    ``find_speech_links``. An empty list is returned when no content comes
    back from the download.
    """
    page_html = fetch_page_content(url)
    return find_speech_links(page_html) if page_html else []

def _text_or_empty(node):
    """Return the stripped text of a parsed element, or '' when it is missing."""
    return node.get_text(strip=True) if node is not None else ''


def parse_speech_data(html_content):
    """Extract date, speaker, location and body text from a speech page.

    Args:
        html_content: HTML markup of a single speech page.

    Returns:
        A dict with keys ``'Date'``, ``'Speaker'``, ``'Location'`` and
        ``'Text'``. The first three come from the ``<p>`` elements with classes
        ``article__time``, ``speaker`` and ``location``. ``'Text'`` is every
        ``<p>`` inside the element with id ``article``, joined by blank lines.
        Any piece that is absent from the page is returned as ``''`` instead of
        raising, so pages that are not speeches do not abort a crawl.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    date = _text_or_empty(soup.find('p', class_='article__time'))
    speaker = _text_or_empty(soup.find('p', class_='speaker'))
    location = _text_or_empty(soup.find('p', class_='location'))

    speech_text = ''
    article = soup.find(id='article')
    if article is not None:
        speech_text = '\n\n'.join(
            paragraph.get_text(strip=True) for paragraph in article.find_all('p')
        )

    return {
        'Date': date,
        'Speaker': speaker,
        'Location': location,
        'Text': speech_text
    }
def scrape_speech_data(speech_url):
    """Download one speech page and parse it.

    Returns the dict produced by ``parse_speech_data``, or ``None`` when
    ``fetch_page_content`` yields no content for ``speech_url``.
    """
    page_html = fetch_page_content(speech_url)
    if not page_html:
        return None
    return parse_speech_data(page_html)

In [132]:

# Yearly listing pages to crawl.
years = ['2024', '2023', '2022', '2021', '2020']
base_url = 'https://www.federalreserve.gov/newsevents/speech/{}-speeches.htm'
all_speeches = []

for year in years:
    yearly_url = base_url.format(year)
    speech_links = fetch_yearly_speech_links(yearly_url)

    for link in speech_links:
        speech_data = scrape_speech_data(link)
        # scrape_speech_data returns None when a page could not be fetched;
        # skip those so every entry in all_speeches is a dict of speech fields.
        if speech_data is not None:
            all_speeches.append(speech_data)

# all_speeches now holds one dict per successfully scraped speech for every
# year listed in `years`.
    

KeyboardInterrupt: 

In [None]:
# Build the speeches table and keep one row per distinct speech text.
speech_df = pd.DataFrame(all_speeches).drop_duplicates(subset='Text')
# Distinct raw Location strings, ordered by how often each occurs.
speech_df.Location.value_counts().index


In [119]:
# The CSV export below is disabled; this cell only displays the frame.
#speech_df.to_csv('speeches.csv')
speech_df

Unnamed: 0,Date,Speaker,Location,Text
0,"February 16, 2024",Vice Chair for Supervision Michael S. Barr,At the Annual Columbia Law School Banking Conf...,"February 16, 2024\n\nVice Chair for Supervisio..."
1,"February 15, 2024",Governor Christopher J. Waller,"At ""Climate, Currency, and Central Banking,"" a...","February 15, 2024\n\nGovernor Christopher J. W..."
2,"February 15, 2024",Governor Michelle W. Bowman,At the 19th BCBS-FSI High-Level Meeting for Af...,"February 15, 2024\n\nGovernor Michelle W. Bowm..."
3,"February 14, 2024",Vice Chair for Supervision Michael S. Barr,At the 40th Annual National Association for Bu...,"February 14, 2024\n\nVice Chair for Supervisio..."
4,"February 12, 2024",Governor Michelle W. Bowman,At the American Bankers Association 2024 Confe...,"February 12, 2024\n\nGovernor Michelle W. Bowm..."
...,...,...,...,...
275,"February 05, 2020",Governor Lael Brainard,"At the Symposium on the Future of Payments, St...","February 05, 2020\n\nGovernor Lael Brainard\n\..."
276,"January 17, 2020",Vice Chair for Supervision Randal K. Quarles,At the American Bar Association Banking Law Co...,"January 17, 2020\n\nVice Chair for Supervision..."
277,"January 16, 2020",Governor Michelle W. Bowman,"At the 2020 Economic Forecast Breakfast, Home ...","January 16, 2020\n\nGovernor Michelle W. Bowma..."
278,"January 09, 2020",Vice Chair Richard H. Clarida,At the C. Peter McColough Series on Internatio...,"January 09, 2020\n\nVice Chair Richard H. Clar..."


In [110]:
# Trailing delivery-mode notes to strip: "via ...", "virtual", "and by webcast".
pattern = r'\s*(via\s+.*|virtual|and by webcast)'

# Clean the raw location strings, then keep only the text after the last comma.
# Literal replacements pass regex=False explicitly so '(' and ')' are never
# interpreted as regular-expression syntax, whatever the pandas default is.
# The word 'and' is not stripped; the lookup sets used for grouping contain
# entries such as 'and Richmond'.
speech_df['Location'] = (
    speech_df['Location']
    .str.replace('At the', '', regex=False)
    .str.replace('via webcast', '', regex=False)
    .str.replace(pattern, '', regex=True)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.split(',').str[-1]
    .str.strip()
)
speech_df.Location.value_counts()

  speech_df['Location']=speech_df.Location.str.replace('(','')
  speech_df['Location']=speech_df.Location.str.replace(')','')


D.C.                                                                                                           78
New York                                                                                                       25
California                                                                                                     16
Massachusetts                                                                                                   9
Ohio                                                                                                            9
                                                                                                               ..
At National Native Coalition Virtual Series on the Community Reinvestment Act Notice of Proposed Rulemaking     1
"Recent Fiscal and Monetary Policy: Implications for U.S. and Israeli Real Estate Markets" conference           1
the Reserve Bank of New Zealand                                                         

In [83]:
# Lookup sets used to bucket a cleaned location string into a group.
developed_countries = {
    "Austria", "Canada", "Germany", "Ireland", "Italy", "New Zealand",
    "Norway", "Spain", "Sweden", "Switzerland", "United Kingdom",
}
emerging_markets = {"Morocco", "South Africa", "Bahamas"}
# Besides state names this set holds the spellings that occur in the data for
# the capital ("D.C.", "DC", "Washington D.C."), the abbreviation "FL", and
# the fragments "and Richmond" / "and St. Louis".
us_states = {
    "Alabama", "Arizona", "Arkansas", "California", "Colorado",
    "Connecticut", "D.C.", "DC", "FL", "Florida", "Georgia", "Hawaii",
    "Idaho", "Illinois", "Kansas", "Kentucky", "Louisiana", "Massachusetts",
    "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nevada",
    "New York", "North Carolina", "Ohio", "Oklahoma", "Pennsylvania",
    "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah",
    "Virginia", "Washington D.C.", "West Virginia", "Wyoming",
    "and Richmond", "and St. Louis",
}


In [111]:
def map_location_group(location):
    """Classify a cleaned location string into one of four groups.

    Args:
        location: Location text. Anything from the first ``" ("`` or the first
            double quote onward is ignored before matching.

    Returns:
        ``'US'``, ``'Developed Countries'`` or ``'Emerging Markets'`` when the
        trimmed text is a member of ``us_states``, ``developed_countries`` or
        ``emerging_markets`` respectively; otherwise
        ``'international_conferences'``. Non-string input (``None`` or the NaN
        pandas uses for missing text) also falls into that last group instead
        of raising.
    """
    if not isinstance(location, str):
        # Missing values have no text to inspect.
        return 'international_conferences'
    # Drop any parenthesised or quoted remainder so only the place name is matched.
    clean_location = location.split(" (")[0].split('"')[0].strip()
    if clean_location in us_states:
        return 'US'
    elif clean_location in developed_countries:
        return 'Developed Countries'
    elif clean_location in emerging_markets:
        return 'Emerging Markets'
    else:
        return 'international_conferences'

# Label every speech with its location group, then list the locations that
# fell through to the catch-all group.
speech_df['occasion'] = speech_df['Location'].map(map_location_group)
speech_df.loc[speech_df['occasion'] == 'international_conferences', 'Location']

138    At National Native Coalition Virtual Series on...
151    "Recent Fiscal and Monetary Policy: Implicatio...
161                      the Reserve Bank of New Zealand
166                        and the European Central Bank
180                                 Missouri  conference
183                                     D.C.  conference
193              At The Prudential Regulation Conference
196    National Association of Insurance Commissioner...
202    " a conference sponsored by the Society for Ad...
215                   Institute of International Bankers
219    "Advance Together" Celebration sponsored by th...
221    Conference for Community Bankers sponsored by ...
222                            Economic Club of New York
224    Inaugural Mike McCracken Lecture on Full Emplo...
225                                   D.C. Virtual Event
228    Consumer Bankers Association Community Reinves...
229                                           and Future
230    At Independent Community

In [139]:
speech_df = pd.read_csv('5yearspeeches.csv')
# Other title normalizations, currently disabled:
#speech_df.loc[speech_df.Speaker=='Chair Pro Tempore Jerome H. Powell','Speaker']='Chair Jerome H. Powell'
#speech_df.loc[speech_df.Speaker=='Vice Chair Philip N. Jefferson','Speaker']='Governor Philip N. Jefferson'
#speech_df.loc[speech_df.Speaker=='Vice Chair for Supervision and Chair of the Financial Stability Board Randal K. Quarles','Speaker']='Governor Randal K. Quarles'
#speech_df.loc[speech_df.Speaker=='Governor Randal K. Quarles','Speaker']='Vice Chair for Supervision Randal K. Quarles'
# Fold the 'Vice Chair' title for Lael Brainard into the 'Governor' label.
speech_df.loc[speech_df.Speaker == 'Vice Chair Lael Brainard', 'Speaker'] = 'Governor Lael Brainard'
# index=False: this file is read back by the first line of this cell, and
# writing the index would add one more 'Unnamed: 0' column on every run.
speech_df.to_csv('5yearspeeches.csv', index=False)
# Last expression of the cell so the counts are actually displayed.
speech_df.Speaker.value_counts()

In [140]:
# Load the per-speaker score table from radar_data.csv and display it.
radar_data= pd.read_csv('radar_data.csv')
radar_data


Unnamed: 0.1,Unnamed: 0,Speaker,Doves,Hawks,Stability
0,1,Chair Jerome H. Powell,0.433333,0.333333,0.233333
1,2,Governor Adriana D. Kugler,0.0,1.0,0.0
2,3,Governor Christopher J. Waller,0.026316,0.552632,0.421053
3,4,Governor Lael Brainard,0.333333,0.288889,0.377778
4,5,Governor Lisa D. Cook,0.666667,0.333333,0.0
5,6,Governor Michelle W. Bowman,0.205882,0.397059,0.397059
6,7,Governor Philip N. Jefferson,0.25,0.666667,0.083333
7,8,Vice Chair Richard H. Clarida,0.047619,0.333333,0.619048
8,9,Vice Chair for Supervision Michael S. Barr,0.315789,0.473684,0.210526
9,10,Vice Chair for Supervision Randal K. Quarles,0.409091,0.454545,0.136364


In [125]:
%matplotlib inline
import matplotlib.pyplot as plt
from math import pi
import numpy as np


In [126]:
def plot_adjusted_radar_chart(df, categories=('Doves', 'Hawks', 'Stability')):
    """Draw one closed radar (polar) outline per row of ``df``.

    Args:
        df: DataFrame with a ``Speaker`` column (used as the legend label) and
            one column per entry of ``categories`` whose values are plotted as
            radii.
        categories: Column names used as the radar axes, in plotting order.
            Defaults to the three columns the chart was originally written for.

    Returns:
        None. The figure is created with ``plt.subplots`` and left for the
        active matplotlib backend to display.
    """
    categories = list(categories)

    # One evenly spaced angle per category; the first angle is repeated at the
    # end so each outline closes back on its starting point.
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    angles += angles[:1]

    _, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories)

    # One line with markers per row, closed by repeating the first value.
    for _, row in df.iterrows():
        values = row[categories].tolist()
        values += values[:1]
        ax.plot(angles, values, marker='o', label=row['Speaker'])

    ax.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))


In [None]:
# Draw the radar chart for every speaker in radar_data.
plot_adjusted_radar_chart(radar_data)

In [127]:
import seaborn as sns
import pandas as pd
%matplotlib qt
# Assuming 'df' is your DataFrame from earlier

# Convert the Speaker column into a categorical type if it's not already
radar_data['Speaker'] = pd.Categorical(radar_data['Speaker'])
palette = sns.color_palette("tab20", len(set(radar_data.Speaker)))
# Use seaborn's pairplot function to create the scatter plot matrix
g= sns.pairplot(radar_data, hue='Speaker', vars=['Doves', 'Hawks', 'Stability'], palette=palette, markers='o', 
                 height=5, aspect=1,plot_kws={'alpha':0.8, 's': 200})
# Map the plots to the grid

g = g.map_offdiag(plt.scatter, alpha=0.5, s=50)

# Set the axis limits
g.set(xlim=(0, 1.1), ylim=(0, 1.1))
# Add a legend with customized font sizes
#g.add_legend(title='Speaker', title_fontsize='13', fontsize='10')

# Adjust global font sizes for axes and ticks
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['xtick.labelsize'] = 20
plt.rcParams['ytick.labelsize'] = 20
plt.rcParams['legend.title_fontsize'] = 20
plt.rcParams['legend.fontsize'] = 20
# Show the plot
plt.show()




#### 

In [None]:
# Number of distinct values in the Speaker column.
len(set(radar_data.Speaker))

In [150]:
# Display the current contents of radar_data.
radar_data

Unnamed: 0.1,Unnamed: 0,Speaker,Doves,Hawks,Stability,Hawk_Proportion,Rescaled_Hawkishness
0,1,Chair Jerome H. Powell,0.433333,0.333333,0.233333,0.434783,-0.262364
1,2,Governor Adriana D. Kugler,0.0,1.0,0.0,1.0,inf
2,3,Governor Christopher J. Waller,0.026316,0.552632,0.421053,0.954545,3.044522
3,4,Governor Lael Brainard,0.333333,0.288889,0.377778,0.464286,-0.143101
4,5,Governor Lisa D. Cook,0.666667,0.333333,0.0,0.333333,-0.693147
5,6,Governor Michelle W. Bowman,0.205882,0.397059,0.397059,0.658537,0.65678
6,7,Governor Philip N. Jefferson,0.25,0.666667,0.083333,0.727273,0.980829
7,8,Vice Chair Richard H. Clarida,0.047619,0.333333,0.619048,0.875,1.94591
8,9,Vice Chair for Supervision Michael S. Barr,0.315789,0.473684,0.210526,0.6,0.405465
9,10,Vice Chair for Supervision Randal K. Quarles,0.409091,0.454545,0.136364,0.526316,0.105361


In [149]:
# Share of the Hawks score in the combined Hawks + Doves score.
radar_data['Hawk_Proportion'] = radar_data['Hawks'] / (radar_data['Hawks'] + radar_data['Doves'])
# The plot below reads 'Rescaled_Hawkishness', so it has to be assigned here.
# Log ratio of the Hawks and Doves scores: 0 when they are equal, negative
# when Doves is larger, positive when Hawks is larger (+inf when Doves is 0).
# A bounded alternative is 2 * (radar_data['Hawk_Proportion'] - 0.5).
radar_data['Rescaled_Hawkishness'] = np.log(radar_data['Hawks'] / radar_data['Doves'])

# Plotting
plt.figure(figsize=(10, 6))
plt.scatter(radar_data['Rescaled_Hawkishness'], radar_data['Stability'], s=100, c='blue', alpha=0.5)
plt.axvline(x=0, color='grey', linestyle='--')
plt.axhline(y=0.5, color='grey', linestyle='--')  # Optional: if you want to mark the neutral stability line
plt.xlabel('Hawkishness (Dovish < 0 < Hawkish)')
plt.ylabel('Stability')
plt.title('Speaker Policy Inclinations')

# Annotate each point with the speaker's name. Iterating the columns together
# avoids label-based [i] lookups, which only work with a default 0..n-1 index.
for txt, x, y in zip(radar_data['Speaker'], radar_data['Rescaled_Hawkishness'], radar_data['Stability']):
    plt.annotate(txt, (x, y), fontsize=7)
# NOTE(review): points with |Rescaled_Hawkishness| > 1.5 fall outside these limits.
plt.xlim(-1.5, 1.5)
plt.ylim(0, 1)
plt.show()

In [146]:
import pandas as pd
# Load the saved speeches table and preview its first rows.
speeches= pd.read_csv('speeches.csv')
speeches.head()

Unnamed: 0.1,Unnamed: 0,Date,Speaker,Location,Text,occasion
0,0,"February 15, 2024",Governor Michelle W. Bowman,South Africa,"February 15, 2024\n\nGovernor Michelle W. Bowm...",Emerging Markets
1,1,"February 14, 2024",Vice Chair for Supervision Michael S. Barr,D.C.,"February 14, 2024\n\nVice Chair for Supervisio...",US
2,2,"February 12, 2024",Governor Michelle W. Bowman,Texas,"February 12, 2024\n\nGovernor Michelle W. Bowm...",US
3,3,"February 07, 2024",Governor Michelle W. Bowman,D.C. (via pre-recorded video),"February 07, 2024\n\nGovernor Michelle W. Bowm...",US
4,4,"February 07, 2024",Governor Adriana D. Kugler,D.C.,"February 07, 2024\n\nGovernor Adriana D. Kugle...",US


In [97]:
import yfinance as yf

In [101]:
start_date = "2020-01-01"
end_date = "2024-02-17"

# Download daily price data for the tickers BTC-USD (Bitcoin) and DX-Y.NYB
# (presumably the U.S. Dollar Index -- verify) using Yahoo Finance (yfinance).
price = yf.download(["BTC-USD", "DX-Y.NYB"], start=start_date, end=end_date)
# Forward-fill so a ticker with no quote on a date carries its last close.
close = price['Close'].reset_index().ffill()
# Day-over-day simple return. Only the two price columns take part in the
# division; the datetime 'Date' column is attached afterwards.
daily_return = close[['BTC-USD', 'DX-Y.NYB']]
daily_return = daily_return / daily_return.shift(1) - 1
daily_return['Date'] = close['Date']
daily_return

[*********************100%%**********************]  2 of 2 completed


Unnamed: 0,BTC-USD,DX-Y.NYB,Date
0,,,2020-01-01
1,-0.029819,,2020-01-02
2,0.051452,-0.000103,2020-01-03
3,0.008955,0.000000,2020-01-04
4,0.000089,0.000000,2020-01-05
...,...,...,...
1503,0.034462,0.000865,2024-02-12
1504,-0.004319,0.007584,2024-02-13
1505,0.041901,-0.002287,2024-02-14
1506,0.002158,-0.004011,2024-02-15


In [107]:
# Distinct raw date strings present in the speeches table.
set(speech_df.Date)

{'April 04, 2023',
 'April 05, 2022',
 'April 09, 2020',
 'April 14, 2021',
 'April 14, 2023',
 'April 18, 2023',
 'April 20, 2023',
 'April 21, 2023',
 'August 03, 2021',
 'August 04, 2021',
 'August 05, 2021',
 'August 05, 2023',
 'August 06, 2020',
 'August 06, 2022',
 'August 07, 2023',
 'August 13, 2020',
 'August 17, 2021',
 'August 17, 2022',
 'August 19, 2020',
 'August 22, 2023',
 'August 25, 2023',
 'August 26, 2020',
 'August 26, 2022',
 'August 27, 2020',
 'August 27, 2021',
 'August 29, 2022',
 'August 31, 2020',
 'December 01, 2020',
 'December 01, 2022',
 'December 01, 2023',
 'December 02, 2021',
 'December 04, 2020',
 'December 05, 2023',
 'December 11, 2020',
 'December 17, 2020',
 'December 17, 2021',
 'December 18, 2020',
 'February 02, 2024',
 'February 05, 2020',
 'February 06, 2020',
 'February 07, 2023',
 'February 07, 2024',
 'February 08, 2023',
 'February 10, 2020',
 'February 10, 2021',
 'February 10, 2023',
 'February 11, 2020',
 'February 12, 2024',
 'Febr

In [113]:
# Parse the date strings so they can be matched against the price dates.
speech_df['Date'] = pd.to_datetime(speech_df['Date'])
# Attach the same-day returns to each speech and save the joined table.
(
    speech_df
    .merge(daily_return, on='Date')
    .to_csv('withreturnlong.csv')
)

In [116]:

# High-low range relative to the close; computed here so this cell does not
# depend on a later cell having been run first.
move = (price.High - price.Low) / price.Close
# Attach the same-day ranges to each speech and save the joined table.
speech_df.merge(move.reset_index(), on='Date').to_csv('withrangelong.csv')

In [114]:
# High-low range expressed as a fraction of the close, per date and ticker.
move = (price['High'] - price['Low']) / price['Close']
move.reset_index()

Unnamed: 0,Date,BTC-USD,DX-Y.NYB
0,2020-01-01,0.011026,
1,2020-01-02,0.039637,0.004543
2,2020-01-03,0.067900,0.004131
3,2020-01-04,0.015906,
4,2020-01-05,0.019425,
...,...,...,...
1503,2024-02-12,0.050737,0.003648
1504,2024-02-13,0.039240,0.009432
1505,2024-02-14,0.052570,0.003056
1506,2024-02-15,0.027888,0.005081


In [None]:
# Display the downloaded price table.
price

In [None]:
stack = []
mapping = {')': '(', '}': '{', ']': '['}
# Popping from an empty list raises IndexError; catch it so this demonstration
# shows the message without aborting a full run of the notebook.
try:
    stack.pop()
except IndexError as exc:
    print(f"IndexError: {exc}")

In [None]:
def isValid(s):
    """Return True when every bracket in ``s`` is closed in the right order.

    Only the characters ``()[]{}`` are considered. Any other character is
    skipped, so text between brackets does not affect the result.

    Args:
        s: String to check.

    Returns:
        True if each closing bracket matches the most recent unclosed opening
        bracket and nothing is left open at the end; False otherwise.
    """
    mapping = {')': '(', '}': '{', ']': '['}
    openers = set(mapping.values())
    stack = []

    for char in s:
        if char in openers:
            stack.append(char)
        elif char in mapping:
            # A closer must match the latest opener still waiting on the stack.
            if not stack or stack[-1] != mapping[char]:
                return False
            stack.pop()

    return not stack
isValid('{Class}')

In [None]:
# An empty list is falsy, so this expression evaluates to True.
not []


In [None]:
# NOTE(review): `ll` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel -- define it first or drop the cell.
ll[-1]

In [None]:
# Memoized Fibonacci: results are cached in the module-level dict `memo`.
memo = {}
def fib(n):
    """Return the n-th Fibonacci number, printing ``n`` on every call.

    Values for ``n <= 1`` are returned as-is and are not cached; every other
    result is stored in ``memo`` and reused on later calls.
    """
    print(n)
    try:
        return memo[n]
    except KeyError:
        pass
    if n <= 1:
        return n
    result = fib(n - 1) + fib(n - 2)
    memo[n] = result
    return result

# Example usage:
# print(fib(5))  # Output: 5


In [None]:
# Compute the 20th Fibonacci number; fib prints its argument on every call.
fib(20)

In [None]:
# Keep the first row for each distinct speech text and save the result.
(
    speeches
    .drop_duplicates(subset='Text')
    .to_csv('nodupspeeches.csv')
)

In [7]:
# Display the speeches table.
speech_df

Unnamed: 0,Date,Speaker,Location,Text
0,"February 16, 2024",Vice Chair for Supervision Michael S. Barr,At the Annual Columbia Law School Banking Conf...,"February 16, 2024\n\nVice Chair for Supervisio..."
1,"February 15, 2024",Governor Christopher J. Waller,"At ""Climate, Currency, and Central Banking,"" a...","February 15, 2024\n\nGovernor Christopher J. W..."
2,"February 15, 2024",Governor Michelle W. Bowman,At the 19th BCBS-FSI High-Level Meeting for Af...,"February 15, 2024\n\nGovernor Michelle W. Bowm..."
3,"February 14, 2024",Vice Chair for Supervision Michael S. Barr,At the 40th Annual National Association for Bu...,"February 14, 2024\n\nVice Chair for Supervisio..."
4,"February 12, 2024",Governor Michelle W. Bowman,At the American Bankers Association 2024 Confe...,"February 12, 2024\n\nGovernor Michelle W. Bowm..."
...,...,...,...,...
222,"February 10, 2021",Chair Jerome H. Powell,At the Economic Club of New York (via webcast),"February 10, 2021\n\nChair Jerome H. Powell\n\..."
223,"January 13, 2021",Vice Chair Richard H. Clarida,"At the ""The Road Ahead for Central Banks,"" a s...","January 13, 2021\n\nVice Chair Richard H. Clar..."
224,"January 13, 2021",Governor Lael Brainard,At the Inaugural Mike McCracken Lecture on Ful...,"January 13, 2021\n\nGovernor Lael Brainard\n\n..."
225,"January 12, 2021",Governor Lael Brainard,At the AI Academic Symposium hosted by the Boa...,"January 12, 2021\n\nGovernor Lael Brainard\n\n..."


In [48]:
# Distinct Location values, ordered by how often each occurs.
speech_df.Location.value_counts().index

Index(['Peterson Institute for International Economics, Washington, D.C. ()',
       'Money Marketeers of New York University, New York, New York',
       'Forecasters Club of New York, New York, New York',
       'C. Peter McColough Series on International Economics, Council on Foreign Relations, New York, New York',
       'At "A Look at the Past, Present, and Future," a conference celebrating the Centennial of the Division of Research and Statistics, Board of Governors of the Federal Reserve System, Washington, D.C.',
       'American Enterprise Institute, Washington, D.C.',
       'Brookings Institution, Washington, D.C.',
       'Hutchins Center on Fiscal and Monetary Policy, The Brookings Institution, Washington, D.C. ()',
       'Peterson Institute for International Economics, Washington, D.C.',
       '2022 U.S. Monetary Policy Forum, New York, New York',
       ...
       'At The Chicago Booth Initiative on Global Markets Workshop on Market Dysfunction, Chicago, Illinois',
   

In [129]:
# Number of speeches per speaker.
# NOTE(review): the recorded run raised NameError because `speeches` was not
# defined at that point; it is read from speeches.csv in another cell.
speeches.Speaker.value_counts()

NameError: name 'speeches' is not defined

In [143]:
# Distinct speaker labels present in the speeches table.
set(speech_df.Speaker)

{'Chair Jerome H. Powell',
 'Governor Adriana D. Kugler',
 'Governor Christopher J. Waller',
 'Governor Lael Brainard',
 'Governor Lisa D. Cook',
 'Governor Michelle W. Bowman',
 'Governor Philip N. Jefferson',
 'Vice Chair Richard H. Clarida',
 'Vice Chair for Supervision Michael S. Barr',
 'Vice Chair for Supervision Randal K. Quarles'}

In [156]:
# Mean number of whitespace-separated tokens per speech text.
speeches['Text'].str.split().map(len).mean()

2465.1511111111113