In [411]:
# Import relevant libraries.
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import timedelta

### Data Preparation

In [413]:
# Load the dataset
speeches = pd.read_csv('speeches_sentiment.csv')
speeches.head()

Unnamed: 0,reference,country,date,title,author,is_gov,text,text_norm,text_cleaned,text_tokenised,...,negative,positive,uncertainty,litigious,strong,weak,constraining,word_count_sentiment,sentiment_lexicon_simple,sentiment_lexicon_weighted
0,r901128a_BOA,australia,1990-11-28,A Proper Role for Monetary Policy,fraser,0,They would no doubt argue that to have two obj...,they would no doubt argue that to have two obj...,would doubt argue two objectives like trying c...,"['would', 'doubt', 'argue', 'two', 'objectives...",...,84,58,32,5,10,15,13,217,-0.119816,0.112442
1,r911003a_BOA,australia,1991-10-03,,fraser,0,Today I wish to talk about real interest rates...,today i wish to talk about real interest rates...,today wish talk real interest rates mainly his...,"['today', 'wish', 'talk', 'real', 'interest', ...",...,53,28,35,2,3,16,12,149,-0.167785,0.014094
2,r920314a_BOA,australia,1992-03-14,,fraser,0,I welcome this opportunity to talk about prosp...,i welcome this opportunity to talk about prosp...,welcome opportunity talk prospects banks austr...,"['welcome', 'opportunity', 'talk', 'prospects'...",...,43,67,33,8,11,16,13,191,0.125654,0.421466
3,r920529a_BOA,australia,1992-05-29,,fraser,0,It is a pleasure to have this opportunity to a...,it is a pleasure to have this opportunity to a...,pleasure opportunity address influential gathe...,"['pleasure', 'opportunity', 'address', 'influe...",...,62,56,43,6,7,20,8,202,-0.029703,0.227228
4,r920817a_BOA,australia,1992-08-17,,fraser,0,"As a long-time fan of Don Sanders, I am deligh...","as a long-time fan of don sanders, i am deligh...",long time fan sanders delighted participating ...,"['long', 'time', 'fan', 'sanders', 'delighted'...",...,72,62,42,6,12,27,13,234,-0.042735,0.22735


In [414]:
# Initial overview
print("🔍 Initial Data Overview:")
speeches.info()

🔍 Initial Data Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7721 entries, 0 to 7720
Data columns (total 24 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   reference                   7721 non-null   object 
 1   country                     7721 non-null   object 
 2   date                        7721 non-null   object 
 3   title                       7721 non-null   object 
 4   author                      7721 non-null   object 
 5   is_gov                      7721 non-null   int64  
 6   text                        7721 non-null   object 
 7   text_norm                   7721 non-null   object 
 8   text_cleaned                7721 non-null   object 
 9   text_tokenised              7721 non-null   object 
 10  text_lemmatised             7721 non-null   object 
 11  text_lemmatised_str         7721 non-null   object 
 12  word_count_text             7721 non-null   int64  
 13  word_cou

In [415]:
# Count missing values
missing_summary = speeches.isnull().sum()
missing_summary = missing_summary[missing_summary > 0].sort_values(ascending=False)

print("\nMissing Values Detected:")
missing_summary


Missing Values Detected:


sentiment_lexicon_simple      1
sentiment_lexicon_weighted    1
dtype: int64

In [416]:
# Check for duplicate entries by 'reference'
duplicate_entries = speeches[speeches.duplicated(subset='reference', keep=False)]

print(f"\nDuplicate Records Found: {duplicate_entries.shape[0]}")


Duplicate Records Found: 0


In [460]:
# Splitting "date" & identify lenght speech    
speeches["date"] = pd.to_datetime(speeches["date"], errors='coerce')
speeches["year"] = speeches["date"].dt.year
speeches["month"] = speeches["date"].dt.month
speeches["speech_length"] = speeches["text"].str.split().apply(len)
speeches.head()

Unnamed: 0,reference,country,date,title,author,is_gov,text,text_norm,text_cleaned,text_tokenised,...,litigious,strong,weak,constraining,word_count_sentiment,sentiment_lexicon_simple,sentiment_lexicon_weighted,year,month,speech_length
0,r901128a_BOA,australia,1990-11-28,A Proper Role for Monetary Policy,fraser,0,They would no doubt argue that to have two obj...,they would no doubt argue that to have two obj...,would doubt argue two objectives like trying c...,"['would', 'doubt', 'argue', 'two', 'objectives...",...,5,10,15,13,217,-0.119816,0.112442,1990,11,3627
1,r911003a_BOA,australia,1991-10-03,,fraser,0,Today I wish to talk about real interest rates...,today i wish to talk about real interest rates...,today wish talk real interest rates mainly his...,"['today', 'wish', 'talk', 'real', 'interest', ...",...,2,3,16,12,149,-0.167785,0.014094,1991,10,3054
2,r920314a_BOA,australia,1992-03-14,,fraser,0,I welcome this opportunity to talk about prosp...,i welcome this opportunity to talk about prosp...,welcome opportunity talk prospects banks austr...,"['welcome', 'opportunity', 'talk', 'prospects'...",...,8,11,16,13,191,0.125654,0.421466,1992,3,3399
3,r920529a_BOA,australia,1992-05-29,,fraser,0,It is a pleasure to have this opportunity to a...,it is a pleasure to have this opportunity to a...,pleasure opportunity address influential gathe...,"['pleasure', 'opportunity', 'address', 'influe...",...,6,7,20,8,202,-0.029703,0.227228,1992,5,3841
4,r920817a_BOA,australia,1992-08-17,,fraser,0,"As a long-time fan of Don Sanders, I am deligh...","as a long-time fan of don sanders, i am deligh...",long time fan sanders delighted participating ...,"['long', 'time', 'fan', 'sanders', 'delighted'...",...,6,12,27,13,234,-0.042735,0.22735,1992,8,4152


In [462]:
# Sentiment-related columns
sentiment_columns = [
    'negative', 'positive', 'uncertainty', 'litigious',
    'strong', 'weak', 'constraining',
    'sentiment_lexicon_simple', 'sentiment_lexicon_weighted'
]

# Ensure numeric types
speeches[sentiment_columns] = speeches[sentiment_columns].apply(pd.to_numeric, errors='coerce')

# Check final types
print("\nSentiment Columns Data Types:")
print(speeches[sentiment_columns].dtypes)


Sentiment Columns Data Types:
negative                        int64
positive                        int64
uncertainty                     int64
litigious                       int64
strong                          int64
weak                            int64
constraining                    int64
sentiment_lexicon_simple      float64
sentiment_lexicon_weighted    float64
dtype: object


In [463]:
# List unique authors sorted alphabetically
unique_authors = speeches['author'].dropna().unique()
unique_authors = sorted(unique_authors)

print(f"🧾 Total unique authors: {len(unique_authors)}\n")
for author in unique_authors:
    print(f"- {author}")

🧾 Total unique authors: 325

- NO_INFO
- ]
- ac
- adachi
- adams
- aikman
- aires
- allen
- allsopp
- amamiya
- ammann
- asmussen
- backstrom
- bailey
- bake
- baltensperger
- bank
- banking
- bankwashington
- barker
- barr
- bean
- beaudry
- bell
- benjamin
- bergstrom
- berlin
- bernanke
- besley
- bies
- blanchflower
- blattner
- boivin
- bond
- bonin
- bowe
- bowman
- braddick
- brainard
- brandon
- bratislava
- brazier
- brealey
- breeden
- breman
- broadbent
- brown
- brussels
- budd
- buiter
- bulley
- capital
- carney
- cbi
- chilcott
- churm
- clarida
- clark
- claus
- cleland
- clementi
- coeure
- cohrs
- committee
- conference
- constancio
- cook
- cote
- crises
- crisis
- cunliffe
- dale
- danthine
- delivered
- dinis
- dodge
- draghi
- duguay
- duisenberg
- duke
- ecb
- efma
- ekholm
- elderson
- emu
- environment
- eu
- ferguson
- fischer
- fisher
- floden
- forbes
- forum
- forward
- francisco
- fraser
- fuglister
- fujiwara
- fukui
- funo
- furse
- gehrig
- george
- ger

In [464]:
# List of clearly invalid "author" values
invalid_authors = {
    'NO_INFO', 'no_info', ']', 'committee', 'conference', 'delivered',
    'bank', 'bankwashington', 'cbi', 'ecb', 'eu', 'london', 'madrid', 'singapore',
    'vienna', 'strasbourg', 'tokyo', 'washington', 'rome', 'network', 'summit',
    'environment', 'francisco', 'istanbul', 'policy', 'industry', 'governor',
    'responsibility', 'place', 'targeting', 'iii'
}

# Remove rows with invalid authors
speeches_cleaned = speeches[~speeches['author'].str.lower().isin(invalid_authors)].copy()

# Capitalise author names consistently
speeches_cleaned['author'] = speeches_cleaned['author'].str.strip().str.title()

print(f"Rows remaining after removing invalid authors: {speeches_cleaned.shape[0]}")
print(f"Unique authors now: {speeches_cleaned['author'].nunique()}")

Rows remaining after removing invalid authors: 7224
Unique authors now: 294


### UK Speeches 

In [466]:
# Filter speeches from the United Kingdom only
uk_speeches = speeches_cleaned[speeches_cleaned['country'].str.lower() == 'united kingdom'].copy()

print(f"🇬🇧 Total UK speeches: {uk_speeches.shape[0]}")

🇬🇧 Total UK speeches: 1169


In [467]:
# Clean author column
uk_speeches["author"] = uk_speeches["author"].fillna("").str.strip().str.title()

# Add is_gov column defaulting to 0
uk_speeches["is_gov"] = 0

# Edward George: 01‑Jul‑1993 → 30‑Jun‑2003
mask_george = (
    uk_speeches["author"].str.contains("George", case=False)
    & (uk_speeches["date"] >= "1993-07-01")
    & (uk_speeches["date"] <= "2003-06-30")
)
uk_speeches.loc[mask_george, "is_gov"] = 1

# Mervyn King: 01‑Jul‑2003 → 30‑Jun‑2013
mask_king = (
    uk_speeches["author"].str.contains("King", case=False)
    & (uk_speeches["date"] >= "2003-07-01")
    & (uk_speeches["date"] <= "2013-06-30")
)
uk_speeches.loc[mask_king, "is_gov"] = 1

# Mark Carney: 01‑Jul‑2013 → 15‑Mar‑2020
mask_carney = (
    uk_speeches["author"].str.contains("Carney", case=False)
    & (uk_speeches["date"] >= "2013-07-01")
    & (uk_speeches["date"] <= "2020-03-15")
)
uk_speeches.loc[mask_carney, "is_gov"] = 1

# Andrew Bailey: 16‑Mar‑2020 → present
mask_bailey = (
    uk_speeches["author"].str.contains("Bailey", case=False)
    & (uk_speeches["date"] >= "2020-03-16")
)
uk_speeches.loc[mask_bailey, "is_gov"] = 1

In [468]:
gov_count = uk_speeches['is_gov'].sum()
print(f"Total speeches by Governors: {gov_count}")

print("\nExample of speeches by governors:")
print(uk_speeches[uk_speeches['is_gov'] == 1][['date', 'author', 'title']].head())

Total speeches by Governors: 194

Example of speeches by governors:
           date  author              title
4961 1998-09-15  George             Speech
4962 1998-10-21  George  Britain in Europe
4966 1998-11-19  George             Speech
4969 1999-01-12  George             Speech
4970 1999-01-18  George             Speech


In [469]:
# Define common bin range based on both sentiment columns
min_val = min(uk_speeches['sentiment_lexicon_weighted'].min(), uk_speeches['sentiment_lexicon_simple'].min())
max_val = max(uk_speeches['sentiment_lexicon_weighted'].max(), uk_speeches['sentiment_lexicon_simple'].max())

# Create uniform bin edges (e.g., 50 bins)
bin_edges = np.linspace(min_val, max_val, 51)  # 50 bins = 51 edges

# Histogram: Weighted Sentiment
fig_weighted = px.histogram(
    uk_speeches,
    x='sentiment_lexicon_weighted',
    nbins=50,
    title='Distribution of Weighted Sentiment (UK Speeches)',
    labels={'sentiment_lexicon_weighted': 'Sentiment Score'}
)
fig_weighted.update_xaxes(range=[min_val, max_val])
fig_weighted.show()

# Histogram: Simple Sentiment
fig_simple = px.histogram(
    uk_speeches,
    x='sentiment_lexicon_simple',
    nbins=50,
    title='Distribution of Simple Sentiment (UK Speeches)',
    labels={'sentiment_lexicon_simple': 'Sentiment Score'}
)
fig_simple.update_xaxes(range=[min_val, max_val])
fig_simple.show()

Plot 1 – Distribution of Weighted Sentiment (UK Speeches)
This histogram shows the distribution of weighted sentiment scores, which apply different importance levels to sentiment categories (e.g., positive = 1.5, negative = -1, etc.).

* The distribution is slightly right-skewed, centered just above 0.

* Most speeches have sentiment scores between 0 and 0.3, suggesting that Bank of England speeches generally carry a mildly positive tone when accounting for weighted emphasis on strength and positivity.

* A small number of speeches score negatively (below 0), reflecting more cautionary or crisis-driven content.

* The positive skew aligns with the Bank’s role in promoting economic stability and confidence, especially during uncertain periods.

***Conclusion:*** The Bank tends to emphasize optimism and strength in its official communications, which is strategically aligned with its responsibility to reassure markets and the public.

Plot 2 – Distribution of Simple Sentiment (UK Speeches)
This histogram displays simple sentiment scores, calculated as a raw total of sentiment-labeled words without applying custom weights.

* The distribution here is centered slightly below zero, with a more balanced spread to the left (negative values).

* This suggests that when sentiment words are treated equally (e.g., “constraining” and “strong” given equal weight), the tone appears mildly negative or cautious overall.

* There is still a noticeable right tail, indicating that some speeches are clearly positive, though fewer than in the weighted version.

***Conclusion:*** Without weighting for intent (like emphasis on "strength" vs. "constraint"), the sentiment tone appears more conservative, likely due to frequent use of cautionary language (e.g., “uncertainty”, “risk”) that’s common in financial communication.

**Overall Insight**

These two plots together reinforce that:

 * The weighted sentiment score better reflects the intended tone and reassurance strategy of the Bank.
 * The simple sentiment score shows the raw lexical caution often present in economic speeches.

In [471]:
# Aggregate sentiment over time (monthly or quarterly)
uk_speeches['year_month'] = uk_speeches['date'].dt.to_period('M').astype(str)

sentiment_over_time = uk_speeches.groupby('year_month')['sentiment_lexicon_weighted'].mean().reset_index()

# Line plot
fig_over_time = px.line(
    sentiment_over_time,
    x='year_month',
    y='sentiment_lexicon_weighted',
    title='Average Weighted Sentiment Over Time (UK)',
    labels={'year_month': 'Date', 'sentiment_lexicon_weighted': 'Average Sentiment'}
)
fig_over_time.update_xaxes(type='category')
fig_over_time.show()

This chart shows the monthly average of weighted sentiment scores across UK central bank speeches over several decades.

* 1997–2007 (Pre-Global Financial Crisis):

    * Generally positive sentiment, fluctuating between ~0.1 and 0.4.
    * This aligns with the stable economic period following Bank of England independence (1997) and before the 2008 financial crisis.
    * Some occasional drops may reflect early warnings or geopolitical tension, but sentiment quickly rebounds.
      

* 2008–2012 (Crisis and Recovery Period):

    * Noticeable decline in average sentiment, with some months even dipping below 0.
    * This matches the Global Financial Crisis and Eurozone instability.
    * The tone becomes more cautious and less optimistic, consistent with crisis communication.
      

* 2013–2016 (Post-crisis recovery):

    * Sentiment improves slightly, hovering around 0.1–0.2.
    * Reflects gradual recovery and a more reassuring communication tone.
      

* 2016–2020 (Brexit + COVID onset):

    * Increased volatility in sentiment, reflecting uncertainty from the Brexit referendum and its political/economic implications.
    * Some sharp dips in 2020 coincide with the COVID-19 pandemic, when speech tone again becomes more neutral or slightly negative.
      

* 2021–2022 (COVID recovery and inflation concerns):

    * Mixed sentiment: some rebound, but not reaching earlier highs.
    * Reflects a more measured tone, balancing optimism with concerns over inflation and monetary tightening.
      

**Key Takeaway:**

The Bank of England’s speech sentiment over time reflects its dual role:

* Providing reassurance during crises (hence temporary positivity even in downturns),

* While also acknowledging uncertainty (resulting in dips during economic shocks).

* The trendline confirms that major events (GFC, Brexit, COVID) correlate with significant shifts in communication tone, supporting the scenario goal of linking sentiment to macroeconomic events.

In [473]:
# Monthly aggregation
uk_speeches['year_month'] = uk_speeches['date'].dt.to_period('M').dt.to_timestamp()
monthly_sentiment = (
    uk_speeches
    .groupby('year_month')['sentiment_lexicon_weighted']
    .mean()
    .reset_index()
)

# Convert event dates to datetime
events = [
    ("BoE Independence", "1997-05"),
    ("Global Financial Crisis", "2008-09"),
    ("Brexit Vote", "2016-06"),
    ("COVID Lockdown", "2020-03")
]

# Convert to datetime format
events = [(name, pd.to_datetime(f"{date}-01")) for name, date in events]

# Plot
fig_events_with_sent_over_time = go.Figure()

# Main sentiment line
fig_events_with_sent_over_time.add_trace(go.Scatter(
    x=monthly_sentiment['year_month'],
    y=monthly_sentiment['sentiment_lexicon_weighted'],
    mode='lines+markers',
    name='Avg Monthly Sentiment (UK)'
))

# Add event lines and labels
for name, event_date in events:
    fig_events_with_sent_over_time.add_shape(
        type='line',
        x0=event_date, x1=event_date,
        y0=monthly_sentiment['sentiment_lexicon_weighted'].min(),
        y1=monthly_sentiment['sentiment_lexicon_weighted'].max(),
        line=dict(color='red', width=1, dash='dot')
    )
    fig_events_with_sent_over_time.add_annotation(
        x=event_date,
        y=monthly_sentiment['sentiment_lexicon_weighted'].max(),
        text=name,
        showarrow=False,
        yshift=10,
        font=dict(size=10, color="red")
    )

# Format layout
fig_events_with_sent_over_time.update_layout(
    title="UK Speech Sentiment Over Time (Monthly) with Key Events",
    xaxis_title="Date (Month-Year)",
    yaxis_title="Avg Weighted Sentiment",
    xaxis_tickformat="%b %Y",
    template="plotly_white",
    height=600,
    width=1200,  
    margin=dict(r=150)  
)


fig_events_with_sent_over_time.show()

The weighted sentiment of Bank of England speeches over time shows alignment with major economic and political events in the UK. 

Following the Bank’s independence in 1997, sentiment rose, reflecting growing optimism and institutional credibility. 

During the 2008 Global Financial Crisis, sentiment dropped sharply, capturing the urgency, uncertainty, and pessimism in central bank communications amid systemic financial stress. 

The 2016 Brexit vote marked another key turning point, with sentiment showing moderate volatility thereafter, suggesting caution and concern in response to prolonged political and economic uncertainty. 

A further sharp decline is observed during the onset of the COVID-19 lockdown in March 2020, consistent with crisis-related language and emergency policy responses. 

Overall, the analysis highlights how the tone of central bank speeches reflects broader macroeconomic conditions and policy context, suggesting that speech sentiment can be a valuable signal of institutional response to national and global shocks.

In [475]:
# Box plot comparing sentiment
fig_gov = px.box(
    uk_speeches,
    x='is_gov',
    y='sentiment_lexicon_weighted',
    points='all',
    title='Sentiment by Governor Status (UK Speeches)',
    labels={'is_gov': 'Is Governor (1 = Yes)', 'sentiment_lexicon_weighted': 'Weighted Sentiment'}
)
fig_gov.show()

Speeches delivered by Governors tend to have slightly higher average sentiment scores compared to those by non-governors. This suggests that governors generally use more positive or reassuring language, aligning with their role in promoting financial stability and market confidence.
Non-governor speeches exhibit greater variability in sentiment, possibly reflecting a broader range of topics or speaker roles. The narrower sentiment range in governor speeches indicates a more controlled and consistent tone, reinforcing their function as stabilising voices during economic communication.

In [477]:
# Top Authors by Number of Speeches:
author_counts = uk_speeches['author'].value_counts().head(10).reset_index()
author_counts.columns = ['author', 'speech_count']

fig_top10 = px.bar(
    author_counts,
    x='author',
    y='speech_count',
    title='Top 10 Authors by Number of UK Speeches',
    text='speech_count'
)
fig_top10.show()

This chart highlights the most active contributors to the Bank’s public communications. Prominent figures like Andy Haldane, Mark Carney, Mervyn King, and Andrew Bailey have delivered the highest number of speeches. Most of these individuals held senior leadership roles, including Governor or Chief Economist, and are therefore central to the Bank’s public messaging strategy.

In [479]:
# Average Sentiment per Top Author:
top_authors = author_counts['author'].tolist()

author_sentiment = (
    uk_speeches[uk_speeches['author'].isin(top_authors)]
    .groupby('author')['sentiment_lexicon_weighted']
    .mean()
    .reset_index()
    .sort_values(by='sentiment_lexicon_weighted', ascending=False)
)

# Round sentiment values
author_sentiment['sentiment_lexicon_weighted'] = author_sentiment['sentiment_lexicon_weighted'].round(2)

# Plot with rounded values
fig_avg_sent_top_authors = px.bar(
    author_sentiment,
    x='author',
    y='sentiment_lexicon_weighted',
    title='Average Sentiment by Top Authors',
    labels={'sentiment_lexicon_weighted': 'Avg Weighted Sentiment'},
    text='sentiment_lexicon_weighted'
)
fig_avg_sent_top_authors.show()

This chart shows variation in average weighted sentiment across those same top authors. Notably:

* Edward George and Mark Carney delivered speeches with the most positive sentiment, aligning with their responsibility to project confidence and stability.

* Tucker, Cunliffe, and Bean show lower or more neutral sentiment, possibly reflecting more technical or risk-focused messaging in their roles.

**Takeaway:**
Governors use more positive and measured language, aligning with their responsibility to reassure markets and maintain financial stability, while deputy governors and directors adopt a more neutral or risk-aware tone, consistent with their mandates.

***Note:***
* Governors (Carney, King, George, Bailey) all show higher-than-average sentiment, reinforcing their role in projecting confidence and stability.

* Non-governors like Haldane and Hauser also speak frequently, but often with more neutral or technical tone, consistent with their roles in analysis or operational functions.

* Deputy Governors (Tucker, Bean, Cunliffe, Broadbent) often deliver analytical or risk-focused speeches, which may explain lower sentiment scores.

### Sentiment Before vs After Events

In [484]:
# Major events (month granularity)
events = [
    ("Global Financial Crisis", "2008-09"),
    ("Brexit Vote", "2016-06"),
    ("COVID Lockdown", "2020-03")
]

# Create DataFrame and convert to datetime
event_df = pd.DataFrame(events, columns=['event', 'month'])
event_df['month'] = pd.to_datetime(event_df['month'], format='%Y-%m')

In [485]:
# uk_speeches['date'] is datetime
uk_speeches['date'] = pd.to_datetime(uk_speeches['date'])

sentiment_comparison = []

for _, row in event_df.iterrows():
    event_name = row['event']
    event_date = row['month']

    # ±1 month window
    before_start = event_date - pd.DateOffset(months=1)
    before_end = event_date - pd.DateOffset(days=1)
    after_start = event_date
    after_end = event_date + pd.DateOffset(months=1) - pd.DateOffset(days=1)

    # Filter speeches
    before_speeches = uk_speeches[
        (uk_speeches['date'] >= before_start) & (uk_speeches['date'] <= before_end)
    ]
    after_speeches = uk_speeches[
        (uk_speeches['date'] >= after_start) & (uk_speeches['date'] <= after_end)
    ]

    # Append results
    sentiment_comparison.append({
        'event': event_name,
        'period': 'Before',
        'avg_sentiment': before_speeches['sentiment_lexicon_weighted'].mean(),
        'count': before_speeches.shape[0]
    })
    sentiment_comparison.append({
        'event': event_name,
        'period': 'After',
        'avg_sentiment': after_speeches['sentiment_lexicon_weighted'].mean(),
        'count': after_speeches.shape[0]
    })

# sentiment_df
sentiment_df = pd.DataFrame(sentiment_comparison)

In [486]:
fig_sentiment_beforeafter = px.bar(
    sentiment_df,
    x='event',
    y='avg_sentiment',
    color='period',
    barmode='group',
    text='avg_sentiment',
    title='Average Speech Sentiment Before vs After Major UK Events',
    labels={'avg_sentiment': 'Avg Weighted Sentiment', 'event': 'Event'},
    height=500
)

fig_sentiment_beforeafter.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig_sentiment_beforeafter.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig_sentiment_beforeafter.show()

This broader analysis aggregates sentiment across all UK central bank speeches, comparing average tone in the three months before and after major national events. 

While general sentiment increased following the Brexit vote and, to a lesser extent, the Global Financial Crisis, a decline was observed after the onset of the COVID-19 pandemic. 

However, this aggregated view masks important role-based differences. For instance, the apparent rise in sentiment post-Brexit is largely driven by non-governor contributions, as governor tone remained consistently high. This reinforces the importance of disaggregating sentiment by speaker role to better understand how Bank communications are shaped in response to macroeconomic events, which is critical for evaluating the Bank’s influence on market perceptions and financial stability.

#### Sentiment Before vs After Events Gov vs Non-Gov

In [489]:
sentiment_comparison_grouped = []

for _, row in event_df.iterrows():
    event_name = row['event']
    event_date = row['month']

    # 3-month window
    before_start = event_date - pd.DateOffset(months=3)
    before_end = event_date - pd.DateOffset(days=1)
    after_start = event_date
    after_end = event_date + pd.DateOffset(months=3) - pd.DateOffset(days=1)

    for gov_status in [1, 0]:  # 1 = Governor, 0 = Non-Governor
        label = 'Governor' if gov_status == 1 else 'Non-Governor'
        filtered = uk_speeches[uk_speeches['is_gov'] == gov_status]

        before = filtered[(filtered['date'] >= before_start) & (filtered['date'] <= before_end)]
        after = filtered[(filtered['date'] >= after_start) & (filtered['date'] <= after_end)]

        sentiment_comparison_grouped.extend([
            {
                'event': event_name,
                'group': label,
                'period': 'Before',
                'avg_sentiment': before['sentiment_lexicon_weighted'].mean(),
                'count': before.shape[0]
            },
            {
                'event': event_name,
                'group': label,
                'period': 'After',
                'avg_sentiment': after['sentiment_lexicon_weighted'].mean(),
                'count': after.shape[0]
            }
        ])

In [490]:
sentiment_df_grouped = pd.DataFrame(sentiment_comparison_grouped)

# Combine period + group into one column for easier plotting
sentiment_df_grouped['label'] = sentiment_df_grouped['period'] + ' - ' + sentiment_df_grouped['group']

In [491]:
fig_sentiment_beforeafter_3months = px.bar(
    sentiment_df_grouped,
    x='event',
    y='avg_sentiment',
    color='label',
    barmode='group',
    text='avg_sentiment',
    title='Governor vs Non-Governor Sentiment Before and After Key Events (±3 Months)',
    labels={'avg_sentiment': 'Avg Weighted Sentiment', 'event': 'Event'},
    height=500
)

fig_sentiment_beforeafter_3months.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig_sentiment_beforeafter_3months.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig_sentiment_beforeafter_3months.show()

The plot reveals a significant divergence in how governors and non-governors responded. 

During the Global Financial Crisis, governors exhibited a sharp decline in sentiment, reflecting the urgency and severity of the situation, whereas non-governors showed only a modest dip. 

Following the Brexit vote, sentiment among governors remained high and steady, suggesting efforts to maintain market confidence, while non-governors exhibited a notable uplift in sentiment, potentially reflecting reduced uncertainty or political alignment. 

During the COVID-19 lockdown, sentiment from non-governors dropped significantly, while governors maintained a relatively stable tone—consistent with their role in providing reassurance amid economic disruption. 

These differences underscore the strategic importance of the Bank’s communication roles during periods of instability.

## MPC

In [494]:
# Convert both to DataFrames
votes_df = pd.read_excel("mpc_votes_by_author.xlsx")
rate_df = pd.read_excel("mpc_bank_rates_voted.xlsx")

# Merge them on year/month to enrich votes with actual rate outcome
votes_df['vote_month'] = votes_df['month']  # for clarity
votes_merged = votes_df.merge(
    rate_df[['vote_year', 'vote_month', 'rate_diff', 'rate_change', 'date']],
    how='left',
    left_on=['year', 'month'],
    right_on=['vote_year', 'vote_month']
)

In [495]:
# Join this merged data to speeches on author + date
uk_speeches['year'] = uk_speeches['date'].dt.year
uk_speeches['month'] = uk_speeches['date'].dt.month

# Normalise author names (title-case or lowercase match)
uk_speeches['author_clean'] = uk_speeches['author'].str.lower()
votes_merged['author_clean'] = votes_merged['author'].str.lower()

# Merge votes into speeches
speeches_with_votes = uk_speeches.merge(
    votes_merged,
    on=['author_clean', 'year', 'month'],
    how='left'
)

In [496]:
fig_vote = px.box(
    speeches_with_votes.dropna(subset=['vote']),
    x='vote',
    y='sentiment_lexicon_weighted',
    title='Speech Sentiment by MPC Vote Direction',
    color='vote'
)
fig_vote.show()

* Speeches associated with an “increase” vote display the highest median sentiment scores, suggesting a more confident or optimistic tone.
* In contrast, speeches linked to “reduce” votes show noticeably lower sentiment, indicating a more cautious or negative tone.
* “Maintain” votes fall in between, with a wider and more neutral distribution.

***This result strongly supports the idea that the tone of central bank communications reflects underlying policy preferences. It aligns well with the Bank of England’s strategic interest in understanding whether speech sentiment can reveal or even anticipate monetary policy decisions.***

In [498]:
fig_magnitude = px.scatter(
    speeches_with_votes.dropna(subset=['rate_diff']),
    x='rate_diff',
    y='sentiment_lexicon_weighted',
    title='Speech Sentiment vs Rate Change Magnitude',
    trendline='ols'
)
fig_magnitude.show()

* There is a slight positive correlation: when interest rates increase (positive rate_diff), speeches tend to show slightly more positive sentiment.

* Conversely, small or negative rate changes are associated with more neutral or even slightly negative tones.

* However, the correlation is weak, as data points are widely dispersed, indicating other factors may also influence sentiment.

***While sentiment trends slightly with rate changes, it may not serve as a strong standalone predictor.***

In [500]:
# In speeches: extract year and month
uk_speeches['year'] = uk_speeches['date'].dt.year
uk_speeches['month'] = uk_speeches['date'].dt.month

# Normalize author name to lowercase
uk_speeches['author_clean'] = uk_speeches['author'].str.lower().str.strip()
votes_df['author_clean'] = votes_df['author'].str.lower().str.strip()


In [501]:
# Merge on author + year + month
speeches_with_votes = uk_speeches.merge(
    votes_df[['author_clean', 'year', 'month', 'vote']],
    on=['author_clean', 'year', 'month'],
    how='left'
)

# Check how many speeches got matched with votes
matched = speeches_with_votes['vote'].notna().sum()
total = speeches_with_votes.shape[0]
print(f"Matched {matched} out of {total} UK speeches with vote data.")

Matched 750 out of 1176 UK speeches with vote data.


In [502]:
fig_sentimentbyvote = px.box(
    speeches_with_votes.dropna(subset=['vote']),
    x='vote',
    y='sentiment_lexicon_weighted',
    color='vote',
    title='Speech Sentiment by MPC Vote Direction',
    labels={
        'vote': 'Vote Direction',
        'sentiment_lexicon_weighted': 'Weighted Sentiment'
    },
    points='all',
    height=500
)
fig_sentimentbyvote.show()

* Speeches tied to “increase” votes consistently show higher sentiment values with relatively low dispersion.
* On the other hand, “reduce” votes are associated with more negative sentiment and tighter clustering, reflecting a more uniformly dovish tone.
* The “maintain” group, however, shows the widest spread, including both positive and negative sentiment scores, highlighting the diversity of views behind seemingly neutral decisions.

***This variability is valuable to the Bank of England’s goal of assessing how MPC communication contributes to market guidance. It underscores the potential to identify outlier views or evolving stances before they materialise into policy actions.***

In [504]:
fig_govvote = px.box(
    speeches_with_votes.dropna(subset=['vote']),
    x='vote',
    y='sentiment_lexicon_weighted',
    color='is_gov',
    title='Governor vs Non-Governor Sentiment by Vote Direction',
    labels={
        'vote': 'Vote Direction',
        'sentiment_lexicon_weighted': 'Weighted Sentiment',
        'is_gov': 'Is Governor (1 = Yes)'
    },
    points='all',
    height=500
)
fig_govvote.show()

* Governors consistently use a more neutral tone across vote types.

* Non-Governors show greater variation:

* More positive sentiment when voting to increase

* More negative when voting to reduce

***This suggests that Governors adopt a more balanced and institutionally cautious communication style, which may serve to stabilise expectations regardless of monetary policy stance. This aligns with the Bank’s mission to provide stability through communication***

In [506]:
# Original speech dataset (speeches_with_votes)
for _, row in event_df.iterrows():
    event_name = row["event"]
    event_date = row["month"]
    col_name = event_name.lower().replace(" ", "_") + "_period"
    
    # Apply before/after tag
    speeches_with_votes[col_name] = speeches_with_votes["date"].apply(
        lambda d: "before" if d < event_date else "after"
    )

In [507]:
fig_gfc = px.box(
    speeches_with_votes.dropna(subset=['vote']),
    x='vote',
    y='sentiment_lexicon_weighted',
    color='is_gov',
    facet_col='global_financial_crisis_period',
    title='Speech Sentiment by Vote & Governor Status (GFC)',
    labels={
        'vote': 'Vote Direction',
        'sentiment_lexicon_weighted': 'Weighted Sentiment',
        'is_gov': 'Is Governor (1 = Yes)',
        'global_financial_crisis_period': 'GFC Period'
    },
    points='all',
    height=600,
    width=1200
)
fig_gfc.show()

**Before the GFC:**
- Across all vote directions, both Governors and non-Governors displayed moderate to positive sentiment, especially in "increase" votes.

- Governors exhibited a slightly more positive and stable tone than non-Governors, particularly for "increase" votes, suggesting confidence in the economic outlook prior to the crisis.

- "Reduce" votes were still associated with lower sentiment, reflecting early signs of caution.

**After the GFC:**
- Sentiment declines noticeably, especially for "reduce" and "maintain" votes, across both roles.

- Governors show a marked drop in sentiment for "reduce" votes — reinforcing their communication of concern and economic uncertainty during and after the crisis.

- Interestingly, for "increase" votes, Governors still maintain higher sentiment than non-Governors, suggesting that when a more optimistic stance was taken, it was communicated more assertively by the central leadership.

- Overall, the spread of sentiment is more compressed after the GFC, especially for "maintain" and "reduce" votes, highlighting a cautious and convergent communication style in a period of high economic stress.


***After the GFC, the tone of communications clearly became more conservative and cautious, particularly when policy actions involved holding or reducing rates. The persistence of more optimistic tone in "increase" votes—especially from Governors—may reflect an effort to reassure markets when tightening was seen as appropriate.***

In [509]:
fig_brexit = px.box(
    speeches_with_votes.dropna(subset=['vote']),
    x='vote',
    y='sentiment_lexicon_weighted',
    color='is_gov',
    facet_col='brexit_vote_period',
    title='Speech Sentiment by Vote & Governor Status (Brexit)',
    labels={
        'vote': 'Vote Direction',
        'sentiment_lexicon_weighted': 'Weighted Sentiment',
        'is_gov': 'Is Governor (1 = Yes)',
        'brexit_vote_period': 'Brexit Period'
    },
    points='all',
    height=600,
    width=1200
)
fig_brexit.show()

In [510]:
fig_covid = px.box(
    speeches_with_votes.dropna(subset=['vote']),
    x='vote',
    y='sentiment_lexicon_weighted',
    color='is_gov',
    facet_col='covid_lockdown_period',
    title='Speech Sentiment by Vote & Governor Status (Pre/Post COVID)',
    labels={
        'vote': 'Vote Direction',
        'sentiment_lexicon_weighted': 'Weighted Sentiment',
        'is_gov': 'Is Governor (1 = Yes)',
        'covid_lockdown_period': 'COVID Period'
    },
    points='all',
    height=600,
    width=1200
)
fig_covid.show()

**Before COVID:**
- Speech sentiment shows a healthy distribution: "increase" votes show positive tone, "reduce" votes lean negative, and "maintain" sits in the middle.

- Governors consistently express higher sentiment, especially for "increase" votes — suggesting confidence in the pre-pandemic economic trajectory.

- Non-Governors show slightly more dispersion in tone, but overall still align with expected policy stances.

**After COVID:**
- There is a notable contraction in sentiment, particularly for "reduce" and "maintain" votes.

- Both Governors and non-Governors exhibit sharply lower sentiment scores, reflecting the economic uncertainty and policy caution during the pandemic.

- "Increase" votes become rare post-COVID, and those that exist show lower overall sentiment, though Governors again show a slightly higher tone than peers.

- Negative or near-zero sentiment dominates, showing a marked shift in central bank communication tone toward crisis management.

**The uniform dip in sentiment across all roles and vote types after COVID illustrates the Bank of England’s shift toward a more serious, risk-aware tone. This confirms that speech sentiment not only reflects internal voting stances but also responds systematically to major economic disruptions.**

### Non-UK

In [513]:
# Select countries of interest
group1 = ['united kingdom', 'united states', 'japan']
group2 = ['united kingdom', 'euro area', 'switzerland', 'sweden']

# Ensure lowercase and cleaned country column (if not already)
speeches['country'] = speeches['country'].str.lower()
speeches['date'] = pd.to_datetime(speeches['date'])
speeches['month'] = speeches['date'].dt.to_period('M').dt.to_timestamp()


In [514]:
def prepare_sentiment(df, country_list, window=6):
    df_filtered = df[df['country'].isin(country_list)].copy()
    df_grouped = (
        df_filtered
        .groupby(['country', 'month'])['sentiment_lexicon_weighted']
        .mean()
        .reset_index()
        .rename(columns={'sentiment_lexicon_weighted': 'avg_sentiment'})
    )
    # Rolling average per country
    df_grouped['rolling_sentiment'] = df_grouped.groupby('country')['avg_sentiment'].transform(
        lambda x: x.rolling(window=window, min_periods=1).mean()
    )
    return df_grouped

#### UK vs USA vs Japan

Why?

* How central banks respond to global events differently

* Compare sentiment before/after COVID, GFC, etc.

* Highlights BoE tone vs Fed (proactive) and BoJ (deflation-focused)

In [517]:
group1_sentiment = prepare_sentiment(speeches, group1)

fig_group1 = px.line(
    group1_sentiment,
    x='month',
    y='rolling_sentiment',
    color='country',
    title='UK vs USA vs Japan: Smoothed Speech Sentiment Over Time',
    labels={
        'month': 'Date',
        'rolling_sentiment': 'Smoothed Sentiment',
        'country': 'Country'
    }
)
fig_group1.update_layout(height=500)

fig_group1.show()

* The UK again shows a more active and optimistic tone, especially in the mid-2000s and during the recovery from the COVID-19 shock (~2020–2022). It maintains a resilient upward tone, even in turbulent periods.

* The USA (Fed) has a more neutral and steady sentiment profile, with minor dips during crisis points (2008, 2020) and moderate recovery afterwards. This is consistent with the Fed’s role as a global anchor — its messaging must remain measured to preserve stability.

* Japan (BoJ) demonstrates the most cautious and suppressed sentiment over the entire period. It never rises to the levels seen in the UK or USA, reflecting the BoJ’s long-standing struggle with deflation and its consistently accommodative policy posture.

***The BoJ’s low sentiment correlates with its crisis-driven policy environment, while the Fed communicates more conservatively despite global leadership. The BoE, in contrast, appears more adaptive in tone — possibly making its speeches more responsive to economic events and market needs. These differences validate sentiment as a useful comparative lens for international policy tone.***

#### UK vs Euro Area vs Switzerland vs Sweden

Why?
* BoE and ECB serve large, systemically important regions

* SNB and Riksbank are more conservative, often silent actors — do they sound different?

* Good for identifying regional divergence in tone

In [521]:
# Remove any speech from Sweden before 1950
speeches = speeches[
    ~((speeches['country'].str.lower() == 'sweden') & (speeches['date'] < '1950-01-01'))
]

# Prepare the data
group2_sentiment = prepare_sentiment(speeches, group2)

fig_group2 = px.line(
    group2_sentiment,
    x='month',
    y='rolling_sentiment',
    color='country',
    title='UK vs Euro Area vs Switzerland vs Sweden: Smoothed Speech Sentiment Over Time',
    labels={
        'month': 'Date',
        'rolling_sentiment': 'Smoothed Sentiment',
        'country': 'Country'
    }
)

fig_group2.update_layout(
    height=500,
    xaxis=dict(
        tickvals=custom_ticks,
        ticktext=custom_labels,
        title='Year'
    )
)

fig_group2.show()

* UK (BoE) consistently expresses stronger and more positive sentiment than the other three. Notably, it leads in tone during pre-GFC years (2003–2007) and again in the early 2020s.

* Euro Area (ECB) shows a flatter and more conservative sentiment profile, with less fluctuation. This reflects the ECB’s traditionally cautious and consensus-driven communication style.

* Switzerland (SNB) exhibits moderate and stable sentiment, rarely hitting extremes — consistent with the country’s reputation for monetary conservatism and neutrality.

* Sweden (Riksbank) displays more volatility, especially from 2005 to 2010, potentially reflecting internal policy experimentation and shifting stance around inflation targets and negative rates.

This supports the idea that sentiment analysis can distinguish between institutional communication cultures. The Bank of England’s relatively more expressive tone may be part of a deliberate strategy to shape expectations, while the ECB and SNB appear more restrained. These differences are meaningful for assessing how central bank speeches influence market perception and policy signaling across Europe.

#### Sentiment Change Around Events

In [524]:
import pandas as pd

def sentiment_change_around_events(df, countries, events, window_months=12):
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    results = []

    for event_name, event_date_str in events:
        event_date = pd.to_datetime(event_date_str)
        before_window = (event_date - pd.DateOffset(months=window_months))
        after_window = (event_date + pd.DateOffset(months=window_months))
        
        for country in countries:
            country_df = df[df['country'].str.lower() == country.lower()]
            before_sentiment = country_df[
                (country_df['date'] >= before_window) & 
                (country_df['date'] < event_date)
            ]['sentiment_lexicon_weighted'].mean()

            after_sentiment = country_df[
                (country_df['date'] > event_date) & 
                (country_df['date'] <= after_window)
            ]['sentiment_lexicon_weighted'].mean()

            change = after_sentiment - before_sentiment
            results.append({
                'Country': country.title(),
                'Event': event_name,
                'Before Avg Sentiment': round(before_sentiment, 3),
                'After Avg Sentiment': round(after_sentiment, 3),
                'Change': round(change, 3),
                'Direction': '↑' if change > 0 else '↓' if change < 0 else '→'
            })
    
    return pd.DataFrame(results)


In [525]:
events = [
    ("Global Financial Crisis", "2008-09"),
    ("Brexit Vote", "2016-06"),
    ("COVID Lockdown", "2020-03")
]

selected_countries = ['united kingdom', 'united states', 'japan', 'euro area', 'sweden', 'switzerland']

sentiment_change_df = sentiment_change_around_events(speeches, selected_countries, events)

# Show result
sentiment_change_df

Unnamed: 0,Country,Event,Before Avg Sentiment,After Avg Sentiment,Change,Direction
0,United Kingdom,Global Financial Crisis,0.082,-0.011,-0.093,↓
1,United States,Global Financial Crisis,0.128,0.027,-0.101,↓
2,Japan,Global Financial Crisis,0.259,-0.027,-0.286,↓
3,Euro Area,Global Financial Crisis,0.365,0.161,-0.204,↓
4,Sweden,Global Financial Crisis,0.178,-0.025,-0.203,↓
5,Switzerland,Global Financial Crisis,-0.011,-0.024,-0.014,↓
6,United Kingdom,Brexit Vote,0.092,0.179,0.087,↑
7,United States,Brexit Vote,0.1,0.153,0.053,↑
8,Japan,Brexit Vote,0.261,0.319,0.058,↑
9,Euro Area,Brexit Vote,0.192,0.192,0.0,↑


In [526]:
# Filter to selected countries only
filtered_df = speeches[
    speeches['country'].str.lower().isin(selected_countries)
].copy()

# Standard deviation of sentiment by country
sentiment_volatility = (
    filtered_df.groupby(filtered_df['country'].str.title())['sentiment_lexicon_weighted']
    .std()
    .reset_index()
    .rename(columns={'sentiment_lexicon_weighted': 'Sentiment Std Dev'})
    .sort_values(by='Sentiment Std Dev', ascending=False)
)

# Show result
sentiment_volatility

Unnamed: 0,country,Sentiment Std Dev
0,Euro Area,0.248908
5,United States,0.237048
1,Japan,0.234558
3,Switzerland,0.210639
4,United Kingdom,0.200787
2,Sweden,0.19184


**Euro Area – Most Volatile**

* The European Central Bank (ECB) shows the highest sentiment volatility.

* This likely reflects both:

* The challenges of managing 20+ member states

* Shifting tones as the ECB navigated crises like the Eurozone debt crisis, Brexit, and COVID.

* The ECB often faces internal dissent, which may result in more inconsistent external messaging.

**United States – High Volatility**

* The Federal Reserve ranks second.

* This is consistent with its frequent communication, and transparent signaling of policy direction.

* The Fed adjusts tone to market conditions, and uses speeches to manage expectations, leading to natural tone shifts.

**Japan – Surprisingly Volatile**

* Despite its reputation for cautious communication, the Bank of Japan shows significant variability.

* This may reflect changes in tone around unconventional policies (e.g., yield curve control, negative interest rates) and the communication challenges of fighting persistent deflation.

**Switzerland & Sweden – Most Stable**

* Switzerland (SNB) and Sweden (Riksbank) have the least volatile sentiment.

* These smaller, independent central banks likely maintain a measured and conservative tone, with fewer external pressures and a focus on currency or inflation stability.

* Their tone reflects institutional caution and limited communication variance.

**United Kingdom – Mid-Low Volatility**

* The Bank of England sits in the middle–low range, suggesting it maintains tone discipline even during shocks like the GFC, Brexit, and COVID.

* BoE speeches seem to reflect policy stance changes without excessive shifts in sentiment, supporting the goal of reassurance and market stability.

#### Volatility by is_gov

In [529]:
# Ensure clean format
df = speeches.copy()
df['country'] = df['country'].str.title()
df['is_gov'] = df['is_gov'].fillna(0).astype(int)  # make sure it's 0/1

# Group by country and role
gov_volatility = (
    df[df['country'].isin([c.title() for c in selected_countries])]
    .groupby(['country', 'is_gov'])['sentiment_lexicon_weighted']
    .std()
    .reset_index()
    .rename(columns={'sentiment_lexicon_weighted': 'Sentiment Std Dev'})
    .sort_values(by=['country', 'is_gov'], ascending=[True, False])
)

# Add readable label for role
gov_volatility['Role'] = gov_volatility['is_gov'].map({1: 'Governor', 0: 'Non-Gov'})
gov_volatility = gov_volatility[['country', 'Role', 'Sentiment Std Dev']]
gov_volatility

Unnamed: 0,country,Role,Sentiment Std Dev
1,Euro Area,Governor,0.239553
0,Euro Area,Non-Gov,0.246279
3,Japan,Governor,0.250986
2,Japan,Non-Gov,0.223403
5,Sweden,Governor,0.185296
4,Sweden,Non-Gov,0.19415
7,Switzerland,Governor,0.20989
6,Switzerland,Non-Gov,0.211913
9,United Kingdom,Governor,0.227179
8,United Kingdom,Non-Gov,0.194019


In [530]:
fig_volatility = px.bar(
    gov_volatility,
    x='country',
    y='Sentiment Std Dev',
    color='Role',
    barmode='group',
    title='Sentiment Volatility by Country and Role (Governor vs Non-Governor)',
    labels={'country': 'Country', 'Sentiment Std Dev': 'Std Dev of Sentiment'}
)
fig_volatility.update_layout(height=450)
fig_volatility.show()

* Governor speeches are not always more stable — in fact, in high-stakes institutions like the Fed and BoJ, the Governor's tone is more reactive, not less.

* The UK Governor’s speech tone is moderately more volatile, supporting the idea that the BoE uses high-level speeches strategically to influence expectations.

* Switzerland stands out as the most tone-consistent institution, regardless of role — a reflection of its traditional, cautious policy posture.

#### Sentiment Change Around Events by Role

In [533]:
def sentiment_change_by_role(df, countries, events, window_months=12):
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    df['is_gov'] = df['is_gov'].fillna(0).astype(int)
    results = []

    for event_name, event_date_str in events:
        event_date = pd.to_datetime(event_date_str)
        before_window = event_date - pd.DateOffset(months=window_months)
        after_window = event_date + pd.DateOffset(months=window_months)

        for country in countries:
            for role in [0, 1]:
                role_df = df[
                    (df['country'].str.lower() == country.lower()) &
                    (df['is_gov'] == role)
                ]

                before = role_df[
                    (role_df['date'] >= before_window) & (role_df['date'] < event_date)
                ]['sentiment_lexicon_weighted'].mean()

                after = role_df[
                    (role_df['date'] > event_date) & (role_df['date'] <= after_window)
                ]['sentiment_lexicon_weighted'].mean()

                change = after - before
                results.append({
                    'Country': country.title(),
                    'Role': 'Governor' if role == 1 else 'Non-Gov',
                    'Event': event_name,
                    'Before Avg Sentiment': round(before, 3),
                    'After Avg Sentiment': round(after, 3),
                    'Change': round(change, 3),
                    'Direction': '↑' if change > 0 else '↓' if change < 0 else '→'
                })
    
    return pd.DataFrame(results)


In [534]:
events = [
    ("Global Financial Crisis", "2008-09"),
    ("Brexit Vote", "2016-06"),
    ("COVID Lockdown", "2020-03")
]

selected_countries = ['united kingdom', 'united states', 'japan', 'euro area', 'sweden', 'switzerland']

sentiment_by_role_events = sentiment_change_by_role(speeches, selected_countries, events)

# View table
sentiment_by_role_events

Unnamed: 0,Country,Role,Event,Before Avg Sentiment,After Avg Sentiment,Change,Direction
0,United Kingdom,Non-Gov,Global Financial Crisis,0.085,0.002,-0.083,↓
1,United Kingdom,Governor,Global Financial Crisis,0.035,-0.201,-0.236,↓
2,United States,Non-Gov,Global Financial Crisis,0.119,-0.036,-0.155,↓
3,United States,Governor,Global Financial Crisis,0.151,0.104,-0.046,↓
4,Japan,Non-Gov,Global Financial Crisis,0.258,-0.034,-0.293,↓
5,Japan,Governor,Global Financial Crisis,0.259,-0.018,-0.277,↓
6,Euro Area,Non-Gov,Global Financial Crisis,0.344,0.142,-0.202,↓
7,Euro Area,Governor,Global Financial Crisis,0.405,0.2,-0.205,↓
8,Sweden,Non-Gov,Global Financial Crisis,0.185,-0.035,-0.22,↓
9,Sweden,Governor,Global Financial Crisis,0.158,0.007,-0.152,↓


**Global Financial Crisis (GFC)**
* Universal drop in sentiment — every country and every role shifted to a more negative tone. But:

* UK Governor speeches dropped sharply (−0.236) — suggesting high responsiveness and seriousness in tone.

* Japan & Euro Area Governors and Non-Governors both showed large tone drops (~−0.2 to −0.3), signaling deep concern across leadership.

* US Governor sentiment dropped less than Non-Govs (−0.046 vs −0.155), suggesting tone stability or cautious reassurance from the Fed Chair.

* Switzerland was unique — Non-Gov sentiment increased (+0.151), possibly reflecting neutral or stabilizing messaging from lower-profile speakers.

***Governors generally reduced tone more strongly, emphasizing their role in crisis messaging. But the Fed and SNB took a calmer, stabilizing approach,***

**Brexit Vote (2016)**
* This event was UK-centric, but triggered modest global tone shifts.

* UK Governors and Non-Govs both raised sentiment, especially Governors (+0.148) — likely a deliberate reassurance strategy.

* ECB Governors also increased sentiment (+0.123), while ECB Non-Govs dropped slightly — perhaps reflecting split communication strategy within the Euro Area.

* Japan, USA, Switzerland — all show small or moderate positive tone shifts, showing international concern but measured optimism.

* Sweden Non-Govs dropped sentiment (−0.059), while Governors raised it (+0.037), indicating role-based divergence.

***Brexit triggered a measured positive shift from most central banks — with Governors generally leading the upward tone shift, likely to maintain market stability.***

**COVID Lockdown (2020)**
* A global sentiment collapse across all roles and countries — one of the most consistent findings.

* BoE and Fed Governors and Non-Govs all dropped sentiment by ~−0.14 to −0.17.

* Japan’s Governor cut tone more sharply (−0.198) than its Non-Govs (−0.052) — strong leadership response.

* Switzerland and Sweden both saw big tone drops — especially from Non-Govs (−0.229 for Switzerland, −0.219 for Sweden).

* Euro Area: large tone reductions again, but Non-Govs fell further (−0.186 vs −0.119).

***COVID led to across-the-board tone softening — but again, Governors responded more sharply in tone, especially in Japan, UK, and Switzerland.***