In [111]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [112]:
# Load the speeches table; 'hdate' is parsed as a datetime so the cells below
# can filter on a date range and group by year/month.
df = pd.read_csv('all_speeches_and_person.csv', parse_dates=['hdate'])

## Speech frequency by party each year
* Range: 2001-06-07 to 2023-12-31
* Labour/Co-operative speeches are counted as Labour

In [113]:
# Keep only speeches inside the study window that have a known speaker.
# The explicit .copy() makes df_filtered an independent frame, so the column
# assignments below never write into a slice of df (no SettingWithCopyWarning).
in_study_window = (df['hdate'] >= '2001-06-07') & (df['hdate'] <= '2023-12-31')
df_filtered = df[in_study_window].dropna(subset=['person_id']).copy()

# Manual party overrides for individual people, keyed by person_id.
party_overrides = {
    'uk.org.publicwhip/person/13935': 'Independent',
    'uk.org.publicwhip/person/13377': 'Conservative',
    'uk.org.publicwhip/person/24726': 'Labour',
}
for overridden_person_id, overridden_party in party_overrides.items():
    df_filtered.loc[
        df_filtered['person_id'] == overridden_person_id, 'party_name'
    ] = overridden_party

# Harmonise party labels. regex=False pins literal matching, because the default
# of the `regex` argument differs between pandas versions.
# NOTE(review): these are substring replacements, so any other label containing
# 'DUP' would be rewritten as well - confirm that no such label exists.
df_filtered['party_name'] = (
    df_filtered['party_name']
    .str.replace('Labour/Co-operative', 'Labour', regex=False)
    .str.replace('DUP', 'Democratic Unionist Party', regex=False)
)


In [114]:
# Long format: one row per (calendar year, party) holding the number of speeches.
df_party_year_long = (
    df_filtered
    .groupby([pd.Grouper(key='hdate', freq='YS'), 'party_name'])
    .size()
    .reset_index(name='number_of_speeches')
    .rename(columns={'hdate': 'year'})
)

# Wide format: one column per party; year/party combinations with no row become 0.
df_party_year_wide = (
    df_party_year_long
    .pivot(index='year', columns='party_name', values='number_of_speeches')
    .reset_index()
    .fillna(0)
)

# Order the party columns by their overall total (largest first), with 'year' in front.
column_sums = df_party_year_wide.sum(numeric_only=True)
sorted_columns = column_sums.sort_values(ascending=False).index
df_party_year_wide_sorted = df_party_year_wide[['year', *sorted_columns]]

# Keep the named parties and pool every remaining column into 'Other affiliations'.
columns_to_keep = ['year', 'Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'Democratic Unionist Party']
df_party_year_wide_sorted_grouped = df_party_year_wide_sorted[columns_to_keep].copy()
df_party_year_wide_sorted_grouped['Other affiliations'] = df_party_year_wide_sorted.drop(columns=columns_to_keep).sum(axis=1)

# Turn zero counts into missing values for plotting. np.nan is used instead of
# None: replace(0, None) pad-fills on pandas < 1.4 and produces object-dtype
# columns on later versions, whereas NaN keeps the columns float everywhere.
df_party_year_wide_sorted_grouped = df_party_year_wide_sorted_grouped.replace(0, np.nan)

In [115]:
fig = go.Figure()


def add_bar(column, color):
    """Add one bar trace to `fig` for a column of the yearly party table.

    column: column of df_party_year_wide_sorted_grouped to plot; also the trace name.
    color:  marker colour of the bars.
    """
    yearly_counts = df_party_year_wide_sorted_grouped
    bar_trace = go.Bar(
        name=column,
        x=yearly_counts['year'],
        y=yearly_counts[column],
        marker_color=color,
        marker_line_width=0,
        opacity=0.6,
        hovertemplate='%{y}',
    )
    fig.add_trace(bar_trace)


# One trace per party, added in this fixed order.
for bar_name, bar_color in [
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
]:
    add_bar(bar_name, bar_color)

fig.update_layout(
    title="Parliamentary Mentions of Hong Kong",
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
)

# Range-selector buttons: last 1 / 5 / 10 years, plus a button for the full range.
fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%Y',
    rangeselector={
        'buttons': [
            {'count': years, 'label': f"{years}y", 'step': "year", 'stepmode': "backward"}
            for years in (1, 5, 10)
        ] + [{'step': "all"}],
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='No. of mentions',
)

fig.show()

## Speech frequency by party each month
* Range: 2001-06-07 to 2023-12-31
* Labour/Co-operative speeches are counted as Labour

In [116]:
# Long format: one row per (calendar month, party) holding the number of speeches.
df_party_month_long = (
    df_filtered
    .groupby([pd.Grouper(key='hdate', freq='MS'), 'party_name'])
    .size()
    .reset_index(name='number_of_speeches')
    .rename(columns={'hdate': 'month'})
)

# Wide format: one column per party; month/party combinations with no row become 0.
df_party_month_wide = (
    df_party_month_long
    .pivot(index='month', columns='party_name', values='number_of_speeches')
    .reset_index()
    .fillna(0)
)

# Order the party columns by their overall total (largest first), with 'month' in front.
column_sums = df_party_month_wide.sum(numeric_only=True)
sorted_columns = column_sums.sort_values(ascending=False).index
df_party_month_wide_sorted = df_party_month_wide[['month', *sorted_columns]]

# Keep the named parties and pool every remaining column into 'Other affiliations'.
columns_to_keep = ['month', 'Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'Democratic Unionist Party']
df_party_month_wide_sorted_grouped = df_party_month_wide_sorted[columns_to_keep].copy()
df_party_month_wide_sorted_grouped['Other affiliations'] = df_party_month_wide_sorted.drop(columns=columns_to_keep).sum(axis=1)

# Turn zero counts into missing values for plotting. np.nan is used instead of
# None: replace(0, None) pad-fills on pandas < 1.4 and produces object-dtype
# columns on later versions, whereas NaN keeps the columns float everywhere.
df_party_month_wide_sorted_grouped = df_party_month_wide_sorted_grouped.replace(0, np.nan)

In [117]:
fig = go.Figure()


def add_bar(column, color):
    """Add one bar trace to `fig` for a column of the monthly party table.

    column: column of df_party_month_wide_sorted_grouped to plot; also the trace name.
    color:  marker colour of the bars.
    """
    monthly_counts = df_party_month_wide_sorted_grouped
    bar_trace = go.Bar(
        name=column,
        x=monthly_counts['month'],
        y=monthly_counts[column],
        marker_color=color,
        marker_line_width=0,
        opacity=0.6,
        hovertemplate='%{y}',
    )
    fig.add_trace(bar_trace)


# One trace per party, added in this fixed order.
for bar_name, bar_color in [
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
]:
    add_bar(bar_name, bar_color)

fig.update_layout(
    title="Parliamentary Mentions of Hong Kong",
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
    bargap=0.01,
)

# Range-selector buttons: last 1 / 5 / 10 years, plus a button for the full range.
fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%B %Y',
    rangeselector={
        'buttons': [
            {'count': years, 'label': f"{years}y", 'step': "year", 'stepmode': "backward"}
            for years in (1, 5, 10)
        ] + [{'step': "all"}],
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='No. of mentions',
)

fig.show()

## Normalise frequency by seats distribution
* Labour and Co-operative Party are treated as Labour.
* There are slight inaccuracies in the calculation of House of Lords membership.
* Only speeches in 'Lord debates', 'Westminster Hall debates', 'Commons debates', 'Public Bill Committees', 'Written Answers', 'Written Ministerial Statements' are counted.

In [118]:
# Load the per-month membership table; 'month' is parsed as a datetime so it can
# be used as the merge key against the monthly speech counts further down.
parliament_membership = pd.read_csv('intermediate_outputs/monthly_uk_parliament_membership.csv', parse_dates=['month'])

In [119]:
# Columns carried over unchanged; every other column of the membership table is
# summed row-wise into a single 'Other affiliations' column.
columns_to_keep = ['month', 'Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'DUP', 'total_seats']
parliament_membership_simplified = (
    parliament_membership[columns_to_keep]
    # Use the same party label as the speech tables.
    .rename(columns={'DUP': 'Democratic Unionist Party'})
    .assign(**{
        'Other affiliations': parliament_membership.drop(columns=columns_to_keep).sum(axis=1),
    })
)

In [120]:
# Restrict the speeches to the listed debate types.
df_filtered_parliament = df_filtered[
    df_filtered['debate_type'].isin([
        'Lord debates',
        'Westminster Hall debates',
        'Commons debates',
        'Public Bill Committees',
        'Written Answers',
        'Written Ministerial Statements',
    ])
]

# Long format: one row per (calendar month, party) holding the number of speeches.
df_party_month_long_parliament = (
    df_filtered_parliament
    .groupby([pd.Grouper(key='hdate', freq='MS'), 'party_name'])
    .size()
    .reset_index(name='number_of_speeches')
    .rename(columns={'hdate': 'month'})
)

# Wide format: one column per party; month/party combinations with no row become 0.
df_party_month_wide_parliament = (
    df_party_month_long_parliament
    .pivot(index='month', columns='party_name', values='number_of_speeches')
    .reset_index()
    .fillna(0)
)

# Keep the named parties and pool every remaining column into 'Other affiliations'.
columns_to_keep = ['month', 'Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'Democratic Unionist Party']
df_party_month_wide_parliament_simplified = (
    df_party_month_wide_parliament[columns_to_keep]
    .assign(**{
        'Other affiliations': df_party_month_wide_parliament.drop(columns=columns_to_keep).sum(axis=1),
    })
)

In [121]:
# Attach the seat counts to the monthly speech counts. Party columns exist on
# both sides: the left (speech) columns keep their name, the right (seat)
# columns receive the '_seats' suffix.
monthly_speech_membership = df_party_month_wide_parliament_simplified.merge(
    parliament_membership_simplified,
    how='left',
    on='month',
    suffixes=(None, '_seats'),
)

for party in ['Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'Democratic Unionist Party', 'Other affiliations']:
    party_seats = party + '_seats'
    column_name = party + '_speech_per_member'
    # Speeches divided by seats for the month; zeros become NaN so they are
    # treated as missing. The result is assigned back explicitly: calling
    # .replace(..., inplace=True) on monthly_speech_membership[column_name] is a
    # chained in-place update, which is deprecated and does nothing under
    # pandas Copy-on-Write. np.nan (rather than None) keeps the column float.
    monthly_speech_membership[column_name] = (
        monthly_speech_membership[party] / monthly_speech_membership[party_seats]
    ).replace(0, np.nan)

### Monthly graph

In [122]:
fig = go.Figure()


def add_bar(column, color):
    """Add one bar trace to `fig` with a party's monthly speeches per member.

    column: party name; the plotted column is `column + '_speech_per_member'`
            from monthly_speech_membership, and `column` is the trace name.
    color:  marker colour of the bars.
    """
    per_member_column = column + '_speech_per_member'
    bar_trace = go.Bar(
        name=column,
        x=monthly_speech_membership['month'],
        y=monthly_speech_membership[per_member_column],
        marker_color=color,
        marker_line_width=0,
        opacity=0.6,
        hovertemplate='%{y:.3f}',
    )
    fig.add_trace(bar_trace)


# One trace per party, added in this fixed order.
for bar_name, bar_color in [
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
]:
    add_bar(bar_name, bar_color)

fig.update_layout(
    title='UK Parliament Mentions of Hong Kong per Party Member',
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
    bargap=0.01,
)

# Range-selector buttons: last 1 / 5 / 10 years, plus a button for the full range.
fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%B %Y',
    rangeselector={
        'buttons': [
            {'count': years, 'label': f"{years}y", 'step': "year", 'stepmode': "backward"}
            for years in (1, 5, 10)
        ] + [{'step': "all"}],
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='Mentions per member',
)

fig.show()

### Yearly graph

In [123]:
# Yearly figure per party = sum of that party's monthly speeches-per-member values
# within each calendar year; the grouping key is then exposed as a 'year' column.
yearly_speech_membership = (
    monthly_speech_membership
    .groupby(pd.Grouper(key='month', freq='YS'))[
        [
            f'{party_label}_speech_per_member'
            for party_label in (
                'Conservative',
                'Labour',
                'Liberal Democrat',
                'Crossbench',
                'Scottish National Party',
                'Democratic Unionist Party',
                'Other affiliations',
            )
        ]
    ]
    .sum()
    .reset_index(names='year')
)

In [130]:
fig = go.Figure()


def add_bar(column, color):
    """Add one bar trace to `fig` with a party's yearly speeches per member.

    column: party name; the plotted column is `column + '_speech_per_member'`
            from yearly_speech_membership, and `column` is the trace name.
    color:  marker colour of the bars.
    """
    per_member_column = column + '_speech_per_member'
    bar_trace = go.Bar(
        name=column,
        x=yearly_speech_membership['year'],
        y=yearly_speech_membership[per_member_column],
        marker_color=color,
        marker_line_width=0,
        opacity=0.6,
        hovertemplate='%{y:.2f}',
    )
    fig.add_trace(bar_trace)


# One trace per party, added in this fixed order.
for bar_name, bar_color in [
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
]:
    add_bar(bar_name, bar_color)

fig.update_layout(
    title="UK Parliament Mentions of Hong Kong per Party Member",
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
)

# Range-selector buttons: last 1 / 5 / 10 years, plus a button for the full range.
fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%Y',
    rangeselector={
        'buttons': [
            {'count': years, 'label': f"{years}y", 'step': "year", 'stepmode': "backward"}
            for years in (1, 5, 10)
        ] + [{'step': "all"}],
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='Mentions per member',
)

fig.show()

In [139]:
fig = go.Figure()


def add_line(column, color):
    """Add one line trace to `fig` with a party's yearly speeches per member.

    column: party name; the plotted column is `column + '_speech_per_member'`
            from yearly_speech_membership, and `column` is the trace name.
    color:  colour passed to the trace as `marker_color`.
    """
    per_member_column = column + '_speech_per_member'
    line_trace = go.Scatter(
        mode='lines',
        name=column,
        x=yearly_speech_membership['year'],
        y=yearly_speech_membership[per_member_column],
        marker_color=color,
        line_width=2.5,
        opacity=0.8,
        hovertemplate='%{y:.2f}',
    )
    fig.add_trace(line_trace)


# One trace per party, added in this fixed order.
for line_name, line_color in [
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
]:
    add_line(line_name, line_color)

fig.update_layout(
    title="UK Parliament Mentions of Hong Kong per Party Member",
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
)

# Range-selector buttons: last 1 / 5 / 10 years, plus a button for the full range.
fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%Y',
    rangeselector={
        'buttons': [
            {'count': years, 'label': f"{years}y", 'step': "year", 'stepmode': "backward"}
            for years in (1, 5, 10)
        ] + [{'step': "all"}],
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
    showgrid=False,
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='Mentions per member',
    rangemode='tozero',
)

fig.show()

In [125]:
# Sanity check: list the column names of the yearly per-member table.
yearly_speech_membership.columns

Index(['year', 'Conservative_speech_per_member', 'Labour_speech_per_member',
       'Liberal Democrat_speech_per_member', 'Crossbench_speech_per_member',
       'Scottish National Party_speech_per_member',
       'Democratic Unionist Party_speech_per_member',
       'Other affiliations_speech_per_member'],
      dtype='object')

In [126]:
# Sanity check: row count, dtypes and non-null counts of the yearly per-member table.
yearly_speech_membership.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 8 columns):
 #   Column                                       Non-Null Count  Dtype         
---  ------                                       --------------  -----         
 0   year                                         23 non-null     datetime64[ns]
 1   Conservative_speech_per_member               23 non-null     float64       
 2   Labour_speech_per_member                     23 non-null     float64       
 3   Liberal Democrat_speech_per_member           23 non-null     float64       
 4   Crossbench_speech_per_member                 23 non-null     float64       
 5   Scottish National Party_speech_per_member    23 non-null     float64       
 6   Democratic Unionist Party_speech_per_member  23 non-null     float64       
 7   Other affiliations_speech_per_member         23 non-null     float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 1.6 KB


In [127]:
# Sanity check: dtypes and non-null counts of the simplified membership table
# (seat columns with missing months show up here as lower non-null counts).
parliament_membership_simplified.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 276 entries, 0 to 275
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   month                      276 non-null    datetime64[ns]
 1   Conservative               276 non-null    int64         
 2   Labour                     276 non-null    int64         
 3   Liberal Democrat           276 non-null    int64         
 4   Crossbench                 276 non-null    int64         
 5   Scottish National Party    269 non-null    float64       
 6   Democratic Unionist Party  274 non-null    float64       
 7   total_seats                276 non-null    float64       
 8   Other affiliations         276 non-null    float64       
dtypes: datetime64[ns](1), float64(4), int64(4)
memory usage: 19.5 KB


In [128]:
# Sanity check: dtypes and non-null counts of the parliament-only monthly speech table.
df_party_month_wide_parliament_simplified.info()

# NOTE(review): the commented-out lines below duplicate the column-sorting step
# of the monthly pivot cell and are not executed here - candidates for deletion.
# column_sums = df_party_month_wide.sum(numeric_only=True)
# sorted_columns = column_sums.sort_values(ascending=False).index
# df_party_month_wide_sorted = df_party_month_wide[sorted_columns]
# df_party_month_wide_sorted.insert(0, 'month', df_party_month_wide['month'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   month                      244 non-null    datetime64[ns]
 1   Conservative               244 non-null    float64       
 2   Labour                     244 non-null    float64       
 3   Liberal Democrat           244 non-null    float64       
 4   Crossbench                 244 non-null    float64       
 5   Scottish National Party    244 non-null    float64       
 6   Democratic Unionist Party  244 non-null    float64       
 7   Other affiliations         244 non-null    float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 15.4 KB


In [129]:
# Sanity check: columns, dtypes and non-null counts of the speeches kept for the
# per-member normalisation.
df_filtered_parliament.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5310 entries, 9209 to 16435
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   gid                    5310 non-null   object        
 1   hdate                  5310 non-null   datetime64[ns]
 2   parent_body            5310 non-null   object        
 3   file_name              5310 non-null   object        
 4   html_file_name         3659 non-null   object        
 5   debate_type            5310 non-null   object        
 6   written_type           5310 non-null   object        
 7   speech_body            5310 non-null   object        
 8   full_url               5310 non-null   object        
 9   relevant_speeches      5310 non-null   float64       
 10  speaker_id             5310 non-null   object        
 11  person_id              5310 non-null   object        
 12  speaker_name           5310 non-null   object        
 13 