In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
# Load the speeches dataset; the `hdate` column is parsed as datetimes so it
# can be compared against date bounds and resampled later on.
df = pd.read_csv(
    'all_speeches_and_person.csv',
    parse_dates=['hdate'],
)

## Speech frequency by party each year
Range: 2001-06-07 to 2023-12-31 (inclusive)

In [3]:
# Keep only speeches dated within the analysis window (both bounds inclusive)
# that have a person_id.
# The explicit .copy() makes df_filtered an independent frame, so the column
# assignment below never writes into a slice of `df` (this is the pattern that
# triggers pandas' SettingWithCopyWarning).
in_window = df['hdate'].between('2001-06-07', '2023-12-31')
df_filtered = df[in_window].dropna(subset=['person_id']).copy()

# Manual party_name overrides, keyed by person_id.
# NOTE(review): presumably these correct the party label supplied by the
# source data for these three people -- confirm the reason and record it here.
party_overrides = {
    'uk.org.publicwhip/person/13935': 'Independent',
    'uk.org.publicwhip/person/13377': 'Conservative',
    'uk.org.publicwhip/person/24726': 'Labour',
}

# map() yields NaN for every person without an override; those rows keep
# their existing party_name.
override_party = df_filtered['person_id'].map(party_overrides)
df_filtered['party_name'] = np.where(
    override_party.notna(),
    override_party,
    df_filtered['party_name']
)

In [23]:
# Long format: one row per (year start, party) with the number of speeches.
df_party_year_long = (
    df_filtered
    .groupby([pd.Grouper(key='hdate', freq='YS'), 'party_name'])
    .size()
    .reset_index(name='number_of_speeches')
    .rename(columns={'hdate': 'year'})
)

# Wide format: one row per year, one column per party; combinations with no
# speeches become 0.
df_party_year_wide = (
    df_party_year_long
    .pivot(index='year', columns='party_name', values='number_of_speeches')
    .reset_index()
    .fillna(0)
)

# Order the party columns by their total number of speeches (largest first),
# keeping 'year' as the leading column.
column_sums = df_party_year_wide.sum(numeric_only=True)
sorted_columns = column_sums.sort_values(ascending=False).index
df_party_year_wide_sorted = df_party_year_wide[['year', *sorted_columns]]

# Keep the listed parties as their own columns and sum every remaining party
# column into 'Other affiliations'.
columns_to_keep = ['year', 'Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'DUP']
df_party_year_wide_sorted_grouped = (
    df_party_year_wide_sorted[columns_to_keep]
    .rename(columns={'DUP': 'Democratic Unionist Party'})
    .assign(**{
        'Other affiliations': df_party_year_wide_sorted.drop(columns=columns_to_keep).sum(axis=1)
    })
)

In [32]:
# Stacked bar chart of the yearly speech counts, one trace per party column.
fig = go.Figure()


def add_bar(column, color):
    """Add one bar trace to the module-level `fig`.

    column: name of the column in df_party_year_wide_sorted_grouped to plot
            (also used as the trace name shown in the legend).
    color:  fill colour of the bars.
    """
    trace = go.Bar(
        name=column,
        x=df_party_year_wide_sorted_grouped['year'],
        y=df_party_year_wide_sorted_grouped[column],
        marker_color=color,
        marker_line_width=0,
        opacity=0.6,
        hovertemplate='%{y}',
    )
    fig.add_trace(trace)


# (column, colour) pairs; traces are added in exactly this order.
party_colours = (
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
)
for party, colour in party_colours:
    add_bar(party, colour)

fig.update_layout(
    title="Parliamentary Mentions of Hong Kong",
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
)

# Range-selector buttons: look back 1, 5 or 10 years, plus an "all" button.
range_buttons = [
    dict(count=span, label=f"{span}y", step="year", stepmode="backward")
    for span in (1, 5, 10)
]
range_buttons.append(dict(step="all"))

fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%Y',
    rangeselector={
        'buttons': range_buttons,
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='No. of speeches',
)

fig.show()

## Speech frequency by party each month
Range: 2001-06-07 to 2023-12-31 (inclusive)

In [30]:
# Long format: one row per (month start, party) with the number of speeches.
df_party_month_long = (
    df_filtered
    .groupby([pd.Grouper(key='hdate', freq='MS'), 'party_name'])
    .size()
    .reset_index(name='number_of_speeches')
    .rename(columns={'hdate': 'month'})
)

# Wide format: one row per month, one column per party; combinations with no
# speeches become 0.
df_party_month_wide = (
    df_party_month_long
    .pivot(index='month', columns='party_name', values='number_of_speeches')
    .reset_index()
    .fillna(0)
)

# Order the party columns by their total number of speeches (largest first),
# keeping 'month' as the leading column.
column_sums = df_party_month_wide.sum(numeric_only=True)
sorted_columns = column_sums.sort_values(ascending=False).index
df_party_month_wide_sorted = df_party_month_wide[['month', *sorted_columns]]

# Keep the listed parties as their own columns and sum every remaining party
# column into 'Other affiliations'.
columns_to_keep = ['month', 'Conservative', 'Labour', 'Liberal Democrat', 'Crossbench', 'Scottish National Party', 'DUP']
df_party_month_wide_sorted_grouped = (
    df_party_month_wide_sorted[columns_to_keep]
    .rename(columns={'DUP': 'Democratic Unionist Party'})
    .assign(**{
        'Other affiliations': df_party_month_wide_sorted.drop(columns=columns_to_keep).sum(axis=1)
    })
)

In [45]:
# Stacked bar chart of the monthly speech counts, one trace per party column.
fig = go.Figure()


def add_bar(column, color):
    """Add one bar trace to the module-level `fig`.

    column: name of the column in df_party_month_wide_sorted_grouped to plot
            (also used as the trace name shown in the legend).
    color:  fill colour of the bars.
    """
    trace = go.Bar(
        name=column,
        x=df_party_month_wide_sorted_grouped['month'],
        y=df_party_month_wide_sorted_grouped[column],
        marker_color=color,
        marker_line_width=0,
        opacity=0.6,
        hovertemplate='%{y}',
    )
    fig.add_trace(trace)


# (column, colour) pairs; traces are added in exactly this order.
party_colours = (
    ('Conservative', '#0087DC'),
    ('Labour', '#E4003B'),
    ('Liberal Democrat', '#FAA61A'),
    ('Crossbench', 'grey'),
    ('Scottish National Party', '#FDF38E'),
    ('Democratic Unionist Party', '#D46A4C'),
    ('Other affiliations', '#BDBDBD'),
)
for party, colour in party_colours:
    add_bar(party, colour)

fig.update_layout(
    title="Parliamentary Mentions of Hong Kong",
    showlegend=True,
    legend_orientation='h',
    margin={'t': 90, 'b': 30, 'l': 80, 'r': 80},
    barmode='stack',
    hovermode='x unified',
    template='plotly_white',
    paper_bgcolor='#eaeaea',
    plot_bgcolor='#eaeaea',
    hoverlabel={'bgcolor': '#eaeaea'},
    bargap=0.01,
)

# Range-selector buttons: look back 1, 5 or 10 years, plus an "all" button.
range_buttons = [
    dict(count=span, label=f"{span}y", step="year", stepmode="backward")
    for span in (1, 5, 10)
]
range_buttons.append(dict(step="all"))

fig.update_xaxes(
    linecolor='#d6d6d6',
    hoverformat='%B %Y',
    rangeselector={
        'buttons': range_buttons,
        'bgcolor': '#eaeaea',
        'x': 0,
        'y': 1,
    },
)

fig.update_yaxes(
    tickformat=',',
    gridcolor='#d6d6d6',
    title='No. of speeches',
)

fig.show()

## Normalise frequency by seats distribution
* The Labour and Co-operative Party is treated as Labour.
* There are slight inaccuracies in the calculation of Lords membership.

In [46]:
# Inspect the filtered frame: column names, non-null counts and dtypes.
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7156 entries, 9206 to 16435
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   gid                    7156 non-null   object        
 1   hdate                  7156 non-null   datetime64[ns]
 2   parent_body            7156 non-null   object        
 3   file_name              7156 non-null   object        
 4   html_file_name         4884 non-null   object        
 5   debate_type            7156 non-null   object        
 6   written_type           7156 non-null   object        
 7   speech_body            7156 non-null   object        
 8   full_url               7156 non-null   object        
 9   relevant_speeches      7156 non-null   float64       
 10  speaker_id             7156 non-null   object        
 11  person_id              7156 non-null   object        
 12  speaker_name           7156 non-null   object        
 13 

In [48]:
# List the distinct debate_type values present in the filtered speeches.
pd.unique(df_filtered['debate_type'])

array(['Scottish Parliament debates', 'Lords debates', 'Written Answers',
       'Westminster Hall debates', 'Scottish Parliament written answers',
       'Northern Ireland Assembly debates', 'Commons debates',
       'Written Ministerial Statements', 'Welsh Parliament record',
       'Questions to the Mayor of London', 'Public Bill Committees'],
      dtype=object)

In [None]:
# 'Lords debates'
# 'Westminster Hall debates', 'Commons debates', 'Public Bill Committees'
# 'Written Answers', 'Written Ministerial Statements'