In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Load the raw people/memberships/posts/organizations data.
# The context manager guarantees the file handle is closed after parsing, and
# the explicit encoding keeps the read independent of the platform default.
with open('people.json', encoding='utf-8') as people_file:
    person_dict = json.load(people_file)

In [49]:
# Build the memberships table: one row per entry in person_dict['memberships'].
# Columns are renamed so they stay unambiguous after later merges, bookkeeping
# columns that are not used downstream are discarded, and rows without a
# person_id are removed.
membership_column_names = {
    'id': 'membership_id',
    'on_behalf_of_id': 'party_id',
    'start_date': 'membership_start_date',
    'end_date': 'membership_end_date',
}
unused_membership_columns = [
    'end_reason',
    'identifiers',
    'start_reason',
    'role',
    'redirect',
    'reason',
    'name',
    'organization_id',
]
memberships_df = (
    pd.DataFrame(person_dict['memberships'])
    .rename(columns=membership_column_names)
    .drop(columns=unused_membership_columns)
    .dropna(subset=['person_id'])
)

# Build the posts table. json_normalize flattens nested objects using '_' as
# the separator, which is where the 'area_name' column comes from.
post_column_names = {
    'id': 'post_id',
    'label': 'post_name',
    'area_name': 'post_area_name',
}
unused_post_columns = ['identifiers', 'organization_id', 'role', 'start_date', 'end_date']
posts_df = (
    pd.json_normalize(person_dict['posts'], sep='_')
    .rename(columns=post_column_names)
    .drop(columns=unused_post_columns)
)

# Build the parties table from person_dict['organizations'].
# 'Labour/Co-operative' is rewritten to 'Labour' inside party_name so both
# spellings end up under the same party name.
parties_df = (
    pd.DataFrame(person_dict['organizations'])
    .rename(columns={'id': 'party_id', 'name': 'party_name'})
    .assign(
        party_name=lambda parties: parties['party_name'].str.replace('Labour/Co-operative', 'Labour')
    )
    .drop(columns=['classification', 'identifiers'])
)

In [31]:
# Attach post details to each membership. A left join keeps every membership,
# including those whose post_id has no match in posts_df.
memberships_posts_df = memberships_df.merge(posts_df, how='left', on='post_id')

# Attach the party name the same way; memberships without a matching party_id
# are kept with missing party columns.
memberships_posts_parties_df = memberships_posts_df.merge(parties_df, how='left', on='party_id')

# Restrict to UK Parliament memberships: rows labelled 'Peer', plus rows whose
# post name mentions 'Member of Parliament'.
is_peer = memberships_posts_parties_df['label'] == 'Peer'
# na=False: memberships that matched no post in the left join have a missing
# post_name; they must count as "not an MP" explicitly instead of injecting
# NaN into the boolean mask.
is_mp = memberships_posts_parties_df['post_name'].str.contains('Member of Parliament', na=False)
uk_parliament_df = memberships_posts_parties_df[is_peer | is_mp].copy()

# Every remaining row without a label was selected through its post name, so
# label it as a Commons membership.
uk_parliament_df['label'] = uk_parliament_df['label'].fillna('Member of Parliament')

# Parse the date strings so they can be compared against timestamps below.
uk_parliament_df['membership_start_date'] = pd.to_datetime(uk_parliament_df['membership_start_date'])
uk_parliament_df['membership_end_date'] = pd.to_datetime(uk_parliament_df['membership_end_date'])

# Keep memberships with no end date, or that ended on/after 2001-01-01.
has_no_end_date = uk_parliament_df['membership_end_date'].isna()
ended_2001_or_later = uk_parliament_df['membership_end_date'] >= '2001-01-01'
uk_parliament_df = uk_parliament_df[has_no_end_date | ended_2001_or_later]

In [45]:
# Monthly party head-counts, computed separately for the Lords ('Peer') and
# the Commons ('Member of Parliament').
monthly_series = pd.date_range(start='2001-01-01', end='2023-12-01', freq='MS')

# Memberships with no end date are treated as running until "now". This does
# not depend on the month, so it is computed once instead of on every pass.
effective_end_dates = uk_parliament_df['membership_end_date'].fillna(pd.Timestamp('now'))

# Per-month frames are collected in lists and concatenated once at the end;
# growing a DataFrame with concat inside the loop re-copies all earlier rows
# on every iteration.
lords_frames = []
commons_frames = []

for month in monthly_series:
    # Memberships active on the first day of this month, one row per person.
    active = uk_parliament_df[
        (uk_parliament_df['membership_start_date'] <= month)
        & (effective_end_dates >= month)
    ].drop_duplicates(subset='person_id')

    # One row per house label, one column per party.
    counts = active.groupby(['label', 'party_name']).size().reset_index(name='count')
    pivoted = counts.pivot(index='label', columns='party_name', values='count').reset_index()

    for house_label, house_frames in (
        ('Peer', lords_frames),
        ('Member of Parliament', commons_frames),
    ):
        # Drop parties that have no members in this house for this month.
        house_month = pivoted[pivoted['label'] == house_label].dropna(axis=1, how='all')
        house_month.insert(0, 'month', month)
        house_frames.append(house_month)

lords_df = pd.concat(lords_frames, axis=0)
commons_df = pd.concat(commons_frames, axis=0)

# Row totals across the numeric (party count) columns; missing parties are skipped.
lords_df['total_seats'] = lords_df.sum(axis=1, numeric_only=True, skipna=True)
commons_df['total_seats'] = commons_df.sum(axis=1, numeric_only=True, skipna=True)

In [58]:
# Monthly party head-counts across both houses combined.
monthly_series = pd.date_range(start='2001-01-01', end='2023-12-01', freq='MS')

# Memberships with no end date are treated as running until "now". This does
# not depend on the month, so it is computed once instead of on every pass.
effective_end_dates = uk_parliament_df['membership_end_date'].fillna(pd.Timestamp('now'))

# Per-month rows are collected in a list and concatenated once at the end;
# growing a DataFrame with concat inside the loop re-copies all earlier rows
# on every iteration.
monthly_frames = []

for month in monthly_series:
    # Memberships active on the first day of this month, one row per person.
    active = uk_parliament_df[
        (uk_parliament_df['membership_start_date'] <= month)
        & (effective_end_dates >= month)
    ].drop_duplicates(subset='person_id')

    counts = active.groupby('party_name').size().reset_index(name='count')
    # A constant index value lets pivot spread the parties across a single row.
    counts['constant'] = 0
    month_row = (
        counts.pivot(index='constant', columns='party_name', values='count')
        .reset_index()
        .drop(columns='constant')
    )
    month_row.insert(0, 'month', month)
    monthly_frames.append(month_row)

both_houses_df = pd.concat(monthly_frames, axis=0)

# Row total across the numeric (party count) columns; missing parties are skipped.
both_houses_df['total_seats'] = both_houses_df.sum(axis=1, numeric_only=True, skipna=True)

In [None]:
# Write the three monthly tables to CSV (without the index).
# to_csv does not create missing directories, so make sure the output folder
# exists first; this keeps a run from a fresh checkout from failing here.
output_dir = Path('intermediate_outputs')
output_dir.mkdir(parents=True, exist_ok=True)

lords_df.to_csv(output_dir / 'monthly_lords_membership.csv', index=False)
commons_df.to_csv(output_dir / 'monthly_commons_membership.csv', index=False)
both_houses_df.to_csv(output_dir / 'monthly_uk_parliament_membership.csv', index=False)