# 02 Prep parliamentary constituency data 

> A first look at the ONS data at parliamentary constituency level. This includes total population, age, ethnicity and educational attainment.   

---

In [1]:
#|default_exp core.01_prep_data

In [2]:
#|hide
# Run nbdev's notebook-to-module export step (hidden from the rendered docs).
import nbdev; nbdev.nbdev_export()

In [3]:
#|hide
from nbdev.showdoc import show_doc

In [4]:
#|export
import dementia_inequalities as proj
from dementia_inequalities import const, log, utils, tools
import adu_proj.utils as adutils

In [5]:
#|export
import numpy as np 
import pandas as pd 

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


---

Start by loading the datasets: 
* mid-2020 parliamentary constituency population estimates by age
* 2021 census ethnicity data
* 2021 census educational qualifications data. 

In [6]:
#|export
# Mid-2020 population estimates by age; only the 'Mid-2020 Persons' sheet is read.
df_pc_age = pd.read_excel(
    io=const.data_path + '/mid2020parliconsyoaestimatesage.xlsx',
    sheet_name='Mid-2020 Persons',
)
# 2021 census ethnicity data.
df_pc_eth = pd.read_excel(
    io=const.data_path + '/ethnicity_2021census.xlsx',
)
# 2021 census educational qualifications data.
df_pc_educ = pd.read_excel(
    io=const.data_path + '/qualifications_2021census.xlsx',
)

  warn("""Cannot parse header or footer so it will be ignored""")


In [7]:
# Tidy table: the real column names sit in row 3 of the sheet, below the
# leading non-data rows, so promote that row to the header and drop rows 0-3.
# Guarded on the presence of the 'PCON11CD' column so that re-running this cell
# does not promote a data row to header and drop four more constituencies.
if 'PCON11CD' not in df_pc_age.columns:
    header_row = df_pc_age.iloc[3]
    df_pc_age = df_pc_age.iloc[4:].copy()
    df_pc_age.columns = header_row

# Add columns for the population aged 65 and over (count and share of all ages).
# The sum covers the explicit 65 -> '90+' column span rather than "65 to the
# last column", so a re-run cannot fold the derived over_65 / over_65_pc
# columns back into the total.
age_65_loc = df_pc_age.columns.get_loc(65.0)
age_90_plus_loc = df_pc_age.columns.get_loc('90+')
df_pc_age['over_65'] = df_pc_age.iloc[:, age_65_loc:age_90_plus_loc + 1].sum(axis=1)
df_pc_age['over_65_pc'] = df_pc_age['over_65'] / df_pc_age['All Ages']

In [8]:
# Preview the first rows of the tidied age table, including the derived over_65 columns.
df_pc_age.head()

3,PCON11CD,PCON11NM,All Ages,0.0,1.0,2.0,3.0,4.0,5.0,6.0,...,83.0,84.0,85.0,86.0,87.0,88.0,89.0,90+,over_65,over_65_pc
4,E14000530,Aldershot,105168,1313.0,1401.0,1436.0,1294.0,1347.0,1491.0,1323.0,...,449.0,362.0,317.0,322.0,230.0,186.0,179.0,802,16472.0,0.156626
5,E14000531,Aldridge-Brownhills,77683,783.0,789.0,840.0,784.0,822.0,908.0,897.0,...,568.0,461.0,412.0,348.0,333.0,319.0,253.0,922,18114.0,0.233178
6,E14000532,Altrincham and Sale West,102444,943.0,1058.0,1130.0,1198.0,1390.0,1287.0,1416.0,...,511.0,455.0,436.0,376.0,346.0,311.0,292.0,1252,19395.0,0.189323
7,E14000533,Amber Valley,92277,815.0,902.0,932.0,1008.0,957.0,964.0,939.0,...,466.0,409.0,350.0,327.0,318.0,237.0,191.0,897,20266.0,0.219621
8,E14000534,Arundel and South Downs,102673,789.0,779.0,903.0,938.0,984.0,1097.0,1052.0,...,737.0,690.0,590.0,552.0,496.0,436.0,427.0,1562,29079.0,0.28322


In [9]:
# Reshape the long-format education and ethnicity tables to wide format:
# one row per index key, one column per category, cells holding `Con_pc`.
# (pivot_table's default aggregation is the mean, should a key/category pair repeat.)
df_pc_educ_wide = pd.pivot_table(
    df_pc_educ.loc[:, ['RegNationName', 'ConstituencyName', 'groups', 'Con_pc']],
    index=['RegNationName', 'ConstituencyName'],
    columns='groups',
    values='Con_pc',
)
df_pc_eth_wide = pd.pivot_table(
    df_pc_eth.loc[:, ['ConstituencyName', 'ethnic_groups', 'Con_pc']],
    index='ConstituencyName',
    columns='ethnic_groups',
    values='Con_pc',
)

In [10]:
# Preview the wide education table (indexed by region and constituency).
df_pc_educ_wide.head()

Unnamed: 0_level_0,groups,1 or more GCSEs,2 or more A levels,Apprenticeship,Higher education qualifications,No qualifications,Other qualifications
RegNationName,ConstituencyName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
East Midlands,Amber Valley,0.131008,0.183863,0.070313,0.243244,0.216873,0.023691
East Midlands,Ashfield,0.139701,0.176188,0.068853,0.205517,0.24421,0.02583
East Midlands,Bassetlaw,0.129686,0.173597,0.067468,0.251465,0.217066,0.031033
East Midlands,Bolsover,0.133019,0.177657,0.06765,0.222011,0.239934,0.026709
East Midlands,Boston and Skegness,0.136154,0.145172,0.07177,0.182568,0.286914,0.041268


In [11]:
# Preview the wide ethnicity table (indexed by constituency).
df_pc_eth_wide.head()

ethnic_groups,African,Any other ethnic group,Arab,Bangladeshi,Caribbean,Chinese,"English, Welsh, Scottish, Northern Irish or British",Gypsy or Irish Traveller,Indian,Irish,Other Asian,Other Black,Other Mixed or Multiple ethnic groups,Other White,Pakistani,Roma,White and Asian,White and Black African,White and Black Caribbean
ConstituencyName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Aberavon,0.002926,0.003042,0.00075,0.005795,0.001024,0.003431,0.935,0.001989,0.005781,0.003373,0.005709,0.000793,0.002725,0.014646,0.002465,0.000346,0.004022,0.001658,0.004527
Aberconwy,0.001253,0.002345,0.000895,0.001253,0.000609,0.003168,0.938102,0.000555,0.002452,0.007715,0.003258,0.000519,0.002757,0.026259,0.000859,0.000233,0.003938,0.001539,0.002291
Aldershot,0.014307,0.024408,0.00177,0.002867,0.006237,0.005024,0.72202,0.001905,0.020625,0.006659,0.099161,0.00408,0.006471,0.052896,0.011359,0.001024,0.008295,0.004098,0.006794
Aldridge-Brownhills,0.007418,0.009324,0.000451,0.001172,0.009156,0.003503,0.864846,0.000721,0.043,0.006619,0.004636,0.002267,0.003941,0.014243,0.00913,0.000193,0.006568,0.001365,0.011449
Altrincham and Sale West,0.010701,0.013912,0.010959,0.002398,0.004033,0.027615,0.751504,0.000109,0.044579,0.018806,0.009641,0.001784,0.008075,0.034581,0.036582,0.000614,0.011454,0.003934,0.00872


Let's check the number of rows in each dataframe. Each should have one row per constituency: all the English parliamentary constituencies (533) plus all the Welsh constituencies (40), i.e. 573 in total. 

In [12]:
# Row counts, one per line, in the order: ethnicity, education, age.
print(len(df_pc_eth_wide), len(df_pc_educ_wide), len(df_pc_age), sep='\n')

573
573
573
