In [1]:
import pandas as pd

df = pd.read_excel('myebtablesuk20112022.xlsx', sheet_name = 'MYEB1', header = 1)

In [2]:
# target regions
target_LA = [
    'E08000001', 'E08000002', 'E08000003', 'E08000004', 'E08000005',
    'E08000006', 'E08000007', 'E08000008', 'E08000009', 'E08000010'
]

df_target = df[df['ladcode23'].isin(target_LA)]

# years
year_cols = [col for col in df.columns if 'population_' in col]

# summarize by years and regions
pop_LA = df_target.groupby('ladcode23')[year_cols].sum().T
pop_LA.index = [int(c.replace("population_", "")) for c in pop_LA.index]
pop_LA.info()

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 2011 to 2022
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   E08000001  12 non-null     int64
 1   E08000002  12 non-null     int64
 2   E08000003  12 non-null     int64
 3   E08000004  12 non-null     int64
 4   E08000005  12 non-null     int64
 5   E08000006  12 non-null     int64
 6   E08000007  12 non-null     int64
 7   E08000008  12 non-null     int64
 8   E08000009  12 non-null     int64
 9   E08000010  12 non-null     int64
dtypes: int64(10)
memory usage: 1.0 KB


In [3]:
pop_LA.head()

ladcode23,E08000001,E08000002,E08000003,E08000004,E08000005,E08000006,E08000007,E08000008,E08000009,E08000010
2011,277296,185422,502902,225157,211929,234487,283253,219727,227091,318122
2012,279738,186632,506869,226057,212345,237421,283953,220457,228512,318109
2013,281629,187093,510783,227866,212503,239718,284816,221446,230367,318737
2014,283212,188406,515360,230348,213462,242602,286267,222364,231799,320075
2015,284972,189254,523321,232693,214560,246942,288210,223642,232504,320864


In [4]:
pop_LA_1322 = pop_LA.loc[2013:2022]
pop_LA_1322.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 2013 to 2022
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   E08000001  10 non-null     int64
 1   E08000002  10 non-null     int64
 2   E08000003  10 non-null     int64
 3   E08000004  10 non-null     int64
 4   E08000005  10 non-null     int64
 5   E08000006  10 non-null     int64
 6   E08000007  10 non-null     int64
 7   E08000008  10 non-null     int64
 8   E08000009  10 non-null     int64
 9   E08000010  10 non-null     int64
dtypes: int64(10)
memory usage: 880.0 bytes


In [5]:
# read 2023 population
df_2023 = pd.read_excel(
    'mye23tablesew.xlsx',
    sheet_name='MYE2 - Persons',
    skiprows=7
)

print(df_2023.columns.tolist())

['Code', 'Name', 'Geography', 'All ages', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90+']


In [6]:
# target regions
df_2023_target = df_2023[df_2023['Code'].isin(target_LA)].copy()

age_cols = [col for col in df_2023_target.columns if col.isdigit()]

# sum by regions
pop_2023 = df_2023_target.set_index('Code')[age_cols].sum(axis=1)

pop_2023_df = pop_2023.to_frame().T
pop_2023_df.index = [2023]

pop_2023_df = pop_2023_df[pop_LA_1322.columns]

# merge
pop_LA_1323 = pd.concat([pop_LA_1322, pop_2023_df])

In [7]:
print(pop_LA_1323.info(), '\n')
print(pop_LA_1323.tail())

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 2013 to 2023
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   E08000001  11 non-null     int64
 1   E08000002  11 non-null     int64
 2   E08000003  11 non-null     int64
 3   E08000004  11 non-null     int64
 4   E08000005  11 non-null     int64
 5   E08000006  11 non-null     int64
 6   E08000007  11 non-null     int64
 7   E08000008  11 non-null     int64
 8   E08000009  11 non-null     int64
 9   E08000010  11 non-null     int64
dtypes: int64(10)
memory usage: 968.0 bytes
None 

      E08000001  E08000002  E08000003  E08000004  E08000005  E08000006  \
2019     294888     193763     545947     242077     222042     263611   
2020     295712     193604     547340     242181     223024     266090   
2021     296169     193866     550630     242003     224127     270866   
2022     299153     194590     566778     243993     226950     278867   
2023     300353     

In [8]:
pop_long = pop_LA_1323.reset_index().melt(
    id_vars='index', var_name='LOCAL_AUTHORITY', value_name='population'
).rename(columns={'index': 'year'})

pop_long.to_csv("population_long_2013_2023.csv", index=False)