# Women's imprisonment rates
## Merged 2014–2022 timeseries: Calculating rates

#### Importing pandas library and reading in data

In [1]:
import pandas as pd

In [2]:
# Load the merged 2014–2022 table (file name marks it as NOT_REBASED) and show it.
df = pd.read_csv(
    'data/interim/merged_rate_pop_2014-2022_NOT_REBASED.csv'
)
df

Unnamed: 0,pfa,year,population,no_imp
0,Avon and Somerset,2014,640099,196
1,Avon and Somerset,2015,649120,165
2,Avon and Somerset,2016,656882,164
3,Avon and Somerset,2017,662282,158
4,Avon and Somerset,2018,669435,148
...,...,...,...,...
373,Wiltshire,2018,277329,49
374,Wiltshire,2019,277747,49
375,Wiltshire,2020,280304,34
376,Wiltshire,2021,290739,33


#### Calculating overall imprisonment rate for England and Wales per year

In [3]:
# Yearly totals of `population` and `no_imp`, summed over every row for that year.
eng_wales = df.groupby('year').agg(
    population=('population', 'sum'),
    no_imp=('no_imp', 'sum'),
)
eng_wales

Unnamed: 0_level_0,population,no_imp
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,22032119,7418
2015,22247804,7250
2016,22470678,7262
2017,22613978,7182
2018,22761785,6463
2019,22889010,5876
2020,23014964,4383
2021,22854345,4221
2022,22854345,4120


In [4]:
# Rate per 100,000 population: no_imp / population * 100000, one value per year.
# NOTE(review): the displayed 2021 and 2022 population totals are identical —
# presumably the 2021 figures are reused for 2022; confirm against the source data.
eng_wales['imp_rate'] = eng_wales['no_imp'].div(eng_wales['population']).mul(100000)
eng_wales

Unnamed: 0_level_0,population,no_imp,imp_rate
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,22032119,7418,33.669027
2015,22247804,7250,32.587486
2016,22470678,7262,32.317672
2017,22613978,7182,31.759118
2018,22761785,6463,28.394082
2019,22889010,5876,25.671709
2020,23014964,4383,19.044131
2021,22854345,4221,18.469136
2022,22854345,4120,18.027207


#### Just checking against previously published editions to see how comparable our earlier 2017 calculations were when using 2016 mid-year population figures.

In [5]:
# Police force areas counted as Wales; rows with any other `pfa` value are
# treated as England in the split further down.
wales = [
    'Dyfed-Powys',
    'Gwent',
    'North Wales',
    'South Wales',
]

In [6]:
# Keep only the rows whose `pfa` is one of the names in the `wales` list.
wales_stats = df.query(
    'pfa in @wales'
)
wales_stats

Unnamed: 0,pfa,year,population,no_imp
90,Dyfed-Powys,2014,203491,37
91,Dyfed-Powys,2015,203495,33
92,Dyfed-Powys,2016,203886,32
93,Dyfed-Powys,2017,204130,44
94,Dyfed-Powys,2018,204774,41
95,Dyfed-Powys,2019,205775,38
96,Dyfed-Powys,2020,207412,14
97,Dyfed-Powys,2021,203747,14
98,Dyfed-Powys,2022,203747,20
126,Gwent,2014,221606,90


In [7]:
# Yearly Welsh totals: sum `population` and `no_imp` over the Welsh rows.
wales_df = wales_stats.groupby('year').aggregate(
    {
        'population': 'sum',
        'no_imp': 'sum',
    }
)
wales_df

Unnamed: 0_level_0,population,no_imp
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,1198360,561
2015,1203629,542
2016,1211995,622
2017,1217912,610
2018,1224285,601
2019,1231609,507
2020,1240703,346
2021,1203638,361
2022,1203638,288


In [8]:
# Show the Welsh totals with a rate per 100,000 population added.
# `assign` returns a new frame, so `wales_df` itself is left unchanged.
wales_df.assign(
    imp_rate=lambda totals: totals['no_imp'].div(totals['population']).mul(100000)
)

Unnamed: 0_level_0,population,no_imp,imp_rate
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,1198360,561,46.813979
2015,1203629,542,45.030487
2016,1211995,622,51.320344
2017,1217912,610,50.08572
2018,1224285,601,49.089877
2019,1231609,507,41.165662
2020,1240703,346,27.887415
2021,1203638,361,29.992406
2022,1203638,288,23.92746


Now for England

In [9]:
# Everything that is not in the `wales` list is treated as an English PFA.
eng_stats = df.query(
    'pfa not in @wales'
)
eng_stats

Unnamed: 0,pfa,year,population,no_imp
0,Avon and Somerset,2014,640099,196
1,Avon and Somerset,2015,649120,165
2,Avon and Somerset,2016,656882,164
3,Avon and Somerset,2017,662282,158
4,Avon and Somerset,2018,669435,148
...,...,...,...,...
373,Wiltshire,2018,277329,49
374,Wiltshire,2019,277747,49
375,Wiltshire,2020,280304,34
376,Wiltshire,2021,290739,33


In [10]:
# Yearly English totals: sum `population` and `no_imp` over the non-Welsh rows.
eng_df = eng_stats.groupby('year').agg(
    population=('population', 'sum'),
    no_imp=('no_imp', 'sum'),
)
eng_df

Unnamed: 0_level_0,population,no_imp
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,20833759,6857
2015,21044175,6708
2016,21258683,6640
2017,21396066,6572
2018,21537500,5862
2019,21657401,5369
2020,21774261,4037
2021,21650707,3860
2022,21650707,3832


In [11]:
# Show the English totals with a rate per 100,000 population added.
# The result is only displayed; `eng_df` is not modified.
eng_df.assign(
    imp_rate=eng_df['no_imp'].div(eng_df['population']).mul(100000)
)

Unnamed: 0_level_0,population,no_imp,imp_rate
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,20833759,6857,32.912928
2015,21044175,6708,31.875804
2016,21258683,6640,31.234296
2017,21396066,6572,30.715927
2018,21537500,5862,27.217644
2019,21657401,5369,24.790602
2020,21774261,4037,18.540239
2021,21650707,3860,17.828517
2022,21650707,3832,17.699191


#### Continuing the analysis with the PFA data

In [17]:
# Rate per 100,000 population for every PFA/year row, rounded to one decimal place.
# No sort is applied here: assigning a Series to a DataFrame column aligns it on
# the index, so sorting the values first cannot change what is stored.
# Ranking is done where it is needed, on the displayed or pivoted frames.
df['imp_rate'] = (df['no_imp'] / df['population'] * 100000).round(1)

In [18]:
# Display the frame to check the newly added `imp_rate` column.
df

Unnamed: 0,pfa,year,population,no_imp,imp_rate
0,Avon and Somerset,2014,640099,196,30.6
1,Avon and Somerset,2015,649120,165,25.4
2,Avon and Somerset,2016,656882,164,25.0
3,Avon and Somerset,2017,662282,158,23.9
4,Avon and Somerset,2018,669435,148,22.1
...,...,...,...,...,...
373,Wiltshire,2018,277329,49,17.7
374,Wiltshire,2019,277747,49,17.6
375,Wiltshire,2020,280304,34,12.1
376,Wiltshire,2021,290739,33,11.4


Just checking these values against those that were previously agreed.

In [20]:
# 2019 rows only, ordered from the highest `imp_rate` to the lowest.
(
    df.query('year == 2019')
    .round(1)
    .sort_values(by='imp_rate', ascending=False)
)

Unnamed: 0,pfa,year,population,no_imp,imp_rate
275,South Wales,2019,522657,340,65.1
41,Cleveland,2019,215869,98,45.4
266,Nottinghamshire,2019,455708,184,40.4
203,Merseyside,2019,551401,213,38.6
356,West Midlands,2019,1093083,406,37.1
32,Cheshire,2019,410269,149,36.3
50,Cumbria,2019,198797,72,36.2
158,Humberside,2019,364142,129,35.4
365,West Yorkshire,2019,877410,301,34.3
230,North Wales,2019,274844,89,32.4


In [23]:
# Every year's row for the Metropolitan Police area.
df.query(
    'pfa == "Metropolitan Police"'
)

Unnamed: 0,pfa,year,population,no_imp,imp_rate
207,Metropolitan Police,2014,3249559,1290,39.7
208,Metropolitan Police,2015,3303914,1180,35.7
209,Metropolitan Police,2016,3351044,1272,38.0
210,Metropolitan Police,2017,3371167,1192,35.4
211,Metropolitan Police,2018,3409951,998,29.3
212,Metropolitan Police,2019,3430425,884,25.8
213,Metropolitan Police,2020,3459224,632,18.3
214,Metropolitan Police,2021,3296835,536,16.3
215,Metropolitan Police,2022,3296835,576,17.5


Despite some of the rates coming out ± 1, the ordering looks very similar to the previously published version, so I'm happy to go ahead with this.

In [None]:
# Last look at the long-format frame (one row per PFA and year) before it is written out.
df

In [22]:
# Save the long-format table; index=False leaves the row index out of the file.
df.to_csv(
    'data/processed/pfa_imprisonment_rates_2014-2022.csv',
    index=False,
)

Now to pivot this table to the final publication format

In [24]:
# pivot_table aggregates with the mean by default, so a duplicated (pfa, year)
# row would be silently averaged into the published table. Fail loudly instead.
assert not df.duplicated(subset=['pfa', 'year']).any(), 'duplicate pfa/year rows in df'

# Wide publication layout: one row per PFA, one column per year, cells hold imp_rate.
df_pivot = df.pivot_table(index='pfa', columns='year', values='imp_rate')

In [25]:
# Display the pivoted table (PFAs as rows, years as columns).
df_pivot

year,2014,2015,2016,2017,2018,2019,2020,2021,2022
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avon and Somerset,30.6,25.4,25.0,23.9,22.1,22.5,15.2,15.1,17.0
Bedfordshire,28.5,32.5,21.2,21.2,14.3,12.3,9.0,7.5,14.3
Cambridgeshire,28.4,27.4,34.2,35.0,35.1,26.9,23.4,13.6,19.6
Cheshire,42.3,45.0,41.3,42.4,43.2,36.3,29.9,27.4,17.4
Cleveland,42.8,36.6,50.5,70.9,65.1,45.4,25.4,47.9,46.5
Cumbria,46.6,52.0,46.4,52.5,66.5,36.2,22.7,20.0,14.5
Derbyshire,42.8,44.5,43.4,42.7,43.5,29.8,31.4,30.5,29.5
Devon and Cornwall,17.4,18.8,17.7,21.6,17.4,15.3,15.2,12.2,8.9
Dorset,18.7,22.2,17.1,23.8,16.9,19.8,11.3,12.3,7.4
Durham,33.8,31.2,32.6,26.0,31.9,16.4,22.3,20.4,23.2


In [29]:
# Order the PFAs from the lowest to the highest 2022 rate.
# The name is rebound rather than sorted with inplace=True, so the frame shown
# by the previous cell is not mutated behind the reader's back.
# mergesort is stable: PFAs with an equal 2022 rate keep their existing relative
# order, which makes the row order of the saved table reproducible.
df_pivot = df_pivot.sort_values(by=2022, ascending=True, kind='mergesort')
df_pivot

year,2014,2015,2016,2017,2018,2019,2020,2021,2022
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Surrey,20.4,14.2,14.1,16.9,9.8,9.3,6.4,7.5,4.2
Dorset,18.7,22.2,17.1,23.8,16.9,19.8,11.3,12.3,7.4
Gwent,40.6,34.1,39.2,48.3,38.4,17.5,7.8,11.1,7.5
Devon and Cornwall,17.4,18.8,17.7,21.6,17.4,15.3,15.2,12.2,8.9
Gloucestershire,27.9,29.7,24.1,23.8,16.7,14.6,7.3,5.2,9.2
West Mercia,23.6,20.8,22.6,24.6,21.2,22.3,19.2,12.1,9.2
Sussex,20.6,24.8,19.8,15.8,14.7,12.8,9.9,9.8,9.4
Dyfed-Powys,18.2,16.2,15.7,21.6,20.0,18.5,6.7,6.9,9.8
Hertfordshire,21.3,20.4,25.0,22.9,16.3,15.4,8.5,8.5,9.8
Wiltshire,11.9,16.2,17.9,17.0,17.7,17.6,12.1,11.4,10.3


All looks good, so let's save it out to a new CSV.

In [30]:
# Save the sorted wide table; the `pfa` index is written as the first column.
df_pivot.to_csv(
    'data/processed/pfa_imprisonment_rates_2014-2022_final_table.csv'
)