# Women's imprisonment rates
## Criminal Justice Statistics Police Force Area: Short custodial sentence lengths for tables

#### Importing pandas library and reading in data

In [30]:
import pandas as pd
# Load the interim extract of women's custodial sentence counts, broken down
# by police force area (pfa), year and sentence-length band.
df = pd.read_csv(
    filepath_or_buffer='../data/interim/PFA_2009-21_women_cust_sentence_len.csv'
)

In [31]:
# Preview the first five rows to confirm the file loaded with the expected columns.
df.head(n=5)

Unnamed: 0,pfa,year,sentence_len,freq
0,Avon and Somerset,2009,6 months and under 12 months,11
1,Avon and Somerset,2009,Less than 6 months,117
2,Avon and Somerset,2009,Over 12 months,37
3,Avon and Somerset,2010,6 months and under 12 months,16
4,Avon and Somerset,2010,Less than 6 months,113


#### Filtering data for 2014 onwards

In [32]:
# Keep only rows from 2014 onwards; the remaining cells work from this subset.
filt = df['year'].ge(2014)
df2 = df.loc[filt]

In [33]:
# Preview the filtered frame; the first rows should now start at 2014.
df2.head(n=5)

Unnamed: 0,pfa,year,sentence_len,freq
15,Avon and Somerset,2014,6 months and under 12 months,14
16,Avon and Somerset,2014,Less than 6 months,144
17,Avon and Somerset,2014,Over 12 months,38
18,Avon and Somerset,2015,6 months and under 12 months,8
19,Avon and Somerset,2015,Less than 6 months,126


### Filtering data to show sentences of less than 6 months

In [34]:
# Select only the rows in the "Less than 6 months" sentence-length band.
filt = df2['sentence_len'].eq("Less than 6 months")
df_lt_6 = df2.loc[filt]

In [35]:
# Display the filtered frame (pandas truncates the middle rows) to check that
# only the "Less than 6 months" band remains.
df_lt_6

Unnamed: 0,pfa,year,sentence_len,freq
16,Avon and Somerset,2014,Less than 6 months,144
19,Avon and Somerset,2015,Less than 6 months,126
22,Avon and Somerset,2016,Less than 6 months,105
25,Avon and Somerset,2017,Less than 6 months,108
28,Avon and Somerset,2018,Less than 6 months,97
...,...,...,...,...
1617,Wiltshire,2017,Less than 6 months,32
1620,Wiltshire,2018,Less than 6 months,38
1623,Wiltshire,2019,Less than 6 months,32
1626,Wiltshire,2020,Less than 6 months,15


In [36]:
# Reshape to one row per police force area and one column per year, summing
# the frequencies that fall into each area/year cell.
pfa_years = df_lt_6.pivot_table(
    index='pfa',
    columns='year',
    values='freq',
    aggfunc='sum',
)

In [37]:
# Preview the area-by-year table.
pfa_years.head(n=5)

year,2014,2015,2016,2017,2018,2019,2020,2021
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avon and Somerset,144,126,105,108,97,89,71,64
Bedfordshire,48,51,33,35,26,22,12,10
Cambridgeshire,53,56,84,85,90,61,43,30
Cheshire,125,132,126,129,118,100,80,55
Cleveland,71,49,69,119,103,56,29,59


In [38]:
# Treat any area/year cell left empty by the cross-tabulation as zero and
# store the counts as whole numbers.
pfa_years = pfa_years.fillna(value=0.0).astype(dtype=int)

In [39]:
# Fractional change between the first and the last year column for each area,
# returned as a one-column DataFrame labelled with the last year.
#
# The lag is derived from the table width rather than hard-coded, so the
# calculation still spans first-to-last year if the year range changes.
# The result column is picked by position instead of dropna(axis='columns'):
# dropna would discard the entire column if even one area had an undefined
# change (e.g. a count of 0 in both the first and the last year), leaving
# nothing for the later `per_change.iloc[:, -1]` lookup.
n_periods = pfa_years.shape[1] - 1
per_change = pfa_years.pct_change(axis='columns', periods=n_periods).iloc[:, [-1]]

In [40]:
# Preview the computed change for the first five areas.
per_change.head(n=5)

year,2021
pfa,Unnamed: 1_level_1
Avon and Somerset,-0.555556
Bedfordshire,-0.791667
Cambridgeshire,-0.433962
Cheshire,-0.56
Cleveland,-0.169014


In [41]:
# Append the change since 2014 as a `per_change_2014` column, aligned on the
# police force area index. assign() names the new column directly and leaves
# the existing year labels untouched, replacing join(rsuffix=...) followed by
# an in-place rename keyed on the generated '2021_per_change' label.
df3 = pfa_years.assign(per_change_2014=per_change.iloc[:, -1])

In [42]:
# Preview the combined table: yearly counts plus the change column.
df3.head(n=5)

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avon and Somerset,144,126,105,108,97,89,71,64,-0.555556
Bedfordshire,48,51,33,35,26,22,12,10,-0.791667
Cambridgeshire,53,56,84,85,90,61,43,30,-0.433962
Cheshire,125,132,126,129,118,100,80,55,-0.56
Cleveland,71,49,69,119,103,56,29,59,-0.169014


#### Checking data before 2020

In [44]:
# Spot-check a single police force area: Merseyside.
df3.query(expr='pfa == "Merseyside"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Merseyside,164,185,190,172,144,150,84,94,-0.426829


In [45]:
# Spot-check a single police force area: Cambridgeshire.
df3.query(expr='pfa == "Cambridgeshire"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cambridgeshire,53,56,84,85,90,61,43,30,-0.433962


In [46]:
# Spot-check a single police force area: South Wales.
df3.query(expr='pfa == "South Wales"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
South Wales,233,235,291,266,268,231,164,134,-0.424893


#### All good, now to output to CSV

In [47]:
# Write the less-than-six-months table (index included) to the processed data folder.
df3.to_csv(path_or_buf='../data/processed/PFA_less_than_six_months.csv')

### Filtering data to show sentences of under 12 months

In [48]:
# Drop the "Over 12 months" band, keeping every other sentence-length band
# present in the 2014-onwards data.
filt = df2['sentence_len'].ne("Over 12 months")
df_12m = df2.loc[filt]

In [49]:
# Display the filtered frame (pandas truncates the middle rows) to check that
# no "Over 12 months" rows remain.
df_12m

Unnamed: 0,pfa,year,sentence_len,freq
15,Avon and Somerset,2014,6 months and under 12 months,14
16,Avon and Somerset,2014,Less than 6 months,144
18,Avon and Somerset,2015,6 months and under 12 months,8
19,Avon and Somerset,2015,Less than 6 months,126
21,Avon and Somerset,2016,6 months and under 12 months,12
...,...,...,...,...
1623,Wiltshire,2019,Less than 6 months,32
1625,Wiltshire,2020,6 months and under 12 months,6
1626,Wiltshire,2020,Less than 6 months,15
1628,Wiltshire,2021,6 months and under 12 months,3


In [50]:
# Reshape to one row per police force area and one column per year, summing
# the frequencies of all remaining sentence-length bands in each cell.
pfa_years = df_12m.pivot_table(
    index='pfa',
    columns='year',
    values='freq',
    aggfunc='sum',
)

In [51]:
# Preview the area-by-year table.
pfa_years.head(n=5)

year,2014,2015,2016,2017,2018,2019,2020,2021
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avon and Somerset,158,134,117,123,115,113,79,71
Bedfordshire,52,55,38,39,28,23,15,14
Cambridgeshire,60,63,92,94,99,70,51,35
Cheshire,139,141,139,141,128,112,88,67
Cleveland,72,58,82,127,108,65,36,67


In [52]:
# Treat any area/year cell left empty by the cross-tabulation as zero and
# store the counts as whole numbers.
pfa_years = pfa_years.fillna(value=0.0).astype(dtype=int)

In [53]:
# Fractional change between the first and the last year column for each area,
# returned as a one-column DataFrame labelled with the last year.
#
# The lag is derived from the table width rather than hard-coded, so the
# calculation still spans first-to-last year if the year range changes.
# The result column is picked by position instead of dropna(axis='columns'):
# dropna would discard the entire column if even one area had an undefined
# change (e.g. a count of 0 in both the first and the last year), leaving
# nothing for the later `per_change.iloc[:, -1]` lookup.
n_periods = pfa_years.shape[1] - 1
per_change = pfa_years.pct_change(axis='columns', periods=n_periods).iloc[:, [-1]]

In [54]:
# Preview the computed change for the first five areas.
per_change.head(n=5)

year,2021
pfa,Unnamed: 1_level_1
Avon and Somerset,-0.550633
Bedfordshire,-0.730769
Cambridgeshire,-0.416667
Cheshire,-0.517986
Cleveland,-0.069444


In [57]:
# Append the change since 2014 as a `per_change_2014` column, aligned on the
# police force area index. assign() names the new column directly and leaves
# the existing year labels untouched, replacing join(rsuffix=...) followed by
# an in-place rename keyed on the generated '2021_per_change' label.
df4 = pfa_years.assign(per_change_2014=per_change.iloc[:, -1])

In [58]:
# Preview the combined table: yearly counts plus the change column.
df4.head(n=5)

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avon and Somerset,158,134,117,123,115,113,79,71,-0.550633
Bedfordshire,52,55,38,39,28,23,15,14,-0.730769
Cambridgeshire,60,63,92,94,99,70,51,35,-0.416667
Cheshire,139,141,139,141,128,112,88,67,-0.517986
Cleveland,72,58,82,127,108,65,36,67,-0.069444


#### Checking data before 2020

In [59]:
# Spot-check a single police force area: Merseyside.
df4.query(expr='pfa == "Merseyside"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Merseyside,184,210,208,189,160,166,108,111,-0.396739


In [60]:
# Spot-check a single police force area: Cambridgeshire.
df4.query(expr='pfa == "Cambridgeshire"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cambridgeshire,60,63,92,94,99,70,51,35,-0.416667


In [61]:
# Spot-check a single police force area: South Wales.
df4.query(expr='pfa == "South Wales"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
South Wales,273,262,328,293,297,262,179,158,-0.421245


In [62]:
# Spot-check a single police force area: Thames Valley.
df4.query(expr='pfa == "Thames Valley"')

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Thames Valley,169,200,180,158,141,117,65,91,-0.461538


#### All good, now to output to CSV

In [63]:
# Write the under-12-months table (index included) to the processed data folder.
df4.to_csv(path_or_buf='../data/processed/PFA_under_12_months.csv')