# Women's imprisonment rates
## Criminal Justice Statistics Police Force Area: Filtering by custodial sentence length

In [1]:
import pandas as pd

In [2]:
%load_ext autoreload
%autoreload 2

## Testing `filter_sentence_length.py`

In [3]:
from src.data.processing import filter_sentence_length

In [4]:
# Run the module's load-and-process step. The log lines below record its stages:
# load the interim CSV, filter for custodial sentences, group/recategorise
# sentence lengths, then group by PFA, year and sentence length.
df = filter_sentence_length.load_and_process_data()

2025-06-11 10:44:23,111 - INFO - Loaded data from data/interim/women_cust_comm_sus.csv
2025-06-11 10:44:23,439 - INFO - Filtering data for custodial sentences...
2025-06-11 10:44:23,452 - INFO - Grouping sentence lengths...
2025-06-11 10:44:23,555 - INFO - Sentence lengths grouped and recategorised.
2025-06-11 10:44:23,556 - INFO - Grouping data by PFA, year, and sentence length...


In [None]:
df.info()

In [None]:
# Inspect the category labels of `sentence_len`.
# NOTE(review): the `.cat` accessor only works on a categorical column, and this
# cell has no recorded output (In [None]) -- confirm the dtype before relying on it.
categories = df['sentence_len'].cat.categories
categories

In [5]:
df

Unnamed: 0,pfa,year,sentence_len,freq
0,Avon and Somerset,2010,Less than 6 months,113
1,Avon and Somerset,2010,6 months to less than 12 months,16
2,Avon and Somerset,2010,12 months or more,49
3,Avon and Somerset,2011,Less than 6 months,142
4,Avon and Somerset,2011,6 months to less than 12 months,21
...,...,...,...,...
1839,Wiltshire,2023,6 months to less than 12 months,4
1840,Wiltshire,2023,12 months or more,10
1841,Wiltshire,2024,Less than 6 months,22
1842,Wiltshire,2024,6 months to less than 12 months,1


## Moving on to testing `make_custody_tables.py`

In [4]:
from src.data.processing import make_custody_tables

In [5]:
# First check of make_custody_tables: per the log below, this call loads
# data/processed/women_cust_sentence_len_FINAL.csv. The rows displayed match the
# frame produced by filter_sentence_length in the previous section.
df = make_custody_tables.load_and_process_data()
df

2025-06-11 11:18:16,093 - INFO - Loaded data from data/processed/women_cust_sentence_len_FINAL.csv


Unnamed: 0,pfa,year,sentence_len,freq
0,Avon and Somerset,2010,Less than 6 months,113
1,Avon and Somerset,2010,6 months to less than 12 months,16
2,Avon and Somerset,2010,12 months or more,49
3,Avon and Somerset,2011,Less than 6 months,142
4,Avon and Somerset,2011,6 months to less than 12 months,21
...,...,...,...,...
1839,Wiltshire,2023,6 months to less than 12 months,4
1840,Wiltshire,2023,12 months or more,10
1841,Wiltshire,2024,Less than 6 months,22
1842,Wiltshire,2024,6 months to less than 12 months,1


In [6]:
import src.utilities as utils
config = utils.read_config()

In [8]:
# Output filename template read from the project config; the echoed value below
# shows it carries a `{category}` placeholder.
OUTPUT_FILENAME_TEMPLATE = config['data']['datasetFilenames']['make_custody_tables_template']
OUTPUT_FILENAME_TEMPLATE

'PFA_custodial_sentences_{category}_FINAL.csv'

In [18]:
# Check the category -> filename mapping: in the output below the `{category}`
# placeholder has been replaced by the slug for the "6 months" category.
make_custody_tables.get_output_filename(
    category="6 months",
    template=OUTPUT_FILENAME_TEMPLATE
)

'PFA_custodial_sentences_six_months_FINAL.csv'

In [21]:
# Scratch copy of the category lookup used while developing make_custody_tables.
# Keys are the category names passed to `get_output_filename`; for each one,
# `filter` holds the `sentence_len` labels to keep (None for the "all" category)
# and `slug` is the fragment that ends up in the output filename.
# NOTE(review): the "6 months" slug is spelled out ("six_months") while the
# "12 months" slug is numeric, and a later run in this notebook writes a
# `..._6_months_...` file -- confirm which spelling the module should use.
VALID_CATEGORIES = {
    "all": dict(filter=None, slug="all"),
    "6 months": dict(filter=["Less than 6 months"], slug="six_months"),
    "12 months": dict(
        filter=["Less than 6 months", "6 months to less than 12 months"],
        slug="12_months",
    ),
}

In [22]:
VALID_CATEGORIES

{'all': {'filter': None, 'slug': 'all'},
 '6 months': {'filter': ['Less than 6 months'], 'slug': 'six_months'},
 '12 months': {'filter': ['Less than 6 months',
   '6 months to less than 12 months'],
  'slug': '12_months'}}

In [24]:
make_custody_tables.load_and_process_data()

2025-06-11 12:00:59,956 - INFO - Loaded data from data/processed/women_cust_sentence_len_FINAL.csv
2025-06-11 12:00:59,961 - INFO - Filtering for the total number of custodial sentences
2025-06-11 12:00:59,964 - INFO - Filtering data from 2014 onwards
2025-06-11 12:00:59,975 - INFO - Saving...
2025-06-11 12:00:59,978 - INFO - Data successfully saved to data/processed/PFA_custodial_sentences_all_FINAL.csv
2025-06-11 12:00:59,979 - INFO - Filtering for custodial sentences of less than 6 months
2025-06-11 12:00:59,985 - INFO - Filtering data from 2014 onwards
2025-06-11 12:00:59,994 - INFO - Saving...
2025-06-11 12:00:59,998 - INFO - Data successfully saved to data/processed/PFA_custodial_sentences_six_months_FINAL.csv
2025-06-11 12:00:59,998 - INFO - Filtering for custodial sentences of less than 12 months
2025-06-11 12:01:00,005 - INFO - Filtering data from 2014 onwards
2025-06-11 12:01:00,014 - INFO - Saving...
2025-06-11 12:01:00,018 - INFO - Data successfully saved to data/processed/

That's working nicely. Now to move on to developing the `calculate_percentage_change` function.

## `calculate_percentage_change`

Load one of the processed dataframes to test the percentage-change calculation.

In [38]:
# Read back one of the CSVs written by make_custody_tables (path is hard-coded
# for this scratch test; it uses the `6_months` slug seen in the later main() log).
# NOTE(review): read_csv returns `pfa` as an ordinary column with a default
# integer index, as the output below shows.
df = pd.read_csv("data/processed/PFA_custodial_sentences_6_months_FINAL.csv")
df

Unnamed: 0,pfa,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,per_change_2014
0,Avon and Somerset,143,126,105,92,98,90,80,61,62,93,117,-0.181818
1,Bedfordshire,48,50,33,34,25,24,14,10,20,21,23,-0.520833
2,Cambridgeshire,55,56,84,82,89,60,44,32,45,56,58,0.054545
3,Cheshire,124,131,125,126,114,98,79,54,39,66,65,-0.475806
4,Cleveland,73,48,69,103,108,60,28,50,56,91,138,0.890411
5,Cumbria,60,59,60,65,94,48,27,20,18,36,49,-0.183333
6,Derbyshire,125,134,131,120,134,87,90,82,82,76,82,-0.344
7,Devon and Cornwall,73,84,81,87,80,73,69,55,41,72,82,0.123288
8,Dorset,36,49,40,40,35,44,24,15,10,29,28,-0.222222
9,Durham,55,50,53,40,48,26,30,26,41,48,52,-0.054545


In [39]:
len(df.columns)

13

In [28]:
# Demonstrate the failure mode when `pfa` is still a regular column: the call is
# expected to raise a TypeError. Catch and print it so the notebook still
# survives Restart Kernel -> Run All instead of halting on this cell.
try:
    make_custody_tables.calculate_percentage_change(df)
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: unsupported operand type(s) for /: 'int' and 'str'

Right — `pfa` was read back in as an ordinary string column, which breaks the arithmetic. Need to set PFA back to being the index to address this.

In [31]:
# Move the string-valued `pfa` column into the index so the remaining columns
# are numeric. Rebinding the result (rather than `inplace=True`) is the
# recommended pandas idiom and avoids mutating a frame that an earlier cell
# already displayed.
df = df.set_index("pfa")
df

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Avon and Somerset,143,126,105,92,98,90,80,61,62,93,117
Bedfordshire,48,50,33,34,25,24,14,10,20,21,23
Cambridgeshire,55,56,84,82,89,60,44,32,45,56,58
Cheshire,124,131,125,126,114,98,79,54,39,66,65
Cleveland,73,48,69,103,108,60,28,50,56,91,138
Cumbria,60,59,60,65,94,48,27,20,18,36,49
Derbyshire,125,134,131,120,134,87,90,82,82,76,82
Devon and Cornwall,73,84,81,87,80,73,69,55,41,72,82
Dorset,36,49,40,40,35,44,24,15,10,29,28
Durham,55,50,53,40,48,26,30,26,41,48,52


In [36]:
# With `pfa` as the index the call succeeds and adds a `per_change_2014` column.
# The displayed values equal (2024 - 2014) / 2014 as a fraction, e.g.
# (117 - 143) / 143 = -0.181818 for Avon and Somerset.
make_custody_tables.calculate_percentage_change(df)

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,per_change_2014
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Avon and Somerset,143,126,105,92,98,90,80,61,62,93,117,-0.181818
Bedfordshire,48,50,33,34,25,24,14,10,20,21,23,-0.520833
Cambridgeshire,55,56,84,82,89,60,44,32,45,56,58,0.054545
Cheshire,124,131,125,126,114,98,79,54,39,66,65,-0.475806
Cleveland,73,48,69,103,108,60,28,50,56,91,138,0.890411
Cumbria,60,59,60,65,94,48,27,20,18,36,49,-0.183333
Derbyshire,125,134,131,120,134,87,90,82,82,76,82,-0.344
Devon and Cornwall,73,84,81,87,80,73,69,55,41,72,82,0.123288
Dorset,36,49,40,40,35,44,24,15,10,29,28,-0.222222
Durham,55,50,53,40,48,26,30,26,41,48,52,-0.054545


In [37]:
make_custody_tables.main()

2025-06-11 14:35:43,919 - INFO - Loaded data from data/processed/women_cust_sentence_len_FINAL.csv
2025-06-11 14:35:43,925 - INFO - Filtering for the total number of custodial sentences
2025-06-11 14:35:43,931 - INFO - Filtering data from 2014 onwards
2025-06-11 14:35:43,953 - INFO - Saving...
2025-06-11 14:35:43,965 - INFO - Data successfully saved to data/processed/PFA_custodial_sentences_all_FINAL.csv
2025-06-11 14:35:43,966 - INFO - Filtering for custodial sentences of less than 6 months
2025-06-11 14:35:43,971 - INFO - Filtering data from 2014 onwards
2025-06-11 14:35:43,989 - INFO - Saving...
2025-06-11 14:35:43,995 - INFO - Data successfully saved to data/processed/PFA_custodial_sentences_6_months_FINAL.csv
2025-06-11 14:35:43,996 - INFO - Filtering for custodial sentences of less than 12 months
2025-06-11 14:35:44,002 - INFO - Filtering data from 2014 onwards
2025-06-11 14:35:44,022 - INFO - Saving...
2025-06-11 14:35:44,027 - INFO - Data successfully saved to data/processed/PF