# Women's imprisonment rates
## Data Merging

### Importing pandas library and reading in data

In [1]:
import pandas as pd

#### LA PFA population data

In [2]:
# Load the cleansed female population table from the interim data folder.
# The output below shows its columns: LA code, LA name, year, population
# and the police force area (pfa) each local authority is assigned to.
df = pd.read_csv(
    filepath_or_buffer='../data/interim/LA_population_female_2001_2021_PFAs_cleansed.csv'
)
df

Unnamed: 0,ladcode21,laname21,year,population,pfa
0,E06000001,Hartlepool,2001,35629,Cleveland
1,E06000002,Middlesbrough,2001,54982,Cleveland
2,E06000003,Redcar and Cleveland,2001,56080,Cleveland
3,E06000004,Stockton-on-Tees,2001,72422,Cleveland
4,E06000005,Darlington,2001,39947,Durham
...,...,...,...,...,...
6595,W06000020,Torfaen,2020,39169,Gwent
6596,W06000021,Monmouthshire,2020,39955,Gwent
6597,W06000022,Newport,2020,61972,Gwent
6598,W06000023,Powys,2020,55565,Dyfed-Powys


#### Reading in the imprisonment figures from the processed CJS court outcomes by PFA dataset and dropping the column that shows percentage change.

In [3]:
# Load custodial sentence counts indexed by police force area and discard the
# percentage-change column in the same expression, so only the per-year
# count columns remain.
df2 = (
    pd.read_csv('../data/processed/female_custodial_sentences_PFA.csv', index_col='pfa')
    .drop(columns='per_change_2014')
)

In [4]:
# Preview the first five rows to check that 'per_change_2014' has been dropped
# and that the remaining columns are the year labels.
df2.head()

Unnamed: 0_level_0,2014,2015,2016,2017,2018,2019,2020,2021
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avon and Somerset,196,165,164,158,148,151,103,103
Bedfordshire,69,80,53,53,36,31,23,20
Cambridgeshire,91,89,112,115,116,89,78,47
Cheshire,169,181,167,172,176,149,123,117
Cleveland,91,78,108,152,140,98,55,103


#### Melting the dataframe

In [25]:
# Reshape from wide (one column per year) to long (one row per pfa/year pair).
# ignore_index=False keeps the 'pfa' index so each row still carries its area.
# The former column labels land in 'year', the cell values in 'no_imp'.
df3 = pd.melt(
    df2,
    var_name="year",
    value_name="no_imp",
    ignore_index=False,
)
df3

Unnamed: 0_level_0,year,no_imp
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1
Avon and Somerset,2014,196
Bedfordshire,2014,69
Cambridgeshire,2014,91
Cheshire,2014,169
Cleveland,2014,91
...,...,...
Warwickshire,2021,28
West Mercia,2021,62
West Midlands,2021,226
West Yorkshire,2021,250


#### Sorting by PFA and year

In [26]:
# Order rows by police force area, then by year within each area.
# 'pfa' is the index level name here; sort_values accepts it alongside the
# 'year' column label.
df3 = df3.sort_values(['pfa', 'year'])
df3

Unnamed: 0_level_0,year,no_imp
pfa,Unnamed: 1_level_1,Unnamed: 2_level_1
Avon and Somerset,2014,196
Avon and Somerset,2015,165
Avon and Somerset,2016,164
Avon and Somerset,2017,158
Avon and Somerset,2018,148
...,...,...
Wiltshire,2017,47
Wiltshire,2018,49
Wiltshire,2019,49
Wiltshire,2020,34


#### Resetting the index

In [27]:
# Move 'pfa' out of the index into an ordinary column, leaving a 0..n-1 index.
# The guard makes the cell safe to re-run: calling reset_index again once
# 'pfa' is already a column would insert a spurious 'index' column.
if df3.index.name == 'pfa':
    df3 = df3.reset_index(drop=False)
df3

Unnamed: 0,pfa,year,no_imp
0,Avon and Somerset,2014,196
1,Avon and Somerset,2015,165
2,Avon and Somerset,2016,164
3,Avon and Somerset,2017,158
4,Avon and Somerset,2018,148
...,...,...,...
331,Wiltshire,2017,47
332,Wiltshire,2018,49
333,Wiltshire,2019,49
334,Wiltshire,2020,34


In [55]:
# Convert 'year' to integers. The values originated as column labels in df2;
# presumably this is so they compare equal to the integer 'year' in the
# population data - confirm against the later merge step.
df3 = df3.assign(year=df3['year'].astype(int))

### Filtering LA PFA population data to match time series of CJS PFA imprisonment data

In [34]:
# Keep population rows whose year falls in 2014-2021 (both ends inclusive),
# as an independent copy so later edits cannot touch the full table.
# NOTE(review): the tail of the displayed output ends at 2020, so the
# population file may not contain 2021 rows - confirm before using 2021 rates.
df_1421 = df.loc[df['year'].between(2014, 2021)].copy()
df_1421

Unnamed: 0,ladcode21,laname21,year,population,pfa
4290,E06000001,Hartlepool,2014,37631,Cleveland
4291,E06000002,Middlesbrough,2014,55011,Cleveland
4292,E06000003,Redcar and Cleveland,2014,56213,Cleveland
4293,E06000004,Stockton-on-Tees,2014,78237,Cleveland
4294,E06000005,Darlington,2014,43222,Durham
...,...,...,...,...,...
6595,W06000020,Torfaen,2020,39169,Gwent
6596,W06000021,Monmouthshire,2020,39955,Gwent
6597,W06000022,Newport,2020,61972,Gwent
6598,W06000023,Powys,2020,55565,Dyfed-Powys


In [35]:
# Collapse local-authority rows into one row per police force area and year
# by summing their populations. as_index=False keeps 'pfa' and 'year' as
# regular columns rather than a MultiIndex.
df4 = df_1421.groupby(['pfa', 'year'], as_index=False)['population'].sum()
df4

Unnamed: 0,pfa,year,population
0,Avon and Somerset,2014,671507
1,Avon and Somerset,2015,679322
2,Avon and Somerset,2016,686399
3,Avon and Somerset,2017,691747
4,Avon and Somerset,2018,699074
...,...,...,...
289,Wiltshire,2016,282744
290,Wiltshire,2017,284852
291,Wiltshire,2018,286800
292,Wiltshire,2019,288049


In [37]:
# List the column labels of the long-format imprisonment table.
df3.columns

Index(['pfa', 'year', 'no_imp'], dtype='object')

In [38]:
# List the column labels of the aggregated PFA population table.
df4.columns

Index(['pfa', 'year', 'population'], dtype='object')

In [58]:
# Inspect column dtypes of the imprisonment table; 'year' should be an
# integer type once the astype(int) cast has been run.
df3.dtypes

pfa       object
year       int64
no_imp     int64
dtype: object

In [59]:
# Inspect column dtypes of the aggregated population table.
df4.dtypes

pfa           object
year           int64
population     int64
dtype: object