# Women's imprisonment rates
## Data Merging: LA population data with PFAs

#### Importing pandas library and reading in data

In [1]:
import pandas as pd

In [2]:
# Load the female population estimates per local authority and year (not rebased).
population_csv = 'data/interim/LA_population_female_2001_2021_NOT_REBASED.csv'
df = pd.read_csv(population_csv)

# Preview the first rows only.
df.head()

Unnamed: 0,ladcode21,ladname21,year,population
0,E06000001,Hartlepool,2001,32246
1,E06000002,Middlesbrough,2001,50887
2,E06000003,Redcar and Cleveland,2001,50671
3,E06000004,Stockton-on-Tees,2001,66611
4,E06000005,Darlington,2001,35701


#### Reading in ONS PFA by LACODE22 dataset
Local Authority District to Community Safety Partnerships to Police Force Areas (December 2022) lookup,
taken from the ONS Open Geography Portal (https://geoportal.statistics.gov.uk/).


In [3]:
# Read the ONS lookup, keeping only the LAD code, LAD name and PFA name columns.
ons_lookup_csv = 'data/external/Local_Authority_District_to_Community_Safety_Partnerships_to_Police_Force_Areas_(December_2022)_Lookup_in_England_and_Wales.csv'
ons_pfa = pd.read_csv(
    ons_lookup_csv,
    usecols=['LAD22CD', 'LAD22NM', 'PFA22NM'],
)
ons_pfa

Unnamed: 0,LAD22CD,LAD22NM,PFA22NM
0,E08000001,Bolton,Greater Manchester
1,E08000002,Bury,Greater Manchester
2,E08000003,Manchester,Greater Manchester
3,E08000004,Oldham,Greater Manchester
4,E08000005,Rochdale,Greater Manchester
...,...,...,...
336,E07000227,Horsham,Sussex
337,E07000063,Lewes,Sussex
338,E07000228,Mid Sussex,Sussex
339,E07000064,Rother,Sussex


#### Creating a dictionary of values matching LA code to PFA name

In [4]:
# Build a {LAD22CD: PFA22NM} mapping; if a code appears more than once the last row wins.
# NOTE(review): the name `dict` shadows the Python built-in for the rest of the session;
# it is kept so that any cell looking the mapping up by this name keeps working.
dict = {
    lad_code: pfa_name
    for lad_code, pfa_name in zip(ons_pfa['LAD22CD'], ons_pfa['PFA22NM'])
}
dict

{'E08000001': 'Greater Manchester',
 'E08000002': 'Greater Manchester',
 'E08000003': 'Greater Manchester',
 'E08000004': 'Greater Manchester',
 'E08000005': 'Greater Manchester',
 'E08000006': 'Greater Manchester',
 'E08000007': 'Greater Manchester',
 'E08000008': 'Greater Manchester',
 'E08000009': 'Greater Manchester',
 'E08000010': 'Greater Manchester',
 'E06000006': 'Cheshire',
 'E06000007': 'Cheshire',
 'E06000049': 'Cheshire',
 'E06000050': 'Cheshire',
 'E08000037': 'Northumbria',
 'E08000021': 'Northumbria',
 'E08000022': 'Northumbria',
 'E08000023': 'Northumbria',
 'E08000024': 'Northumbria',
 'E06000057': 'Northumbria',
 'E06000005': 'Durham',
 'E06000047': 'Durham',
 'E06000014': 'North Yorkshire',
 'E07000163': 'North Yorkshire',
 'E07000164': 'North Yorkshire',
 'E07000165': 'North Yorkshire',
 'E07000166': 'North Yorkshire',
 'E07000167': 'North Yorkshire',
 'E07000168': 'North Yorkshire',
 'E07000169': 'North Yorkshire',
 'E08000032': 'West Yorkshire',
 'E08000033': 'Wes

#### Assigning a PFA to every LA row in the population dataframe using the lookup

In [5]:
# Assign each row its police force area with a single exact-match lookup on the LA code.
#
# The previous version looped over every lookup key and used `str.contains(key)`,
# which (a) treats the key as a regex and matches it anywhere inside the code rather
# than requiring equality, (b) rescans the whole column once per key, and (c) relied
# on a variable that shadows the built-in `dict`.
#
# The mapping is rebuilt here from `ons_pfa` so this cell does not depend on that name.
# Converting to a plain dict keeps the "last row wins" behaviour if a code is repeated.
lad_to_pfa = ons_pfa.set_index('LAD22CD')['PFA22NM'].to_dict()

# Codes that are missing from the lookup end up as NaN in the new `pfa` column.
df['pfa'] = df['ladcode21'].map(lad_to_pfa)

In [6]:
# Display the frame (pandas truncates long output) to inspect the newly added pfa column.
df

Unnamed: 0,ladcode21,ladname21,year,population,pfa
0,E06000001,Hartlepool,2001,32246,Cleveland
1,E06000002,Middlesbrough,2001,50887,Cleveland
2,E06000003,Redcar and Cleveland,2001,50671,Cleveland
3,E06000004,Stockton-on-Tees,2001,66611,Cleveland
4,E06000005,Darlington,2001,35701,Durham
...,...,...,...,...,...
6946,W06000020,Torfaen,2021,34999,Gwent
6947,W06000021,Monmouthshire,2021,36922,Gwent
6948,W06000022,Newport,2021,60097,Gwent
6949,W06000023,Powys,2021,53829,Dyfed-Powys


#### Just checking that everything is okay with one of the new Unitary Authorities

In [7]:
# Spot-check: show every row for West Northamptonshire so its assigned pfa can be inspected.
df.query('ladname21 == "West Northamptonshire"')

Unnamed: 0,ladcode21,ladname21,year,population,pfa
58,E06000062,West Northamptonshire,2001,128778,Northamptonshire
389,E06000062,West Northamptonshire,2002,130268,Northamptonshire
720,E06000062,West Northamptonshire,2003,131466,Northamptonshire
1051,E06000062,West Northamptonshire,2004,132176,Northamptonshire
1382,E06000062,West Northamptonshire,2005,134686,Northamptonshire
1713,E06000062,West Northamptonshire,2006,137173,Northamptonshire
2044,E06000062,West Northamptonshire,2007,138684,Northamptonshire
2375,E06000062,West Northamptonshire,2008,139793,Northamptonshire
2706,E06000062,West Northamptonshire,2009,140799,Northamptonshire
3037,E06000062,West Northamptonshire,2010,141811,Northamptonshire


#### Dropping any entries for the City of London

In [8]:
# Show the City of London rows (these are the rows excluded in the next step).
df.query('ladname21 == "City of London"')

Unnamed: 0,ladcode21,ladname21,year,population,pfa
276,E09000001,City of London,2001,3625,"London, City of"
607,E09000001,City of London,2002,3622,"London, City of"
938,E09000001,City of London,2003,3552,"London, City of"
1269,E09000001,City of London,2004,3540,"London, City of"
1600,E09000001,City of London,2005,3534,"London, City of"
1931,E09000001,City of London,2006,3635,"London, City of"
2262,E09000001,City of London,2007,3870,"London, City of"
2593,E09000001,City of London,2008,3805,"London, City of"
2924,E09000001,City of London,2009,3840,"London, City of"
3255,E09000001,City of London,2010,3730,"London, City of"


In [9]:
# Keep every row except the City of London ones, then take an independent copy
# so later edits to df2 do not touch df.
rows_outside_city = df.query('ladname21 != "City of London"')
df2 = rows_outside_city.copy()
df2

Unnamed: 0,ladcode21,ladname21,year,population,pfa
0,E06000001,Hartlepool,2001,32246,Cleveland
1,E06000002,Middlesbrough,2001,50887,Cleveland
2,E06000003,Redcar and Cleveland,2001,50671,Cleveland
3,E06000004,Stockton-on-Tees,2001,66611,Cleveland
4,E06000005,Darlington,2001,35701,Durham
...,...,...,...,...,...
6946,W06000020,Torfaen,2021,34999,Gwent
6947,W06000021,Monmouthshire,2021,36922,Gwent
6948,W06000022,Newport,2021,60097,Gwent
6949,W06000023,Powys,2021,53829,Dyfed-Powys


#### Standardising Devon and Cornwall to match CJS data

In [10]:
# Rename the force in the pfa column only. The previous whole-frame, in-place replace
# would also have rewritten any other column holding exactly 'Devon & Cornwall',
# and in-place mutation makes the cell harder to reason about on re-runs.
df2['pfa'] = df2['pfa'].replace('Devon & Cornwall', 'Devon and Cornwall')

In [11]:
# Confirm the rename: list the rows whose pfa is now "Devon and Cornwall".
df2.query('pfa == "Devon and Cornwall"')

Unnamed: 0,ladcode21,ladname21,year,population,pfa
25,E06000026,Plymouth,2001,90260,Devon and Cornwall
26,E06000027,Torbay,2001,48266,Devon and Cornwall
48,E06000052,Cornwall,2001,187912,Devon and Cornwall
49,E06000053,Isles of Scilly,2001,888,Devon and Cornwall
78,E07000040,East Devon,2001,47315,Devon and Cornwall
...,...,...,...,...,...
6701,E07000043,North Devon,2021,39037,Devon and Cornwall
6702,E07000044,South Hams,2021,35083,Devon and Cornwall
6703,E07000045,Teignbridge,2021,53149,Devon and Cornwall
6704,E07000046,Torridge,2021,27277,Devon and Cornwall


In [12]:
# Count the distinct pfa values; dropna=False counts a missing value as its own entry.
df2['pfa'].nunique(dropna=False)

42

In [13]:
# Summarise a re-indexed copy of the frame. The re-indexed result is not assigned
# back to df2, so df2 itself keeps its original row labels.
df2_renumbered = df2.reset_index(drop=True)
df2_renumbered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6930 entries, 0 to 6929
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ladcode21   6930 non-null   object
 1   ladname21   6930 non-null   object
 2   year        6930 non-null   int64 
 3   population  6930 non-null   int64 
 4   pfa         6930 non-null   object
dtypes: int64(2), object(3)
memory usage: 270.8+ KB


#### Saving data out to CSV

In [14]:
# Write the PFA-tagged population data out, omitting the row index.
output_csv = 'data/interim/LA_population_female_2001_2021_PFAs_NOT_REBASED.csv'
df2.to_csv(output_csv, index=False)