# Women's imprisonment rates
## Data Merging: LA population data with PFAs

#### Importing pandas library and reading in data

In [1]:
import pandas as pd

In [2]:
# Load the LA-level population CSV and preview its first rows.
population_path = '../data/interim/LA_population_female_2001_2020-cleansed.csv'
df = pd.read_csv(population_path)
df.head()

Unnamed: 0,ladcode21,laname21,year,population
0,E06000001,Hartlepool,2001,35629
1,E06000002,Middlesbrough,2001,54982
2,E06000003,Redcar and Cleveland,2001,56080
3,E06000004,Stockton-on-Tees,2001,72422
4,E06000005,Darlington,2001,39947


#### Reading in the ONS lookup from local authority district codes (LAD21CD) to police force areas (PFAs)

In [5]:
# Only the LA code, LA name and PFA name columns are read from the ONS lookup.
lookup_columns = ['LAD21CD', 'LAD21NM', 'PFA21NM']
ons_pfa = pd.read_csv(
    '../data/external/LAD21_CSP21_PFA21_EW_LU.csv',
    usecols=lookup_columns,
)
ons_pfa

Unnamed: 0,LAD21CD,LAD21NM,PFA21NM
0,E09000002,Barking and Dagenham,Metropolitan Police
1,E09000003,Barnet,Metropolitan Police
2,E09000004,Bexley,Metropolitan Police
3,E09000005,Brent,Metropolitan Police
4,E09000006,Bromley,Metropolitan Police
...,...,...,...
336,W06000024,Merthyr Tydfil,South Wales
337,W06000023,Powys,Dyfed-Powys
338,W06000008,Ceredigion,Dyfed-Powys
339,W06000009,Pembrokeshire,Dyfed-Powys


#### Creating a dictionary of values matching LA code to PFA name

In [6]:
# Build a plain {LA code: PFA name} mapping from the ONS lookup.
# NOTE(review): the name `dict` shadows Python's built-in; it is kept here
# because later cells refer to the mapping by this name.
pfa_by_code = ons_pfa.set_index('LAD21CD')['PFA21NM']
dict = pfa_by_code.to_dict()
dict

{'E09000002': 'Metropolitan Police',
 'E09000003': 'Metropolitan Police',
 'E09000004': 'Metropolitan Police',
 'E09000005': 'Metropolitan Police',
 'E09000006': 'Metropolitan Police',
 'E09000007': 'Metropolitan Police',
 'E09000033': 'Metropolitan Police',
 'E09000008': 'Metropolitan Police',
 'E09000009': 'Metropolitan Police',
 'E09000010': 'Metropolitan Police',
 'E09000011': 'Metropolitan Police',
 'E09000012': 'Metropolitan Police',
 'E09000013': 'Metropolitan Police',
 'E09000014': 'Metropolitan Police',
 'E09000015': 'Metropolitan Police',
 'E09000016': 'Metropolitan Police',
 'E09000017': 'Metropolitan Police',
 'E09000018': 'Metropolitan Police',
 'E09000019': 'Metropolitan Police',
 'E09000020': 'Metropolitan Police',
 'E09000021': 'Metropolitan Police',
 'E09000022': 'Metropolitan Police',
 'E09000023': 'Metropolitan Police',
 'E09000024': 'Metropolitan Police',
 'E09000025': 'Metropolitan Police',
 'E09000026': 'Metropolitan Police',
 'E09000027': 'Metropolitan Police',
 

#### Assigning a PFA to every LA row in the population dataframe

In [7]:
# Assign each row its PFA by looking up the row's LA code in the mapping.
# `dict` here is the {LA code: PFA name} mapping built from `ons_pfa`, not the
# built-in type. `Series.map` does one exact-match lookup per row, replacing a
# regex substring scan of the whole column for every key in the mapping.
# LA codes that have no entry in the mapping are left as NaN in 'pfa'.
df['pfa'] = df['ladcode21'].map(dict)

In [8]:
# Display the population frame, which now carries the 'pfa' column.
df

Unnamed: 0,ladcode21,laname21,year,population,pfa
0,E06000001,Hartlepool,2001,35629,Cleveland
1,E06000002,Middlesbrough,2001,54982,Cleveland
2,E06000003,Redcar and Cleveland,2001,56080,Cleveland
3,E06000004,Stockton-on-Tees,2001,72422,Cleveland
4,E06000005,Darlington,2001,39947,Durham
...,...,...,...,...,...
6615,W06000020,Torfaen,2020,39169,Gwent
6616,W06000021,Monmouthshire,2020,39955,Gwent
6617,W06000022,Newport,2020,61972,Gwent
6618,W06000023,Powys,2020,55565,Dyfed-Powys


#### Just checking that everything is okay with one of the new Unitary Authorities

In [10]:
# Select every row for West Northamptonshire to eyeball its assigned PFA.
is_west_northants = df['laname21'].eq("West Northamptonshire")
df.loc[is_west_northants]

Unnamed: 0,ladcode21,laname21,year,population,pfa
58,E06000062,West Northamptonshire,2001,135510,Northamptonshire
389,E06000062,West Northamptonshire,2002,137011,Northamptonshire
720,E06000062,West Northamptonshire,2003,138102,Northamptonshire
1051,E06000062,West Northamptonshire,2004,139078,Northamptonshire
1382,E06000062,West Northamptonshire,2005,140887,Northamptonshire
1713,E06000062,West Northamptonshire,2006,143053,Northamptonshire
2044,E06000062,West Northamptonshire,2007,144775,Northamptonshire
2375,E06000062,West Northamptonshire,2008,145997,Northamptonshire
2706,E06000062,West Northamptonshire,2009,147255,Northamptonshire
3037,E06000062,West Northamptonshire,2010,148189,Northamptonshire


#### Dropping any entries for the City of London

In [19]:
# Show the City of London rows before they are removed.
is_city_of_london = df['laname21'].eq("City of London")
df.loc[is_city_of_london]

Unnamed: 0,ladcode21,laname21,year,population,pfa
276,E09000001,City of London,2001,2994,"London, City of"
607,E09000001,City of London,2002,2980,"London, City of"
938,E09000001,City of London,2003,2918,"London, City of"
1269,E09000001,City of London,2004,2932,"London, City of"
1600,E09000001,City of London,2005,2973,"London, City of"
1931,E09000001,City of London,2006,2983,"London, City of"
2262,E09000001,City of London,2007,3079,"London, City of"
2593,E09000001,City of London,2008,2995,"London, City of"
2924,E09000001,City of London,2009,3006,"London, City of"
3255,E09000001,City of London,2010,2930,"London, City of"


In [24]:
# Keep every row whose LA name is not City of London; `.copy()` gives an
# independent frame so later edits to df2 do not touch df.
not_city_of_london = df['laname21'].ne("City of London")
df2 = df.loc[not_city_of_london].copy()
df2

Unnamed: 0,ladcode21,laname21,year,population,pfa
0,E06000001,Hartlepool,2001,35629,Cleveland
1,E06000002,Middlesbrough,2001,54982,Cleveland
2,E06000003,Redcar and Cleveland,2001,56080,Cleveland
3,E06000004,Stockton-on-Tees,2001,72422,Cleveland
4,E06000005,Darlington,2001,39947,Durham
...,...,...,...,...,...
6615,W06000020,Torfaen,2020,39169,Gwent
6616,W06000021,Monmouthshire,2020,39955,Gwent
6617,W06000022,Newport,2020,61972,Gwent
6618,W06000023,Powys,2020,55565,Dyfed-Powys


#### Standardising Devon and Cornwall to match CJS data

In [25]:
# Rename the PFA label 'Devon & Cornwall' to 'Devon and Cornwall'.
# The replacement is scoped to the 'pfa' column (the only column that holds
# PFA names) and assigned back explicitly instead of using a frame-wide
# in-place replace, so no other column is scanned or altered.
df2['pfa'] = df2['pfa'].replace('Devon & Cornwall', 'Devon and Cornwall')

In [26]:
# Show the rows now labelled with the standardised PFA name.
is_devon_cornwall = df2['pfa'].eq("Devon and Cornwall")
df2.loc[is_devon_cornwall]

Unnamed: 0,ladcode21,laname21,year,population,pfa
25,E06000026,Plymouth,2001,97647,Devon and Cornwall
26,E06000027,Torbay,2001,55254,Devon and Cornwall
48,E06000052,Cornwall,2001,207366,Devon and Cornwall
49,E06000053,Isles of Scilly,2001,869,Devon and Cornwall
78,E07000040,East Devon,2001,54947,Devon and Cornwall
...,...,...,...,...,...
6370,E07000043,North Devon,2020,40760,Devon and Cornwall
6371,E07000044,South Hams,2020,37775,Devon and Cornwall
6372,E07000045,Teignbridge,2020,57891,Devon and Cornwall
6373,E07000046,Torridge,2020,29127,Devon and Cornwall


In [28]:
# Count the distinct PFA labels remaining in df2.
distinct_pfas = pd.unique(df2['pfa'])
len(distinct_pfas)

42

#### Saving data out to CSV

In [29]:
# Write the PFA-tagged population data to the interim folder without the index.
# NOTE(review): this file name says 2001_2021 while the input file is named
# 2001_2020 — confirm which range is intended before renaming, since other
# notebooks may read this exact path.
output_path = '../data/interim/LA_population_female_2001_2021_PFAs_cleansed.csv'
df2.to_csv(output_path, index=False)