# Creating the London and Merseyside CSVs

Load in libraries:

In [133]:
import os
import pandas as pd 
from pyproj import Transformer
import geopandas as gpd

### Load the data

https://data.police.uk/data/

In [134]:
path = '../data/police_zips'    

# find all csv file paths
csv_names = []
for folder in os.listdir(path):
        
            folder_contents = os.listdir(f'{path}/{folder}')
            
            for file in folder_contents:
                
                    
                csv_names.append(f'{path}/{folder}/{file}')



print(f'There are {len(csv_names)} different CSVs')
        

There are 108 different CSVs


### Extract the CSV's

In [135]:
crimes, outcomes, stops = [], [], []

# loop through the csvs
for csv in csv_names:
    
    if "stop-and-search.csv" in csv:
        stops.append(pd.read_csv(csv))


    elif "street.csv" in csv:
        crimes.append(pd.read_csv(csv))

    elif "outcomes.csv" in csv:
        outcomes.append(pd.read_csv(csv))


outcomes_df = pd.concat(outcomes, ignore_index=True)
crimes_df = pd.concat(crimes, ignore_index=True)
stops_df = pd.concat(stops, ignore_index=True)


### Combine the crime and outcomes by 'CRIME ID'

In [136]:
crimes_df = crimes_df.merge(outcomes_df[['Crime ID', 'Outcome type']] , on='Crime ID', how='left')
crimes_df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,Outcome type
0,f344b946a36b4dc1db0a4bb889e8ec0fd23ab65aa2bf39...,2022-01,City of London Police,City of London Police,-0.106453,51.518207,On or near Charterhouse Street,E01000916,Camden 027B,Burglary,Status update unavailable,,
1,e74962917ce995fa9e52623b6fe0c218619b79d4a22550...,2022-01,City of London Police,City of London Police,-0.113256,51.516824,On or near Old Square,E01000914,Camden 028B,Other theft,Investigation complete; no suspect identified,,Investigation complete; no suspect identified
2,067092d6822753127ce767d011ea5c5b4375de6f5a3c48...,2022-01,City of London Police,City of London Police,-0.116100,51.518470,On or near Supermarket,E01000914,Camden 028B,Other theft,Status update unavailable,,
3,,2022-01,City of London Police,City of London Police,-0.097601,51.520699,On or near Carthusian Street,E01000001,City of London 001A,Anti-social behaviour,,,
4,,2022-01,City of London Police,City of London Police,-0.095914,51.520348,On or near Beech Street,E01000001,City of London 001A,Anti-social behaviour,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1360391,7b127e4ff2fd1fdd40db3230cb20fac5af55b7cc66e2b9...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,,
1360392,5160c9abc5f0674f5f047e69c26f49f50f61f6e322131d...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,,
1360393,cee8aad8bb809fc0daefafd0d2fcece46fb554940212d0...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Offender given a caution,,Offender given a caution
1360394,8f12b9f524fc5c8ec0e3965f7d7ab406632a2b3549aa12...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Status update unavailable,,


### Convert to datetime


In [137]:
crimes_df['Date'] = pd.to_datetime(crimes_df['Month'], format= '%Y-%m')
stops_df['Date'] = pd.to_datetime(stops_df['Date']).dt.tz_convert(None) # convert to datetime and remove timezone
stops_df.drop(columns=['Part of a policing operation', 'Policing operation'], inplace=True) # remove empty columns
stops_df

Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
3,Person search,2022-01-01 01:48:59,51.508066,-0.087780,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
4,Person search,2022-01-01 02:49:39,51.517680,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
235906,Person and Vehicle search,2022-12-17 21:35:00,51.631560,-0.195095,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,
235907,Person search,2022-12-17 21:57:00,,,Male,25-34,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,
235908,Person search,2022-12-17 22:00:00,51.514365,-0.143408,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,
235909,Person search,2022-12-17 22:16:00,,,Male,10-17,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,


## Map LSOA to coords

In [138]:
LSOA_df = pd.read_csv('../data/mapping_csvs/LSOA_to_coords.csv')

# Define transformer from OSGB36 (EPSG:27700) to WGS84 (EPSG:4326)
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

def convert_uk_coords(eastings, northings):
    """Vectorised function to convert Eastings/Northings to Longitude/Latitude."""
    longitudes, latitudes = transformer.transform(eastings, northings)
    return longitudes, latitudes



# Example DataFrame
LSOA_df['Longitude'], LSOA_df['Latitude'] = convert_uk_coords(LSOA_df['x'], LSOA_df['y'])


LSOA_df = LSOA_df[['LSOA01CD', 'Longitude', 'Latitude']]
LSOA_df

Unnamed: 0,LSOA01CD,Longitude,Latitude
0,E01000001,-0.096266,51.519526
1,E01000002,-0.092626,51.519692
2,E01000003,-0.095916,51.522029
3,E01000004,-0.097571,51.514134
4,E01000005,-0.074945,51.513756
...,...,...,...
34373,W01001892,-3.210109,51.507316
34374,W01001893,-3.248157,51.532365
34375,W01001894,-3.231427,51.522516
34376,W01001895,-3.231117,51.517978


### Geo map the LSOA codes

In [139]:
# Convert crime DataFrames to GeoDataFrames
gdf_ss = gpd.GeoDataFrame(stops_df, 
                            geometry=gpd.points_from_xy(stops_df['Longitude'], 
                                                        stops_df['Latitude']),
                            crs="EPSG:4326")  # WGS84 (standard lat/lon)

# Convert LSOA DataFrame to GeoDataFrame
gdf_lsoa = gpd.GeoDataFrame(LSOA_df, 
                            geometry=gpd.points_from_xy(LSOA_df['Longitude'], 
                                                        LSOA_df['Latitude']),
                            crs="EPSG:4326")

# Reproject to British National Grid (EPSG:27700) for accurate distance calculations
gdf_ss = gdf_ss.to_crs("EPSG:27700")
gdf_lsoa = gdf_lsoa.to_crs("EPSG:27700")

# Perform spatial join with corrected CRS
stops_df = gpd.sjoin_nearest(gdf_ss, gdf_lsoa, how="left", distance_col="distance")


# Convert back to EPSG:4326 (optional, if needed for mapping or output)
stops_df = stops_df.to_crs('EPSG:4326')

# Drop unnecessary columns if they exist
cols_to_drop = ['geometry', 'Longitude_right', 'Latitude_right']
stops_df = stops_df.drop(columns=[col for col in cols_to_drop if col in stops_df.columns])

# Rename columns to maintain consistency
stops_df = stops_df.rename(columns={'LSOA01CD': 'LSOA code',
                                          'Latitude_left': 'Latitude',
                                          'Longitude_left': 'Longitude'})

display(stops_df)


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4024.0,E01004025,414.636814
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4024.0,E01004025,414.636814
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4024.0,E01004025,414.636814
3,Person search,2022-01-01 01:48:59,51.508066,-0.087780,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,3934.0,E01003935,566.702621
4,Person search,2022-01-01 02:49:39,51.517680,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4309.0,E01004310,319.655331
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235906,Person and Vehicle search,2022-12-17 21:35:00,51.631560,-0.195095,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,278.0,E01000279,463.259019
235907,Person search,2022-12-17 21:57:00,,,Male,25-34,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,,,
235908,Person search,2022-12-17 22:00:00,51.514365,-0.143408,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,4763.0,E01004764,400.351140
235909,Person search,2022-12-17 22:16:00,,,Male,10-17,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,,,


## Clean dual jurisdiction cases 

And add missing LSOAs with no stop and search data

https://geoportal.statistics.gov.uk/datasets/ons::lsoa-2021-to-local-authority-districts-april-2023-best-fit-lookup-in-ew/explore


In [140]:
LSOA_names = pd.read_csv('../data/mapping_csvs/LSOA_names.csv')

# Define a list of Merseyside boroughs 
#https://www.ons.gov.uk/visualisations/areas/E11000002/ 
merseyside_names = ['Liverpool', 'Wirral', 'Sefton', 'Knowsley', 'St. Helens']




# Define a list of London boroughs 
# https://www.ons.gov.uk/visualisations/areas/E12000007/
london_boroughs = [
    "Barking and Dagenham", "Barnet", "Bexley", "Brent", "Bromley", "Camden",
    "Croydon", "Ealing", "Enfield", "Greenwich", "Hackney", "Hammersmith and Fulham",
    "Haringey", "Harrow", "Havering", "Hillingdon", "Hounslow", "Islington",
    "Kensington and Chelsea", "Kingston upon Thames", "Lambeth", "Lewisham",
    "Merton", "Newham", "Redbridge", "Richmond upon Thames", "Southwark",
    "Sutton", "Tower Hamlets", "Waltham Forest", "Wandsworth", "Westminster",
    "City of London" 
]

# filter certain boroughs
london_LSOAs = LSOA_names[LSOA_names['LAD23NM'].isin(london_boroughs)]
merseyside_LSOAs = LSOA_names[LSOA_names['LAD23NM'].isin(merseyside_names)]

# sort only LSOA and Borough columns
merseyside_LSOAs = merseyside_LSOAs[['LSOA21CD', 'LAD23NM']]
london_LSOAs = london_LSOAs[['LSOA21CD', 'LAD23NM']]

# change column names
london_LSOAs.columns = ['LSOA code', 'Borough']
merseyside_LSOAs.columns = ['LSOA code', 'Borough']


# # use merge to filter out bad cases
# merseyside = stops_df.merge(merseyside_LSOAs, on='LSOA code', how='right')
# london = stops_df.merge(london_LSOAs, on='LSOA code', how='right')
merseyside = stops_df.merge(merseyside_LSOAs, on='LSOA code', how='right')
london = stops_df.merge(london_LSOAs, on='LSOA code', how='right')

# # Display results
display(merseyside, london)

Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance,Borough
0,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,Black,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,E01006434,171.438495,Knowsley
1,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Controlled drugs,A no further action disposal,,False,6433.0,E01006434,171.438495,Knowsley
2,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,E01006434,454.001372,Knowsley
3,Person search,2022-01-25 11:48:55,53.474090,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6433.0,E01006434,221.084998,Knowsley
4,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,False,6433.0,E01006434,124.013202,Knowsley
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37780,,NaT,,,,,,,,,,,,,E01034836,,Wirral
37781,,NaT,,,,,,,,,,,,,E01034837,,Wirral
37782,,NaT,,,,,,,,,,,,,E01034838,,Wirral
37783,,NaT,,,,,,,,,,,,,E01034839,,Wirral


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance,Borough
0,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,False,False,0.0,E01000001,160.042424,City of London
1,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,Black,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,Arrest,True,False,0.0,E01000001,116.270257,City of London
2,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,False,False,0.0,E01000001,127.020561,City of London
3,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,0.0,E01000001,374.125238,City of London
4,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,False,False,0.0,E01000001,311.926748,City of London
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140289,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,Other,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,E01004665,55.143696,Westminster
140290,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,E01004665,55.143696,Westminster
140291,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,Other,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,E01004665,55.143696,Westminster
140292,Person search,2022-12-16 20:25:00,51.485949,-0.141768,Male,18-24,White - Any other White background,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,E01004665,55.143696,Westminster


# Creating LSOA Statistics

## House Prices

https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/meanpricepaidbylowerlayersuperoutputareahpssadataset47


In [141]:
# Read the 5th sheet (index 4, since indexing starts from 0)
xls = pd.read_excel('../data/LSOA_data/LSOA_house_prices.xls', 
                    sheet_name=5, 
                    engine='xlrd')

headings = xls.iloc[4]
house_prices = xls.iloc[5:]

# headings
house_prices.columns = headings

house_prices = house_prices[['Local authority code','LSOA code', 
                             'Year ending Jun 2022', 'Year ending Sep 2022', 'Year ending Dec 2022']]


house_prices['mean_house_price'] = (
    house_prices.iloc[:, 2:]
    .apply(pd.to_numeric, errors='coerce')  # Convert non-numeric values to NaN
    .mean(axis=1)
    .round()
)

house_prices = house_prices[['LSOA code', 'mean_house_price']].reset_index()
house_prices 

4,index,LSOA code,mean_house_price
0,5,E01011949,100415.0
1,6,E01011950,55639.0
2,7,E01011951,71779.0
3,8,E01011952,77052.0
4,9,E01011953,95933.0
...,...,...,...
34748,34753,W01001320,187601.0
34749,34754,W01001321,128623.0
34750,34755,W01001322,162018.0
34751,34756,W01001324,179577.0


In [142]:
# Merge house prices data with the merseyside DataFrame
merseyside = house_prices.merge(merseyside, on='LSOA code', how='right')
london = house_prices.merge(london, on='LSOA code', how='right')

# Calculate the mean of 'mean_house_price'
m_mean = merseyside['mean_house_price'].mean()
l_mean = london['mean_house_price'].mean()

# Fill missing values with the mean house price
merseyside['mean_house_price'] = merseyside['mean_house_price'].fillna(m_mean)
london['mean_house_price'] = london['mean_house_price'].fillna(l_mean) # change the value of parliament to BIG!!!!!


### Fix City of London House Prices

https://www.ons.gov.uk/visualisations/housingpriceslocal/E09000033/


In [143]:
CoL_LSAOs = LSOA_names[LSOA_names['LAD23NM'].isin(["City of London"])][['LSOA21CD']]

for place in CoL_LSAOs.values:

    london.loc[london['LSOA code'].isin(place), 'mean_house_price'] = 11_100_000

london

Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough
0,28014.0,E01000001,1.110000e+07,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,False,False,0.0,160.042424,City of London
1,28014.0,E01000001,1.110000e+07,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,Black,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,Arrest,True,False,0.0,116.270257,City of London
2,28014.0,E01000001,1.110000e+07,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,False,False,0.0,127.020561,City of London
3,28014.0,E01000001,1.110000e+07,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,0.0,374.125238,City of London
4,28014.0,E01000001,1.110000e+07,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,False,False,0.0,311.926748,City of London
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140289,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,Other,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster
140290,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster
140291,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,Other,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster
140292,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:25:00,51.485949,-0.141768,Male,18-24,White - Any other White background,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster


## LSOA % non-white

https://www.nomisweb.co.uk/sources/census_2021_bulk


In [144]:
LSOA_ethnic = pd.read_csv('../data/LSOA_data/2021census_ethnic.csv') 

nonWhite =  (1- LSOA_ethnic['Ethnic group: White'] / LSOA_ethnic.iloc[:,3])*100
LSOA_ethnic
LSOA_ethnic = LSOA_ethnic.iloc[:, [2,3]]
LSOA_ethnic['nonWhite'] = round(nonWhite,2)
LSOA_ethnic.columns = ['LSOA code', 'population', 'nonWhite']

merseyside = merseyside.merge(LSOA_ethnic, on='LSOA code', how='left')
london = london.merge(LSOA_ethnic, on='LSOA code', how='left')


## Crime Data

In [145]:
# Step 1: Fill NaNs in 'Outcome type' with values from 'Last outcome category'
crimes_df['Outcome type'] = crimes_df['Outcome type'].fillna(crimes_df['Last outcome category'])

# Step 2: Ensure that 'Outcome type' remains unchanged where it differs from 'Last outcome category'
crimes_df.loc[crimes_df['Last outcome category'] != crimes_df['Outcome type'], 'Outcome type'] = crimes_df['Outcome type']


crimes_df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,Outcome type,Date
0,f344b946a36b4dc1db0a4bb889e8ec0fd23ab65aa2bf39...,2022-01,City of London Police,City of London Police,-0.106453,51.518207,On or near Charterhouse Street,E01000916,Camden 027B,Burglary,Status update unavailable,,Status update unavailable,2022-01-01
1,e74962917ce995fa9e52623b6fe0c218619b79d4a22550...,2022-01,City of London Police,City of London Police,-0.113256,51.516824,On or near Old Square,E01000914,Camden 028B,Other theft,Investigation complete; no suspect identified,,Investigation complete; no suspect identified,2022-01-01
2,067092d6822753127ce767d011ea5c5b4375de6f5a3c48...,2022-01,City of London Police,City of London Police,-0.116100,51.518470,On or near Supermarket,E01000914,Camden 028B,Other theft,Status update unavailable,,Status update unavailable,2022-01-01
3,,2022-01,City of London Police,City of London Police,-0.097601,51.520699,On or near Carthusian Street,E01000001,City of London 001A,Anti-social behaviour,,,,2022-01-01
4,,2022-01,City of London Police,City of London Police,-0.095914,51.520348,On or near Beech Street,E01000001,City of London 001A,Anti-social behaviour,,,,2022-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1360391,7b127e4ff2fd1fdd40db3230cb20fac5af55b7cc66e2b9...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,,Investigation complete; no suspect identified,2022-12-01
1360392,5160c9abc5f0674f5f047e69c26f49f50f61f6e322131d...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,,Investigation complete; no suspect identified,2022-12-01
1360393,cee8aad8bb809fc0daefafd0d2fcece46fb554940212d0...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Offender given a caution,,Offender given a caution,2022-12-01
1360394,8f12b9f524fc5c8ec0e3965f7d7ab406632a2b3549aa12...,2022-12,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Status update unavailable,,Status update unavailable,2022-12-01


In [146]:
# crime and drug counts per LSOA
crime_counts = crimes_df['LSOA code'].value_counts().reset_index()
drug_counts = crimes_df.loc[crimes_df['Crime type'] == 'Drugs', 'LSOA code'].value_counts().reset_index()

drug_counts.columns = ['LSOA code', 'drug_sum']
crime_counts.columns = ['LSOA code', 'crime_sum']

# Merge with region data and calculate rates in one go
for df in [merseyside, london]:

    df = df.merge(crime_counts, on='LSOA code', how='left').merge(drug_counts, on='LSOA code', how='left')

    df['crimeRate'] = round(df['crime_sum'] / df['population'], 2)
    df['drugRate'] = round(df['drug_sum'] / df['population'], 2)


display(merseyside, london)  # Display final DataFrames


Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough,population,nonWhite
0,22486.0,E01006434,124940.0000,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,...,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,171.438495,Knowsley,1518,1.91
1,22486.0,E01006434,124940.0000,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,...,Police and Criminal Evidence Act 1984 (section 1),Controlled drugs,A no further action disposal,,False,6433.0,171.438495,Knowsley,1518,1.91
2,22486.0,E01006434,124940.0000,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,...,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,454.001372,Knowsley,1518,1.91
3,22486.0,E01006434,124940.0000,Person search,2022-01-25 11:48:55,53.474090,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6433.0,221.084998,Knowsley,1518,1.91
4,22486.0,E01006434,124940.0000,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,...,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,False,6433.0,124.013202,Knowsley,1518,1.91
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37780,,E01034836,146279.5293,,NaT,,,,,,...,,,,,,,,Wirral,1253,7.10
37781,,E01034837,146279.5293,,NaT,,,,,,...,,,,,,,,Wirral,1638,6.59
37782,,E01034838,146279.5293,,NaT,,,,,,...,,,,,,,,Wirral,1022,8.41
37783,,E01034839,146279.5293,,NaT,,,,,,...,,,,,,,,Wirral,1043,7.86


Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough,population,nonWhite
0,28014.0,E01000001,1.110000e+07,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,...,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,False,False,0.0,160.042424,City of London,1474,19.88
1,28014.0,E01000001,1.110000e+07,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,...,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,Arrest,True,False,0.0,116.270257,City of London,1474,19.88
2,28014.0,E01000001,1.110000e+07,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,False,False,0.0,127.020561,City of London,1474,19.88
3,28014.0,E01000001,1.110000e+07,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,...,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,0.0,374.125238,City of London,1474,19.88
4,28014.0,E01000001,1.110000e+07,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,...,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,False,False,0.0,311.926748,City of London,1474,19.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140289,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,...,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10
140290,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Asian/Asian British - Any other Asian background,...,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10
140291,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,...,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10
140292,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:25:00,51.485949,-0.141768,Male,18-24,White - Any other White background,...,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10


## Mapping the nearest station to the stop 

https://github.com/davwheat/uk-railway-stations/blob/main/stations.csv

In [147]:
stations_df = pd.read_csv('https://raw.githubusercontent.com/davwheat/uk-railway-stations/refs/heads/main/stations.csv')

stations_df.columns = ['Name', 'Latitude', 'Longitude', 'code#1', 'code#2']
stations_df = stations_df[['Longitude', 'Latitude']]

stations_df

Unnamed: 0,Longitude,Latitude
0,0.120343,51.490719
1,-3.230890,51.575363
2,-3.329549,51.642620
3,-3.443130,51.715019
4,-2.097464,57.143127
...,...,...
2619,-4.387464,55.892792
2620,-1.093159,53.957966
2621,-2.736450,52.809009
2622,-3.241342,51.640884


In [148]:
# Step 1: Convert Merseyside points to a GeoDataFrame
merseyside_map = gpd.GeoDataFrame(merseyside, 
                        geometry=gpd.points_from_xy(merseyside['Longitude'], 
                                                    merseyside['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)

london_map = gpd.GeoDataFrame(london, 
                        geometry=gpd.points_from_xy(london['Longitude'], 
                                                    london['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)


# Step 2: Convert Stations points to a GeoDataFrame
map_to_station = gpd.GeoDataFrame(stations_df, 
                        geometry=gpd.points_from_xy(stations_df['Longitude'], 
                                                    stations_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)

# Step 3: Reproject to British National Grid (EPSG:27700) for accurate distance measurements
merseyside_map = merseyside_map.to_crs("EPSG:27700")
london_map = london_map.to_crs("EPSG:27700")
map_to_station = map_to_station.to_crs("EPSG:27700")

# Step 4: Perform spatial join to map nearest train station to each Merseyside point
merseyside_results = gpd.sjoin_nearest(merseyside_map, map_to_station, how="left", distance_col="distance", lsuffix='_merseyside', rsuffix='_station')

london_results = gpd.sjoin_nearest(london_map, map_to_station, how="left", distance_col="distance", lsuffix='_london', rsuffix='_station')


# Step 5: Extract the list of distances
m_distances_list = merseyside_results['distance']
l_distances_list = london_results['distance']


# add the distances to dfs
merseyside['distStation'] = round(m_distances_list)
london['distStation'] = round(l_distances_list)

display(merseyside, london)

Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough,population,nonWhite,distStation
0,22486.0,E01006434,124940.0000,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,...,Article for use in theft,A no further action disposal,,False,6433.0,171.438495,Knowsley,1518,1.91,1811.0
1,22486.0,E01006434,124940.0000,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,...,Controlled drugs,A no further action disposal,,False,6433.0,171.438495,Knowsley,1518,1.91,1811.0
2,22486.0,E01006434,124940.0000,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,...,Article for use in theft,A no further action disposal,,False,6433.0,454.001372,Knowsley,1518,1.91,2088.0
3,22486.0,E01006434,124940.0000,Person search,2022-01-25 11:48:55,53.474090,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,Controlled drugs,A no further action disposal,,False,6433.0,221.084998,Knowsley,1518,1.91,2087.0
4,22486.0,E01006434,124940.0000,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,...,Stolen goods,A no further action disposal,,False,6433.0,124.013202,Knowsley,1518,1.91,2037.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37780,,E01034836,146279.5293,,NaT,,,,,,...,,,,,,,Wirral,1253,7.10,
37781,,E01034837,146279.5293,,NaT,,,,,,...,,,,,,,Wirral,1638,6.59,
37782,,E01034838,146279.5293,,NaT,,,,,,...,,,,,,,Wirral,1022,8.41,
37783,,E01034839,146279.5293,,NaT,,,,,,...,,,,,,,Wirral,1043,7.86,


Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough,population,nonWhite,distStation
0,28014.0,E01000001,1.110000e+07,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,...,Stolen goods,A no further action disposal,False,False,0.0,160.042424,City of London,1474,19.88,495.0
1,28014.0,E01000001,1.110000e+07,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,...,Article for use in theft,Arrest,True,False,0.0,116.270257,City of London,1474,19.88,507.0
2,28014.0,E01000001,1.110000e+07,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,Article for use in theft,A no further action disposal,False,False,0.0,127.020561,City of London,1474,19.88,481.0
3,28014.0,E01000001,1.110000e+07,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,...,Stolen goods,Community resolution,,,0.0,374.125238,City of London,1474,19.88,213.0
4,28014.0,E01000001,1.110000e+07,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,...,Stolen goods,A no further action disposal,False,False,0.0,311.926748,City of London,1474,19.88,455.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140289,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,...,Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10,1062.0
140290,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Asian/Asian British - Any other Asian background,...,Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10,1062.0
140291,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,...,Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10,1062.0
140292,32738.0,E01004665,7.260344e+05,Person search,2022-12-16 20:25:00,51.485949,-0.141768,Male,18-24,White - Any other White background,...,Controlled drugs,Community resolution,,,4664.0,55.143696,Westminster,1456,60.10,1062.0


## Index of Multiple Deprivation (IMD)


https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019

In [149]:
LSAO_IMD = pd.read_excel('../data/LSOA_data/Index_of_Multiple_Deprivation.xlsx',
              sheet_name=1)


LSAO_IMD = LSAO_IMD.iloc[:,[0,-1, -2]]
LSAO_IMD.columns = ['LSOA code', 'IMDDecile', 'IMDRank']

LSAO_IMD

Unnamed: 0,LSOA code,IMDDecile,IMDRank
0,E01000001,9,29199
1,E01000002,10,30379
2,E01000003,5,14915
3,E01000005,3,8678
4,E01000006,5,14486
...,...,...,...
32839,E01033764,1,116
32840,E01033765,1,945
32841,E01033766,4,12842
32842,E01033767,1,422


In [150]:
merseyside = merseyside.merge(LSAO_IMD, on='LSOA code', how='left')
london = london.merge(LSAO_IMD, on='LSOA code', how='left')

display(london.head(), merseyside.head())

Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough,population,nonWhite,distStation,IMDDecile,IMDRank
0,28014.0,E01000001,11100000.0,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,...,False,False,0.0,160.042424,City of London,1474,19.88,495.0,9.0,29199.0
1,28014.0,E01000001,11100000.0,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,...,True,False,0.0,116.270257,City of London,1474,19.88,507.0,9.0,29199.0
2,28014.0,E01000001,11100000.0,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,False,False,0.0,127.020561,City of London,1474,19.88,481.0,9.0,29199.0
3,28014.0,E01000001,11100000.0,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,...,,,0.0,374.125238,City of London,1474,19.88,213.0,9.0,29199.0
4,28014.0,E01000001,11100000.0,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,...,False,False,0.0,311.926748,City of London,1474,19.88,455.0,9.0,29199.0


Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance,Borough,population,nonWhite,distStation,IMDDecile,IMDRank
0,22486.0,E01006434,124940.0,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,...,,False,6433.0,171.438495,Knowsley,1518,1.91,1811.0,1.0,694.0
1,22486.0,E01006434,124940.0,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,...,,False,6433.0,171.438495,Knowsley,1518,1.91,1811.0,1.0,694.0
2,22486.0,E01006434,124940.0,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,...,,False,6433.0,454.001372,Knowsley,1518,1.91,2088.0,1.0,694.0
3,22486.0,E01006434,124940.0,Person search,2022-01-25 11:48:55,53.47409,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,,False,6433.0,221.084998,Knowsley,1518,1.91,2087.0,1.0,694.0
4,22486.0,E01006434,124940.0,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,...,,False,6433.0,124.013202,Knowsley,1518,1.91,2037.0,1.0,694.0


## Aggregate Stop and Search Rate

could maybe do this way before

In [151]:


stops_per_lsoa_merseyside = merseyside.groupby('LSOA code')['Outcome'].count().reset_index()
stops_per_lsoa_london = london.groupby('LSOA code')['Outcome'].count().reset_index()


stops_per_lsoa = stops_per_lsoa_merseyside.rename(columns={'Outcome': 'stop_count'})
merseyside = merseyside.merge( stops_per_lsoa, on='LSOA code', how='left')


stops_per_lsoa = stops_per_lsoa_london.rename(columns={'Outcome': 'stop_count'})
london = london.merge( stops_per_lsoa, on='LSOA code', how='left')


merseyside['stops_per_LSOA'] = round(merseyside['stop_count'] / merseyside['population'] * 100,2)
merseyside['stops_per_1000'] = round(merseyside['stop_count'] / 10,2)

london['stops_per_LSOA'] = round(london['stop_count'] / merseyside['population'] * 100,2)
london['stops_per_1000'] = round(london['stop_count'] / 10,2)


display(london.head(), merseyside.head())

Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,distance,Borough,population,nonWhite,distStation,IMDDecile,IMDRank,stop_count,stops_per_LSOA,stops_per_1000
0,28014.0,E01000001,11100000.0,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,...,160.042424,City of London,1474,19.88,495.0,9.0,29199.0,94,6.19,9.4
1,28014.0,E01000001,11100000.0,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,...,116.270257,City of London,1474,19.88,507.0,9.0,29199.0,94,6.19,9.4
2,28014.0,E01000001,11100000.0,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,127.020561,City of London,1474,19.88,481.0,9.0,29199.0,94,6.19,9.4
3,28014.0,E01000001,11100000.0,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,...,374.125238,City of London,1474,19.88,213.0,9.0,29199.0,94,6.19,9.4
4,28014.0,E01000001,11100000.0,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,...,311.926748,City of London,1474,19.88,455.0,9.0,29199.0,94,6.19,9.4


Unnamed: 0,index,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,...,distance,Borough,population,nonWhite,distStation,IMDDecile,IMDRank,stop_count,stops_per_LSOA,stops_per_1000
0,22486.0,E01006434,124940.0,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,...,171.438495,Knowsley,1518,1.91,1811.0,1.0,694.0,114,7.51,11.4
1,22486.0,E01006434,124940.0,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,...,171.438495,Knowsley,1518,1.91,1811.0,1.0,694.0,114,7.51,11.4
2,22486.0,E01006434,124940.0,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,...,454.001372,Knowsley,1518,1.91,2088.0,1.0,694.0,114,7.51,11.4
3,22486.0,E01006434,124940.0,Person search,2022-01-25 11:48:55,53.47409,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,...,221.084998,Knowsley,1518,1.91,2087.0,1.0,694.0,114,7.51,11.4
4,22486.0,E01006434,124940.0,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,...,124.013202,Knowsley,1518,1.91,2037.0,1.0,694.0,114,7.51,11.4


# Final Touches

### Remove useless columns


In [152]:
cols_to_drop = ['index', 'index_right',  'distance']

merseyside = merseyside.drop(columns=cols_to_drop)
london = london.drop(columns=cols_to_drop)

### Rename columns to be more efficient

In [157]:
cols = ['LSOA21CD', 'hprice', 'type', 'date', 'lat', 'long', 'gender', 'age', 'selfEthnic', 'officerEthnic', 'leg', 'obj', 'outcome',
        'link', 'rmCloth', 'borough', 'pop', 'nonWhite', 'distStation', 'IMDDecile', 'IMDRank', 'stopsLSOAtotal', 'stopsLSOA', 'stops1000' ]

merseyside.columns = cols
london.columns = cols

display(merseyside, london)

Unnamed: 0,LSOA21CD,hprice,type,date,lat,long,gender,age,selfEthnic,officerEthnic,...,rmCloth,borough,pop,nonWhite,distStation,IMDDecile,IMDRank,stopsLSOAtotal,stopsLSOA,stops1000
0,E01006434,124940.0000,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,Black,...,False,Knowsley,1518,1.91,1811.0,1.0,694.0,114,7.51,11.4
1,E01006434,124940.0000,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,...,False,Knowsley,1518,1.91,1811.0,1.0,694.0,114,7.51,11.4
2,E01006434,124940.0000,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,...,False,Knowsley,1518,1.91,2088.0,1.0,694.0,114,7.51,11.4
3,E01006434,124940.0000,Person search,2022-01-25 11:48:55,53.474090,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,...,False,Knowsley,1518,1.91,2087.0,1.0,694.0,114,7.51,11.4
4,E01006434,124940.0000,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,Asian,...,False,Knowsley,1518,1.91,2037.0,1.0,694.0,114,7.51,11.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37780,E01034836,146279.5293,,NaT,,,,,,,...,,Wirral,1253,7.10,,,,0,0.00,0.0
37781,E01034837,146279.5293,,NaT,,,,,,,...,,Wirral,1638,6.59,,,,0,0.00,0.0
37782,E01034838,146279.5293,,NaT,,,,,,,...,,Wirral,1022,8.41,,,,0,0.00,0.0
37783,E01034839,146279.5293,,NaT,,,,,,,...,,Wirral,1043,7.86,,,,0,0.00,0.0


Unnamed: 0,LSOA21CD,hprice,type,date,lat,long,gender,age,selfEthnic,officerEthnic,...,rmCloth,borough,pop,nonWhite,distStation,IMDDecile,IMDRank,stopsLSOAtotal,stopsLSOA,stops1000
0,E01000001,1.110000e+07,Person search,2022-01-10 02:40:15,51.520699,-0.097601,Male,over 34,Other ethnic group - Not stated,Black,...,False,City of London,1474,19.88,495.0,9.0,29199.0,94,6.19,9.4
1,E01000001,1.110000e+07,Person search,2022-01-10 12:37:39,51.518864,-0.097562,Male,over 34,Mixed/Multiple ethnic groups - White and Black...,Black,...,False,City of London,1474,19.88,507.0,9.0,29199.0,94,6.19,9.4
2,E01000001,1.110000e+07,Person search,2022-01-20 05:14:39,51.520206,-0.097736,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,...,False,City of London,1474,19.88,481.0,9.0,29199.0,94,6.19,9.4
3,E01000001,1.110000e+07,Person search,2022-01-26 08:17:00,51.519703,-0.101649,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,...,,City of London,1474,19.88,213.0,9.0,29199.0,94,6.19,9.4
4,E01000001,1.110000e+07,Person search,2022-02-04 08:59:46,51.517146,-0.098642,Male,18-24,Black/African/Caribbean/Black British - Caribbean,Black,...,False,City of London,1474,19.88,455.0,9.0,29199.0,94,6.19,9.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140289,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,Other,...,,Westminster,1456,60.10,1062.0,2.0,5376.0,121,,12.1
140290,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Asian/Asian British - Any other Asian background,Asian,...,,Westminster,1456,60.10,1062.0,2.0,5376.0,121,,12.1
140291,E01004665,7.260344e+05,Person search,2022-12-16 20:20:00,51.485949,-0.141768,Male,18-24,Other ethnic group - Any other ethnic group,Other,...,,Westminster,1456,60.10,1062.0,2.0,5376.0,121,,12.1
140292,E01004665,7.260344e+05,Person search,2022-12-16 20:25:00,51.485949,-0.141768,Male,18-24,White - Any other White background,White,...,,Westminster,1456,60.10,1062.0,2.0,5376.0,121,,12.1


### Save to CSV

In [158]:
merseyside.to_csv(f'../data/merseyside{csv_names[1].split('/')[3][:4]}.csv')
london.to_csv(f'../data/london{csv_names[1].split('/')[3][:4]}.csv')