# Creating the London and Merseyside CSVs

In [None]:

import os
import pandas as pd 
from pyproj import Transformer


import geopandas as gpd
from shapely.geometry import Point


In [3]:
path = '../data/police_zips'    



# find all csv file paths
csv_names = []
for folder in os.listdir(path):
        
            folder_contents = os.listdir(f'{path}/{folder}')
            
            for file in folder_contents:
                
                    
                csv_names.append(f'{path}/{folder}/{file}')



csv_names
        

['../data/police_zips/2022-01/2022-01-city-of-london-outcomes.csv',
 '../data/police_zips/2022-01/2022-01-city-of-london-stop-and-search.csv',
 '../data/police_zips/2022-01/2022-01-city-of-london-street.csv',
 '../data/police_zips/2022-01/2022-01-merseyside-outcomes.csv',
 '../data/police_zips/2022-01/2022-01-merseyside-stop-and-search.csv',
 '../data/police_zips/2022-01/2022-01-merseyside-street.csv',
 '../data/police_zips/2022-01/2022-01-metropolitan-outcomes.csv',
 '../data/police_zips/2022-01/2022-01-metropolitan-stop-and-search.csv',
 '../data/police_zips/2022-01/2022-01-metropolitan-street.csv',
 '../data/police_zips/2022-02/2022-02-city-of-london-outcomes.csv',
 '../data/police_zips/2022-02/2022-02-city-of-london-stop-and-search.csv',
 '../data/police_zips/2022-02/2022-02-city-of-london-street.csv',
 '../data/police_zips/2022-02/2022-02-merseyside-outcomes.csv',
 '../data/police_zips/2022-02/2022-02-merseyside-stop-and-search.csv',
 '../data/police_zips/2022-02/2022-02-merseysid

# Extract the CSV's

In [143]:
london_ss = []
london_crimes = []
london_outcomes = []

merseyside_ss = []
merseyside_crimes = []
merseyside_outcomes = []


for csv in csv_names:
 

    # City of London Data
    if csv.endswith('city-of-london-stop-and-search.csv'):
        london_ss.append(pd.read_csv(csv))

    elif csv.endswith('city-of-london-street.csv'):
        london_crimes.append(pd.read_csv(csv))

    elif csv.endswith('city-of-london-outcomes.csv'):
        london_outcomes.append(pd.read_csv(csv))



    # MET Polcie Data
    elif csv.endswith('metropolitan-stop-and-search.csv'):
        london_ss.append(pd.read_csv(csv))

    elif csv.endswith('metropolitan-street.csv'):
        london_crimes.append(pd.read_csv(csv))

    elif csv.endswith('metropolitan-outcomes.csv'):
        london_outcomes.append(pd.read_csv(csv))


    # Merseyside Data
    elif csv.endswith('merseyside-stop-and-search.csv'):
        merseyside_ss.append(pd.read_csv(csv))

    elif csv.endswith('merseyside-street.csv'):
        merseyside_crimes.append(pd.read_csv(csv))

    elif csv.endswith('merseyside-outcomes.csv'):
        merseyside_outcomes.append(pd.read_csv(csv))





# Convert to Pandas df
london_ss_df = pd.concat(london_ss, ignore_index=True)
london_crimes_df = pd.concat(london_crimes, ignore_index=True)
london_outcomes_df = pd.concat(london_outcomes, ignore_index=True)

# Convert to Pandas df
merseyside_ss_df = pd.concat(merseyside_ss, ignore_index=True)
merseyside_crimes_df = pd.concat(merseyside_crimes, ignore_index=True)
merseyside_outcomes_df = pd.concat(merseyside_outcomes, ignore_index=True)

### Combine the crime and outcomes by 'CRIME ID'

In [144]:
# merge London crimes with outcomes
london_outcomes_filtered = london_outcomes_df[['Crime ID', 'Outcome type']]  # Select only necessary columns
london_crimes_with_outcomes = london_crimes_df.merge(london_outcomes_filtered, on='Crime ID', how='left')


# merge Merseyside crimes with outcomes
merseyside_outcomes_filtered = merseyside_outcomes_df[['Crime ID', 'Outcome type']]  # Select only necessary columns
merseyside_crimes_with_outcomes = merseyside_crimes_df.merge(merseyside_outcomes_filtered, on='Crime ID', how='left')

# display(london_crimes_with_outcomes)
# display(merseyside_crimes_with_outcomes)

### Convert to datetime


In [145]:
# Convert the date column to datetime format
london_crimes_with_outcomes['Date'] = pd.to_datetime(london_crimes_with_outcomes['Month'], format= '%Y-%m')
london_ss_df['Date'] = pd.to_datetime(london_ss_df['Date']).dt.tz_convert(None) # convert to datetime and remove timezone
london_ss_df.drop(columns=['Part of a policing operation', 'Policing operation'], inplace=True)


merseyside_crimes_with_outcomes['Date'] = pd.to_datetime(merseyside_crimes_with_outcomes['Month'], format= '%Y-%m')
merseyside_ss_df['Date'] = pd.to_datetime(merseyside_ss_df['Date']).dt.tz_convert(None) # convert to datetime and remove timezone
merseyside_ss_df.drop(columns=['Part of a policing operation', 'Policing operation'], inplace=True) # remove empty columns




## Save to CSV

In [7]:
# years = london_ss_df['Date'].dt.year.value_counts().index
# years = '2022'
# ss_path = '../data/stop_and_search/'
# crime_path = '../data/crime/'


# for year in years:
#     london_ss_df[london_ss_df['Date'].dt.year == year].to_csv(f'{ss_path}london_stop_and_search_{year}.csv', index=False)
#     merseyside_ss_df[merseyside_ss_df['Date'].dt.year == year].to_csv(f'{ss_path}merseyside_stop_and_search_{year}.csv', index=False)


#     merseyside_crimes_with_outcomes[merseyside_crimes_with_outcomes['Date'].dt.year == year].to_csv(f'{crime_path}merseyside_crime_{year}.csv', index=False)
#     london_crimes_with_outcomes[london_crimes_with_outcomes['Date'].dt.year == year].to_csv(f'{crime_path}london_crime_{year}.csv', index=False)


# Add House Prices

In [8]:
# Read the 5th sheet (index 4, since indexing starts from 0)
xls = pd.read_excel('../data/house_prices/LSOA_house_prices.xls', 
                    sheet_name=5, 
                    engine='xlrd')

# Display the first few rows
display(xls)

Unnamed: 0,Contents,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109,Unnamed: 110,Unnamed: 111,Unnamed: 112
0,Table 1,,,,,,,,,,...,,,,,,,,,,
1,Mean price paid for residential properties by ...,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,Local authority code,Local authority name,LSOA code,LSOA name,Year ending Dec 1995,Year ending Mar 1996,Year ending Jun 1996,Year ending Sep 1996,Year ending Dec 1996,Year ending Mar 1997,...,Year ending Sep 2020,Year ending Dec 2020,Year ending Mar 2021,Year ending Jun 2021,Year ending Sep 2021,Year ending Dec 2021,Year ending Mar 2022,Year ending Jun 2022,Year ending Sep 2022,Year ending Dec 2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34753,W06000024,Merthyr Tydfil,W01001320,Merthyr Tydfil 007C,39867.727273,42240.238095,44742.037037,51591.184211,51267.5,52429.756098,...,153100,169125,181729.16667,161261.11111,156668.42105,153089.47368,165997.22222,171191.66667,192085.29412,199525
34754,W06000024,Merthyr Tydfil,W01001321,Merthyr Tydfil 007D,24307.1875,25881.923077,25183.214286,25051.363636,24506.25,20812.5,...,111625,122352.94118,127416.66667,135097.77778,142794.83871,142989.71429,142132.57143,131966.66667,131250,122652.17391
34755,W06000024,Merthyr Tydfil,W01001322,Merthyr Tydfil 007E,23715.625,33924.75,34800.833333,31015.869565,31882.678571,26698.846154,...,124181.05263,115218.61111,122119.34783,129935,129680.86207,142870.68966,142972.22222,154142.85714,152500,179411.76471
34756,W06000024,Merthyr Tydfil,W01001324,Merthyr Tydfil 003E,45982.65625,43623.166667,42017.115385,44035.185185,41017.307692,37119.565217,...,124270.83333,151395.83333,200025,192460,198424.07407,172061.12903,157373.96552,160673.03571,171343.26087,206715.625


In [9]:

headings = xls.iloc[4]
house_prices = xls.iloc[5:]

# headings
house_prices.columns = headings

house_prices = house_prices[['Local authority code','LSOA code', 
                             'Year ending Jun 2022', 'Year ending Sep 2022', 'Year ending Dec 2022']]


house_prices['£_mean_house_price'] = (
    house_prices.iloc[:, 2:]
    .apply(pd.to_numeric, errors='coerce')  # Convert non-numeric values to NaN
    .mean(axis=1)
    .round()
)

house_prices = house_prices[['LSOA code', '£_mean_house_price']]
house_prices 


4,LSOA code,£_mean_house_price
5,E01011949,100415.0
6,E01011950,55639.0
7,E01011951,71779.0
8,E01011952,77052.0
9,E01011953,95933.0
...,...,...
34753,W01001320,187601.0
34754,W01001321,128623.0
34755,W01001322,162018.0
34756,W01001324,179577.0


In [10]:
xlsx = pd.read_excel('../data/house_prices/LSOA_populations.xlsx', 
                     sheet_name=6,  # Ensure the correct sheet index
                     engine='openpyxl')  # Correct engine for .xlsx files

# Display the first few rows
display(xlsx)



Unnamed: 0,"Estimates by single year of age and sex for 2021 Lower layer Super Output Areas, mid-2021",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 177,Unnamed: 178,Unnamed: 179,Unnamed: 180,Unnamed: 181,Unnamed: 182,Unnamed: 183,Unnamed: 184,Unnamed: 185,Unnamed: 186
0,This worksheet contains one table.,,,,,,,,,,...,,,,,,,,,,
1,To turn off freeze panes select the 'View' rib...,,,,,,,,,,...,,,,,,,,,,
2,LAD 2021 Code,LAD 2021 Name,LSOA 2021 Code,LSOA 2021 Name,Total,F0,F1,F2,F3,F4,...,M81,M82,M83,M84,M85,M86,M87,M88,M89,M90
3,E06000001,Hartlepool,E01011949,Hartlepool 009A,1854,5,10,10,10,0,...,4,5,6,6,6,0,0,6,0,0
4,E06000001,Hartlepool,E01011950,Hartlepool 008A,1037,7,5,10,5,5,...,0,5,0,6,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35670,W06000024,Merthyr Tydfil,W01001324,Merthyr Tydfil 003E,1892,6,5,10,14,5,...,4,15,5,0,4,6,0,0,7,6
35671,W06000024,Merthyr Tydfil,W01001898,Merthyr Tydfil 008F,1448,5,5,5,5,5,...,5,5,0,4,5,0,0,0,7,3
35672,W06000024,Merthyr Tydfil,W01001959,Merthyr Tydfil 005E,1535,3,5,10,10,5,...,0,0,5,0,0,0,7,0,0,0
35673,W06000024,Merthyr Tydfil,W01001960,Merthyr Tydfil 005F,1458,2,5,10,10,10,...,5,5,0,0,5,0,0,0,0,4


In [208]:
headings = xlsx.iloc[2]
LSOA_pop = xlsx.iloc[3:]

headings
LSOA_pop.columns = headings
LSOA_pop = LSOA_pop[['LSOA 2021 Code', 'Total']]
LSOA_pop.columns = ['LSOA code', 'LSOA population']

LSOA_pop


Unnamed: 0,LSOA code,LSOA population
3,E01011949,1854
4,E01011950,1037
5,E01011951,1203
6,E01011952,1610
7,E01011953,1970
...,...,...
35670,W01001324,1892
35671,W01001898,1448
35672,W01001959,1535
35673,W01001960,1458


In [209]:
data_22 = house_prices.merge(LSOA_pop, on='LSOA code', how='left')

data_22

Unnamed: 0,LSOA code,£_mean_house_price,LSOA population
0,E01011949,100415.0,1854
1,E01011950,55639.0,1037
2,E01011951,71779.0,1203
3,E01011952,77052.0,1610
4,E01011953,95933.0,1970
...,...,...,...
34748,W01001320,187601.0,1437
34749,W01001321,128623.0,1629
34750,W01001322,162018.0,1647
34751,W01001324,179577.0,1892


## Create Scores

In [None]:
mean_pop = round(data_22['LSOA population'].mean())  # Ensure rounding
mean_house = round(data_22['£_mean_house_price'].mean())  # Ensure rounding

data_22['LSOA population'] = data_22['LSOA population'].fillna(mean_pop).astype(int)  
data_22['£_mean_house_price'] = data_22['£_mean_house_price'].fillna(mean_house).astype(int)  

# Check for remaining NaN values
print(data_22.isna().sum())

data_22  


LSOA code             0
£_mean_house_price    0
pop_sum               0
dtype: int64


Unnamed: 0,LSOA code,£_mean_house_price,pop_sum
0,E01011949,100415,1854
1,E01011950,55639,1037
2,E01011951,71779,1203
3,E01011952,77052,1610
4,E01011953,95933,1970
...,...,...,...
34748,W01001320,187601,1437
34749,W01001321,128623,1629
34750,W01001322,162018,1647
34751,W01001324,179577,1892


## Add crime rates to LSOA

In [218]:




merseyside_22 = merseyside_crimes_with_outcomes.merge(data_22, on='LSOA code', how='left')
london_22 = london_crimes_with_outcomes.merge(data_22, on='LSOA code', how='left')



merseyside_22 = merseyside_22[['Date', 'LSOA name', 'LSOA code', 'Crime type', '£_mean_house_price', 'LSOA population','Last outcome category', 'Outcome type']]
london_22 = london_22[['Date', 'LSOA name', 'LSOA code', 'Crime type', '£_mean_house_price', 'LSOA population','Last outcome category', 'Outcome type']]



merseyside_22['Drug crime rate'] = (merseyside_22['Crime type'] == 'Drugs').astype(int)
london_22['Drug crime rate'] = (london_22['Crime type'] == 'Drugs').astype(int)


merseyside_22['Violent crime rate'] = (merseyside_22['Crime type'] == 'Vehicle crime').astype(int)
london_22['Violent crime rate'] = (london_22['Crime type'] == 'Vehicle crime').astype(int)



prosecuted_outcomes = [
    'Suspect charged', 'Action to be taken by another organisation', 
    'Suspect charged as part of another case', 'Offender given penalty notice'
]



merseyside_22['Prosecution rate'] = merseyside_22['Last outcome category'].isin(prosecuted_outcomes).astype(int)
london_22['Prosecution rate'] = london_22['Last outcome category'].isin(prosecuted_outcomes).astype(int)


merseyside_22 = merseyside_22.groupby('LSOA code', as_index=False).agg({
    'LSOA population': 'first',
    '£_mean_house_price': 'first',
    'Crime type': 'count',
    'Drug crime rate': 'mean',
    'Violent crime rate': 'mean',
    'Prosecution rate': 'mean'})
london_22 = london_22.groupby('LSOA code', as_index=False).agg({
    'LSOA population': 'first',
    '£_mean_house_price': 'first',
    'Crime type': 'count',
    'Drug crime rate': 'mean',
    'Violent crime rate': 'mean',
    'Prosecution rate': 'mean'})







merseyside_22['Crime rate'] = merseyside_22['Crime type'] / merseyside_22['LSOA population']
london_22['Crime rate'] = london_22['Crime type'] / london_22['LSOA population']


merseyside_22 = merseyside_22.drop(columns={'Crime type'})
london_22 = london_22.drop(columns={'Crime type'})



merseyside_22[['Drug crime rate', 
    'Violent crime rate', 
    'Prosecution rate', 
    'Crime rate']] = round(merseyside_22[['Drug crime rate', 
                               'Violent crime rate', 
                               'Prosecution rate', 
                               'Crime rate']] *1000,2)
london_22[['Drug crime rate', 
    'Violent crime rate', 
    'Prosecution rate', 
    'Crime rate']] = round(london_22[['Drug crime rate', 
                               'Violent crime rate', 
                               'Prosecution rate', 
                               'Crime rate']] *1000,2)



display(london_22)
display(merseyside_22)


Unnamed: 0,LSOA code,LSOA population,£_mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,E01000001,1573,942221.0,55.78,131.47,11.95,159.567705
1,E01000002,1407,1035931.0,30.73,13.97,0.00,254.442075
2,E01000003,1610,605185.0,15.38,61.54,0.00,80.745342
3,E01000005,1104,,66.67,48.98,5.44,665.76087
4,E01000006,1829,277604.0,107.69,161.54,23.08,71.077091
...,...,...,...,...,...,...,...
7872,W01001870,1420,226517.0,0.00,0.00,0.00,1.408451
7873,W01001897,1854,363184.0,0.00,0.00,0.00,1.078749
7874,W01001940,1049,178192.0,0.00,0.00,0.00,0.953289
7875,W01001953,1495,298163.0,0.00,0.00,0.00,0.668896


Unnamed: 0,LSOA code,LSOA population,£_mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,E01006225,1557,196182.0,500.00,0.00,0.0,1.284522
1,E01006264,1237,239444.0,0.00,333.33,0.0,2.425222
2,E01006346,2077,191080.0,0.00,0.00,0.0,0.481464
3,E01006366,1823,231052.0,0.00,0.00,0.0,1.645639
4,E01006368,1876,188987.0,250.00,0.00,0.0,2.132196
...,...,...,...,...,...,...,...
965,E01034836,,,222.22,0.00,0.0,
966,E01034837,,,200.00,0.00,0.0,
967,E01034838,,,285.71,142.86,0.0,
968,E01034839,,,181.82,0.00,0.0,


# Adding LSOA to Stop and Search

In [137]:
LSOA_df = pd.read_csv('../data/mapping_csvs/LSOA_to_coords.csv')
LSOA_df

Unnamed: 0,FID,LSOA01CD,LSOA01NM,GlobalID,x,y
0,1,E01000001,City of London 001A,3f778391-d557-47f0-91ab-257ee265c5c6,532182,181785
1,2,E01000002,City of London 001B,1cdc7e6c-a108-4410-80d5-dc6623582676,532434,181810
2,3,E01000003,City of London 001C,95f673aa-8bc2-447d-88e6-f15d77bd5492,532199,182064
3,4,E01000004,City of London 001D,408281cc-db8a-44c3-a611-d7497ef3545e,532107,181183
4,5,E01000005,City of London 001E,03ed3f20-2f4d-4870-bb54-5bd372c80e68,533678,181182
...,...,...,...,...,...,...
34373,34374,W01001892,Cardiff 020D,5b54ed80-cdb0-445f-9f1c-b389ed8b046b,316112,179404
34374,34375,W01001893,Cardiff 010B,b6beb227-3850-4ac1-a5c0-c0cabf9d236d,313519,182234
34375,34376,W01001894,Cardiff 010C,468e8bc5-d07d-4ecf-9c59-3b5f96abec6c,314661,181119
34376,34377,W01001895,Cardiff 010D,37741ec6-5ed5-4526-b5a3-8235720d3903,314674,180614


## Convert Ordnance Survey National Grid to longitude/latitude

In [138]:
import numpy as np
from pyproj import Transformer

# Define transformer from OSGB36 (EPSG:27700) to WGS84 (EPSG:4326)
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

def convert_uk_coords(eastings, northings):
    """Vectorised function to convert Eastings/Northings to Longitude/Latitude."""
    longitudes, latitudes = transformer.transform(eastings, northings)
    return longitudes, latitudes


df = pd.DataFrame()

# Example DataFrame
LSOA_df['Longitude'], LSOA_df['Latitude'] = convert_uk_coords(LSOA_df['x'], LSOA_df['y'])


LSOA_df = LSOA_df[['LSOA01CD', 'Longitude', 'Latitude']]
LSOA_df

Unnamed: 0,LSOA01CD,Longitude,Latitude
0,E01000001,-0.096266,51.519526
1,E01000002,-0.092626,51.519692
2,E01000003,-0.095916,51.522029
3,E01000004,-0.097571,51.514134
4,E01000005,-0.074945,51.513756
...,...,...,...
34373,W01001892,-3.210109,51.507316
34374,W01001893,-3.248157,51.532365
34375,W01001894,-3.231427,51.522516
34376,W01001895,-3.231117,51.517978


# Assign each Coordinate in Stop and search data to LSOA

using geopandas

In [220]:


# Convert Merseyside crime DataFrame to GeoDataFrame
gdf_ss_m = gpd.GeoDataFrame(merseyside_ss_df, 
                              geometry=gpd.points_from_xy(merseyside_ss_df['Longitude'], 
                                                          merseyside_ss_df['Latitude']),
                              crs="EPSG:4326")  # WGS84 (standard lat/lon)
# Convert London crime DataFrame to GeoDataFrame
gdf_ss_l = gpd.GeoDataFrame(london_ss_df, 
                              geometry=gpd.points_from_xy(london_ss_df['Longitude'], 
                                                          london_ss_df['Latitude']),
                              crs="EPSG:4326")  # WGS84 (standard lat/lon)



# Convert LSOA centroids DataFrame to GeoDataFrame
gdf_lsoa = gpd.GeoDataFrame(LSOA_df, 
                            geometry=gpd.points_from_xy(LSOA_df['Longitude'], 
                                                        LSOA_df['Latitude']),
                            crs="EPSG:4326")


# Perform spatial join to associate each crime with its nearest LSOA
merged_df_m = gpd.sjoin_nearest(gdf_ss_m, gdf_lsoa, how="left")
merged_df_l = gpd.sjoin_nearest(gdf_ss_l, gdf_lsoa, how="left")



# Drop unnecessary columns and check results
merged_df_m = merged_df_m.drop(columns=['geometry', 'Longitude_right', 'Latitude_right'])  # Remove unnecessary columns if not needed
merged_df_l = merged_df_l.drop(columns=['geometry', 'Longitude_right', 'Latitude_right'])  # Remove unnecessary columns if not needed


merged_df_m = merged_df_m.rename(columns={'LSOA01CD': 'LSOA code',
                                      'Latitude_left': 'Latitude',
                                      'Longitude_left': 'Longitude'})
merged_df_l = merged_df_l.rename(columns={'LSOA01CD': 'LSOA code',
                                      'Latitude_left': 'Latitude',
                                      'Longitude_left': 'Longitude'})



# merge the stats
merged_df_m.merge(merseyside_22,on='LSOA code')
merged_df_l.merge(london_22,on='LSOA code')



display(merged_df_m)
display(merged_df_l)






Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code
0,Person search,2022-01-01 00:04:00,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,
1,Person search,2022-01-01 00:07:45,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,
2,Person search,2022-01-01 00:12:22,53.406618,-2.983878,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6649.0,E01006650
3,Person search,2022-01-01 00:17:54,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,
4,Person search,2022-01-01 00:22:40,53.403914,-2.981499,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,False,True,6510.0,E01006511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53342,Person search,2022-12-31 23:50:36,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False,6657.0,E01006658
53343,Person and Vehicle search,2022-12-31 23:55:04,53.463911,-2.976511,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,7050.0,E01007051
53344,Person search,2022-12-31 23:55:52,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False,6657.0,E01006658
53345,Person search,2022-12-31 23:56:03,53.388654,-3.035339,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,7128.0,E01007129


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025
3,Person search,2022-01-01 01:48:59,51.508066,-0.087780,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,3980.0,E01003981
4,Person search,2022-01-01 02:49:39,51.517680,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4306.0,E01004307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182559,Person and Vehicle search,2022-12-17 21:35:00,51.631560,-0.195095,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,278.0,E01000279
182560,Person search,2022-12-17 21:57:00,,,Male,25-34,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,,
182561,Person search,2022-12-17 22:00:00,51.514365,-0.143408,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,4764.0,E01004765
182562,Person search,2022-12-17 22:16:00,,,Male,10-17,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,,


### Combine LSOA stats with stop and search

In [222]:
merseyside_df = merged_df_m.merge(merseyside_22, on='LSOA code' )
london_df = merged_df_l.merge(london_22, on='LSOA code' )


merseyside_df = merseyside_df[['Date', 'LSOA code', 'Latitude', 'Longitude', 'Type', 'Gender', 'Age range', 
               'Self-defined ethnicity', 'Officer-defined ethnicity', 
               'Legislation', 'Object of search', 'Outcome' , 'LSOA population',
               '£_mean_house_price', 'Drug crime rate', 'Violent crime rate',
               'Prosecution rate', 'Crime rate']]
london_df = london_df[['Date', 'LSOA code', 'Latitude', 'Longitude', 'Type', 'Gender', 'Age range', 
               'Self-defined ethnicity', 'Officer-defined ethnicity', 
               'Legislation', 'Object of search', 'Outcome' , 'LSOA population',
               '£_mean_house_price', 'Drug crime rate', 'Violent crime rate',
               'Prosecution rate', 'Crime rate']]

display(london_df)
display(merseyside_df)

Unnamed: 0,Date,LSOA code,Latitude,Longitude,Type,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,LSOA population,£_mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,2022-01-01 01:48:59,E01003981,51.508066,-0.087780,Person search,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1514,589534.0,50.30,110.15,2.60,761.558785
1,2022-01-01 02:49:39,E01004307,51.517680,-0.078484,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742
2,2022-01-01 05:41:27,E01004307,51.518091,-0.078308,Person search,Male,18-24,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742
3,2022-01-01 05:50:23,E01004307,51.518091,-0.078308,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742
4,2022-01-01 06:29:20,E01004307,51.516814,-0.081620,Person search,Male,18-24,Black/African/Caribbean/Black British - African,Black,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152472,2022-12-17 20:50:00,E01001698,51.481655,0.063938,Person search,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,1922,,82.09,70.90,11.19,139.438085
152473,2022-12-17 21:30:00,E01004579,51.472281,-0.166960,Person and Vehicle search,Male,,Black/African/Caribbean/Black British - Any ot...,Black,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1907,1068514.0,45.64,132.78,0.00,126.376508
152474,2022-12-17 21:35:00,E01000279,51.631560,-0.195095,Person and Vehicle search,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1404,2081351.0,25.97,168.83,0.00,54.843305
152475,2022-12-17 22:00:00,E01004765,51.514365,-0.143408,Person search,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,1862,2589873.0,15.31,81.13,0.00,1403.329753


Unnamed: 0,Date,LSOA code,Latitude,Longitude,Type,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,LSOA population,£_mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,2022-01-01 00:33:49,E01006968,53.649608,-3.005882,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,127315.0,74.74,9.47,4.74,
1,2022-01-01 00:39:36,E01006881,53.452461,-2.738275,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1409,133647.0,86.54,12.02,4.81,295.244855
2,2022-01-01 00:40:18,E01006968,53.649608,-3.005882,Vehicle search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,127315.0,74.74,9.47,4.74,
3,2022-01-01 00:41:21,E01006968,53.649608,-3.005882,Person search,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,127315.0,74.74,9.47,4.74,
4,2022-01-01 00:48:11,E01007023,53.496940,-3.010570,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),,A no further action disposal,1402,352684.0,75.00,100.00,0.00,28.53067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40978,2022-12-31 23:48:52,E01006630,53.388534,-2.972649,Person search,Female,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,2144,153367.0,107.32,24.39,14.63,95.615672
40979,2022-12-31 23:50:36,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1408,106175.0,109.95,15.71,15.71,135.653409
40980,2022-12-31 23:55:04,E01007051,53.463911,-2.976511,Person and Vehicle search,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1315,121833.0,96.26,21.39,10.70,142.205323
40981,2022-12-31 23:55:52,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1408,106175.0,109.95,15.71,15.71,135.653409


# Adding distance to station

https://github.com/davwheat/uk-railway-stations/blob/main/stations.csv

In [185]:

stations_df = pd.read_csv('https://raw.githubusercontent.com/davwheat/uk-railway-stations/refs/heads/main/stations.csv')

stations_df.columns = ['Name', 'Latitude', 'Longitude', 'code#1', 'code#2']
stations_df = stations_df[['Longitude', 'Latitude']]

stations_df

Unnamed: 0,Longitude,Latitude
0,0.120343,51.490719
1,-3.230890,51.575363
2,-3.329549,51.642620
3,-3.443130,51.715019
4,-2.097464,57.143127
...,...,...
2619,-4.387464,55.892792
2620,-1.093159,53.957966
2621,-2.736450,52.809009
2622,-3.241342,51.640884


## Mapping the nearest station to the stop 

In [233]:
# Step 1: Convert Merseyside points to a GeoDataFrame
merseyside_map = gpd.GeoDataFrame(merseyside_df, 
                        geometry=gpd.points_from_xy(merseyside_df['Longitude'], 
                                                    merseyside_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)
london_map = gpd.GeoDataFrame(london_df, 
                        geometry=gpd.points_from_xy(london_df['Longitude'], 
                                                    london_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)


# Step 2: Convert Stations points to a GeoDataFrame
map_to_station = gpd.GeoDataFrame(stations_df, 
                        geometry=gpd.points_from_xy(stations_df['Longitude'], 
                                                    stations_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)

# Step 3: Reproject to British National Grid (EPSG:27700) for accurate distance measurements
merseyside_map = merseyside_map.to_crs("EPSG:27700")
london_map = london_map.to_crs("EPSG:27700")
map_to_station = map_to_station.to_crs("EPSG:27700")

# Step 4: Perform spatial join to map nearest train station to each Merseyside point
merseyside_results = gpd.sjoin_nearest(merseyside_map, map_to_station, how="left", distance_col="distance", lsuffix='_merseyside', rsuffix='_station')

london_results = gpd.sjoin_nearest(london_map, map_to_station, how="left", distance_col="distance", lsuffix='_london', rsuffix='_station')


# Step 5: Extract the list of distances
m_distances_list = merseyside_results['distance']
l_distances_list = london_results['distance']



In [234]:
merseyside_df['Distance to station'] = m_distances_list
london_df['Distance to station'] = l_distances_list

display(london_df)
display(merseyside_df)

Unnamed: 0,Date,LSOA code,Latitude,Longitude,Type,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,LSOA population,£_mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate,Distance to station
0,2022-01-01 01:48:59,E01003981,51.508066,-0.087780,Person search,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1514,589534.0,50.30,110.15,2.60,761.558785,335.924500
1,2022-01-01 02:49:39,E01004307,51.517680,-0.078484,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742,120.646302
2,2022-01-01 05:41:27,E01004307,51.518091,-0.078308,Person search,Male,18-24,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742,145.029454
3,2022-01-01 05:50:23,E01004307,51.518091,-0.078308,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742,145.029454
4,2022-01-01 06:29:20,E01004307,51.516814,-0.081620,Person search,Male,18-24,Black/African/Caribbean/Black British - African,Black,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,2385,887023.0,16.85,30.90,2.81,447.798742,127.663400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152472,2022-12-17 20:50:00,E01001698,51.481655,0.063938,Person search,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,1922,,82.09,70.90,11.19,139.438085,997.250464
152473,2022-12-17 21:30:00,E01004579,51.472281,-0.166960,Person and Vehicle search,Male,,Black/African/Caribbean/Black British - Any ot...,Black,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1907,1068514.0,45.64,132.78,0.00,126.376508,935.167814
152474,2022-12-17 21:35:00,E01000279,51.631560,-0.195095,Person and Vehicle search,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1404,2081351.0,25.97,168.83,0.00,54.843305,2111.728574
152475,2022-12-17 22:00:00,E01004765,51.514365,-0.143408,Person search,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,1862,2589873.0,15.31,81.13,0.00,1403.329753,459.331123


Unnamed: 0,Date,LSOA code,Latitude,Longitude,Type,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,LSOA population,£_mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate,Distance to station
0,2022-01-01 00:33:49,E01006968,53.649608,-3.005882,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,127315.0,74.74,9.47,4.74,,412.977447
1,2022-01-01 00:39:36,E01006881,53.452461,-2.738275,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1409,133647.0,86.54,12.02,4.81,295.244855,532.783564
2,2022-01-01 00:40:18,E01006968,53.649608,-3.005882,Vehicle search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,127315.0,74.74,9.47,4.74,,412.977447
3,2022-01-01 00:41:21,E01006968,53.649608,-3.005882,Person search,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,127315.0,74.74,9.47,4.74,,412.977447
4,2022-01-01 00:48:11,E01007023,53.496940,-3.010570,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),,A no further action disposal,1402,352684.0,75.00,100.00,0.00,28.53067,2201.565636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40978,2022-12-31 23:48:52,E01006630,53.388534,-2.972649,Person search,Female,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Community resolution,2144,153367.0,107.32,24.39,14.63,95.615672,737.738009
40979,2022-12-31 23:50:36,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1408,106175.0,109.95,15.71,15.71,135.653409,1066.977527
40980,2022-12-31 23:55:04,E01007051,53.463911,-2.976511,Person and Vehicle search,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1315,121833.0,96.26,21.39,10.70,142.205323,928.098767
40981,2022-12-31 23:55:52,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1408,106175.0,109.95,15.71,15.71,135.653409,1066.977527
