# Creating the London and Merseyside CSVs

In [None]:

import os
import pandas as pd 
from pyproj import Transformer


import geopandas as gpd
from shapely.geometry import Point


# Load the data in

https://data.police.uk/data/

In [26]:
path = '../data/police_zips'    



# find all csv file paths
csv_names = []
for folder in os.listdir(path):
        
            folder_contents = os.listdir(f'{path}/{folder}')
            
            for file in folder_contents:
                
                    
                csv_names.append(f'{path}/{folder}/{file}')



print(f'There are {len(csv_names)} different CSVs')
        

There are 108 different CSVs


# Extract the CSV's

In [124]:
# Initialise storage 
london_ss, london_crimes, london_outcomes = [], [], []
merseyside_ss, merseyside_crimes, merseyside_outcomes = [], [], []

# loop through the csvs
for csv in csv_names:
    
    if "stop-and-search.csv" in csv:
        if "city-of-london" in csv or "metropolitan" in csv:
            london_ss.append(pd.read_csv(csv))
        elif "merseyside" in csv:
            merseyside_ss.append(pd.read_csv(csv))

    elif "street.csv" in csv:
        if "city-of-london" in csv or "metropolitan" in csv:
            london_crimes.append(pd.read_csv(csv))
        elif "merseyside" in csv:
            merseyside_crimes.append(pd.read_csv(csv))

    elif "outcomes.csv" in csv:
        if "city-of-london" in csv or "metropolitan" in csv:
            london_outcomes.append(pd.read_csv(csv))
        elif "merseyside" in csv:
            merseyside_outcomes.append(pd.read_csv(csv))

# Convert lists to DataFrames
london_ss_df = pd.concat(london_ss, ignore_index=True)
london_crimes_df = pd.concat(london_crimes, ignore_index=True)
london_outcomes_df = pd.concat(london_outcomes, ignore_index=True)

merseyside_ss_df = pd.concat(merseyside_ss, ignore_index=True)
merseyside_crimes_df = pd.concat(merseyside_crimes, ignore_index=True)
merseyside_outcomes_df = pd.concat(merseyside_outcomes, ignore_index=True)


### Combine the crime and outcomes by 'CRIME ID'

In [125]:
# merge London crimes with outcomes
london_crimes_with_outcomes = london_crimes_df.merge(london_outcomes_df[['Crime ID', 'Outcome type']], 
                                                     on='Crime ID', how='left')


# merge Merseyside crimes with outcomes
merseyside_crimes_with_outcomes = merseyside_crimes_df.merge(merseyside_outcomes_df[['Crime ID', 'Outcome type']]
                                                             , on='Crime ID', how='left')


### Convert to datetime


In [126]:
# Convert the date column to datetime format
london_crimes_with_outcomes['Date'] = pd.to_datetime(london_crimes_with_outcomes['Month'], format= '%Y-%m')
london_ss_df['Date'] = pd.to_datetime(london_ss_df['Date']).dt.tz_convert(None) # convert to datetime and remove timezone
london_ss_df.drop(columns=['Part of a policing operation', 'Policing operation'], inplace=True) # remove empty columns


merseyside_crimes_with_outcomes['Date'] = pd.to_datetime(merseyside_crimes_with_outcomes['Month'], format= '%Y-%m')
merseyside_ss_df['Date'] = pd.to_datetime(merseyside_ss_df['Date']).dt.tz_convert(None) # convert to datetime and remove timezone
merseyside_ss_df.drop(columns=['Part of a policing operation', 'Policing operation'], inplace=True) # remove empty columns

In [136]:
merseyside_ss_df

Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing
0,Person search,2022-01-01 00:04:00,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False
1,Person search,2022-01-01 00:07:45,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False
2,Person search,2022-01-01 00:12:22,53.406618,-2.983878,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False
3,Person search,2022-01-01 00:17:54,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False
4,Person search,2022-01-01 00:22:40,53.403914,-2.981499,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53342,Person search,2022-12-31 23:50:36,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False
53343,Person and Vehicle search,2022-12-31 23:55:04,53.463911,-2.976511,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False
53344,Person search,2022-12-31 23:55:52,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False
53345,Person search,2022-12-31 23:56:03,53.388654,-3.035339,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False


# Map LSOA to coords

In [143]:
LSOA_df = pd.read_csv('../data/mapping_csvs/LSOA_to_coords.csv')
LSOA_df


# Define transformer from OSGB36 (EPSG:27700) to WGS84 (EPSG:4326)
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

def convert_uk_coords(eastings, northings):
    """Vectorised function to convert Eastings/Northings to Longitude/Latitude."""
    longitudes, latitudes = transformer.transform(eastings, northings)
    return longitudes, latitudes



# Example DataFrame
LSOA_df['Longitude'], LSOA_df['Latitude'] = convert_uk_coords(LSOA_df['x'], LSOA_df['y'])


LSOA_df = LSOA_df[['LSOA01CD', 'Longitude', 'Latitude']]
LSOA_df

Unnamed: 0,LSOA01CD,Longitude,Latitude
0,E01000001,-0.096266,51.519526
1,E01000002,-0.092626,51.519692
2,E01000003,-0.095916,51.522029
3,E01000004,-0.097571,51.514134
4,E01000005,-0.074945,51.513756
...,...,...,...
34373,W01001892,-3.210109,51.507316
34374,W01001893,-3.248157,51.532365
34375,W01001894,-3.231427,51.522516
34376,W01001895,-3.231117,51.517978


In [181]:
# Convert crime DataFrames to GeoDataFrames
gdf_ss_m = gpd.GeoDataFrame(merseyside_ss_df, 
                            geometry=gpd.points_from_xy(merseyside_ss_df['Longitude'], 
                                                        merseyside_ss_df['Latitude']),
                            crs="EPSG:4326")  # WGS84 (standard lat/lon)

gdf_ss_l = gpd.GeoDataFrame(london_ss_df, 
                            geometry=gpd.points_from_xy(london_ss_df['Longitude'], 
                                                        london_ss_df['Latitude']),
                            crs="EPSG:4326")  # WGS84 (standard lat/lon)

# Convert LSOA DataFrame to GeoDataFrame
gdf_lsoa = gpd.GeoDataFrame(LSOA_df, 
                            geometry=gpd.points_from_xy(LSOA_df['Longitude'], 
                                                        LSOA_df['Latitude']),
                            crs="EPSG:4326")

# Reproject to British National Grid (EPSG:27700) for accurate distance calculations
gdf_ss_m = gdf_ss_m.to_crs("EPSG:27700")
gdf_ss_l = gdf_ss_l.to_crs("EPSG:27700")
gdf_lsoa = gdf_lsoa.to_crs("EPSG:27700")

# Perform spatial join with corrected CRS
merged_df_m = gpd.sjoin_nearest(gdf_ss_m, gdf_lsoa, how="left", distance_col="distance")
merged_df_l = gpd.sjoin_nearest(gdf_ss_l, gdf_lsoa, how="left", distance_col="distance")

# Convert back to EPSG:4326 (optional, if needed for mapping or output)
merged_df_m = merged_df_m.to_crs("EPSG:4326")
merged_df_l = merged_df_l.to_crs("EPSG:4326")

# Drop unnecessary columns if they exist
cols_to_drop = ['geometry', 'Longitude_right', 'Latitude_right']
merged_df_m = merged_df_m.drop(columns=[col for col in cols_to_drop if col in merged_df_m.columns])
merged_df_l = merged_df_l.drop(columns=[col for col in cols_to_drop if col in merged_df_l.columns])

# Rename columns to maintain consistency
merged_df_m = merged_df_m.rename(columns={'LSOA01CD': 'LSOA code',
                                          'Latitude_left': 'Latitude',
                                          'Longitude_left': 'Longitude'})
merged_df_l = merged_df_l.rename(columns={'LSOA01CD': 'LSOA code',
                                          'Latitude_left': 'Latitude',
                                          'Longitude_left': 'Longitude'})


# Display results
display(merged_df_m)
display(merged_df_l)


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance
0,Person search,2022-01-01 00:04:00,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,,
1,Person search,2022-01-01 00:07:45,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,,
2,Person search,2022-01-01 00:12:22,53.406618,-2.983878,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6649.0,E01006650,387.744590
3,Person search,2022-01-01 00:17:54,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,,
4,Person search,2022-01-01 00:22:40,53.403914,-2.981499,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,False,True,6510.0,E01006511,471.071883
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53342,Person search,2022-12-31 23:50:36,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False,6657.0,E01006658,238.424011
53343,Person and Vehicle search,2022-12-31 23:55:04,53.463911,-2.976511,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,7050.0,E01007051,261.024539
53344,Person search,2022-12-31 23:55:52,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False,6657.0,E01006658,238.424011
53345,Person search,2022-12-31 23:56:03,53.388654,-3.035339,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,7128.0,E01007129,133.645387


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025,414.636814
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025,414.636814
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025,414.636814
3,Person search,2022-01-01 01:48:59,51.508066,-0.087780,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,3934.0,E01003935,566.702621
4,Person search,2022-01-01 02:49:39,51.517680,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4309.0,E01004310,319.655331
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182559,Person and Vehicle search,2022-12-17 21:35:00,51.631560,-0.195095,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,278.0,E01000279,463.259019
182560,Person search,2022-12-17 21:57:00,,,Male,25-34,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,,,
182561,Person search,2022-12-17 22:00:00,51.514365,-0.143408,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,4763.0,E01004764,400.351140
182562,Person search,2022-12-17 22:16:00,,,Male,10-17,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,,,


## Sort for specific area:

https://geoportal.statistics.gov.uk/datasets/ons::lsoa-2021-to-local-authority-districts-april-2023-best-fit-lookup-in-ew/explore


In [None]:
LSOA_names = pd.read_csv('../data/mapping_csvs/LSOA_names.csv')

#https://www.ons.gov.uk/visualisations/areas/E11000002/ 
merseyside_names = ['Liverpool', 'Wirral', 'Sefton', 'Knowsley', 'St. Helens']

# Filter LSOAs where 'LAD23NM' is in merseyside_names
merseyside_LSOAs = LSOA_names[LSOA_names['LAD23NM'].isin(merseyside_names)]


merseyside_LSOAs = merseyside_LSOAs[['LSOA21CD']]
merseyside_LSOAs.columns = ['LSOA code']
merseyside_LSOAs




london_LADs = LSOA_names['LAD23NM'].unique()
london_LADs_list = LSOA_names['LAD23NM'].unique().tolist()

# Define a list of London boroughs 
london_boroughs = [
    "Barking and Dagenham", "Barnet", "Bexley", "Brent", "Bromley", "Camden",
    "Croydon", "Ealing", "Enfield", "Greenwich", "Hackney", "Hammersmith and Fulham",
    "Haringey", "Harrow", "Havering", "Hillingdon", "Hounslow", "Islington",
    "Kensington and Chelsea", "Kingston upon Thames", "Lambeth", "Lewisham",
    "Merton", "Newham", "Redbridge", "Richmond upon Thames", "Southwark",
    "Sutton", "Tower Hamlets", "Waltham Forest", "Wandsworth", "Westminster",
    "City of London" 
]

# Filter for London boroughs only
london_LSOAs = LSOA_names[LSOA_names['LAD23NM'].isin(london_boroughs)][['LSOA21CD']]
london_LSOAs.columns = ['LSOA code']



In [272]:

merseyside = merged_df_m.merge(merseyside_LSOAs, on='LSOA code', how='right')
london = merged_df_l.merge(london_LSOAs, on='LSOA code', how='right')


merseyside

Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance
0,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,Black,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,E01006434,171.438495
1,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Controlled drugs,A no further action disposal,,False,6433.0,E01006434,171.438495
2,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,E01006434,454.001372
3,Person search,2022-01-25 11:48:55,53.474090,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6433.0,E01006434,221.084998
4,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,False,6433.0,E01006434,124.013202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37780,,NaT,,,,,,,,,,,,,E01034836,
37781,,NaT,,,,,,,,,,,,,E01034837,
37782,,NaT,,,,,,,,,,,,,E01034838,
37783,,NaT,,,,,,,,,,,,,E01034839,


# House Prices

In [235]:
# Read the 5th sheet (index 4, since indexing starts from 0)
xls = pd.read_excel('../data/house_prices/LSOA_house_prices.xls', 
                    sheet_name=5, 
                    engine='xlrd')

headings = xls.iloc[4]
house_prices = xls.iloc[5:]

# headings
house_prices.columns = headings

house_prices = house_prices[['Local authority code','LSOA code', 
                             'Year ending Jun 2022', 'Year ending Sep 2022', 'Year ending Dec 2022']]


house_prices['mean_house_price'] = (
    house_prices.iloc[:, 2:]
    .apply(pd.to_numeric, errors='coerce')  # Convert non-numeric values to NaN
    .mean(axis=1)
    .round()
)

house_prices = house_prices[['LSOA code', 'mean_house_price']]
house_prices 

4,LSOA code,mean_house_price
5,E01011949,100415.0
6,E01011950,55639.0
7,E01011951,71779.0
8,E01011952,77052.0
9,E01011953,95933.0
...,...,...
34753,W01001320,187601.0
34754,W01001321,128623.0
34755,W01001322,162018.0
34756,W01001324,179577.0


In [None]:
# Merge house prices data with the merseyside DataFrame
merseyside = house_prices.merge(merseyside, on='LSOA code', how='right')
london = house_prices.merge(london, on='LSOA code', how='right')

# Calculate the mean of 'mean_house_price'
m_mean = merseyside['mean_house_price'].mean()
l_mean = london['mean_house_price'].mean()

# Fill missing values with the mean house price
merseyside['mean_house_price'] = merseyside['mean_house_price'].fillna(m_mean)
london['mean_house_price'] = london['mean_house_price'].fillna(l_mean) # change the value of parliament to BIG!!!!!


# Display the result
merseyside


Unnamed: 0,LSOA code,mean_house_price,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,distance
0,E01006434,124940.0000,Person and Vehicle search,2022-01-16 16:57:20,53.476675,-2.875641,Male,18-24,Mixed/Multiple ethnic groups - Any other Mixed...,Black,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,171.438495
1,E01006434,124940.0000,Person search,2022-01-17 02:03:41,53.476675,-2.875641,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Controlled drugs,A no further action disposal,,False,6433.0,171.438495
2,E01006434,124940.0000,Person search,2022-01-19 18:37:52,53.475239,-2.867928,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Article for use in theft,A no further action disposal,,False,6433.0,454.001372
3,E01006434,124940.0000,Person search,2022-01-25 11:48:55,53.474090,-2.877502,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6433.0,221.084998
4,E01006434,124940.0000,Person search,2022-01-31 11:23:07,53.474593,-2.876307,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,False,6433.0,124.013202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37780,E01034836,146279.5293,,NaT,,,,,,,,,,,,,
37781,E01034837,146279.5293,,NaT,,,,,,,,,,,,,
37782,E01034838,146279.5293,,NaT,,,,,,,,,,,,,
37783,E01034839,146279.5293,,NaT,,,,,,,,,,,,,


In [274]:
merseyside.to_csv('../data/merseyside2022.csv')
london.to_csv('../data/london2022.csv')

## LSOA Populations

In [234]:
xlsx = pd.read_excel('../data/house_prices/LSOA_populations.xlsx', 
                     sheet_name=6,  # sheet index
                     engine='openpyxl')  # for .xlsx files

headings = xlsx.iloc[2]
LSOA_pop = xlsx.iloc[3:]

#headings
LSOA_pop.columns = headings
LSOA_pop = LSOA_pop[['LSOA 2021 Code', 'Total']]
LSOA_pop.columns = ['LSOA code', 'LSOA population']

LSOA_pop

KeyboardInterrupt: 

## Create Scores

In [128]:
# merge the house prices and population data
stats_2022 = house_prices.merge(LSOA_pop, on='LSOA code', how='left')

# round the values
mean_pop = round(stats_2022['LSOA population'].mean())  
mean_house = round(stats_2022['mean_house_price'].mean())  

# convert to numeric
stats_2022['LSOA population'] = pd.to_numeric(stats_2022['LSOA population'], errors='coerce')
stats_2022['mean_house_price'] = pd.to_numeric(stats_2022['mean_house_price'], errors='coerce')

# fill NA's with the means
stats_2022['LSOA population'] = stats_2022['LSOA population'].fillna(mean_pop) 
stats_2022['mean_house_price'] = stats_2022['mean_house_price'].fillna(mean_house) 


# Check for remaining NaN values
print(stats_2022.isna().sum())

stats_2022


LSOA code           0
mean_house_price    0
LSOA population     0
dtype: int64


Unnamed: 0,LSOA code,mean_house_price,LSOA population
0,E01011949,100415.0,1854.0
1,E01011950,55639.0,1037.0
2,E01011951,71779.0,1203.0
3,E01011952,77052.0,1610.0
4,E01011953,95933.0,1970.0
...,...,...,...
34748,W01001320,187601.0,1437.0
34749,W01001321,128623.0,1629.0
34750,W01001322,162018.0,1647.0
34751,W01001324,179577.0,1892.0


## Add crime rates to LSOA

In [129]:




merseyside_22 = merseyside_crimes_with_outcomes.merge(stats_2022, on='LSOA code', how='left')
london_22 = london_crimes_with_outcomes.merge(stats_2022, on='LSOA code', how='left')



merseyside_22 = merseyside_22[['Date', 'LSOA name', 'LSOA code', 'Crime type', 'mean_house_price', 'LSOA population','Last outcome category', 'Outcome type']]
london_22 = london_22[['Date', 'LSOA name', 'LSOA code', 'Crime type', 'mean_house_price', 'LSOA population','Last outcome category', 'Outcome type']]



merseyside_22['Drug crime rate'] = (merseyside_22['Crime type'] == 'Drugs').astype(int)
london_22['Drug crime rate'] = (london_22['Crime type'] == 'Drugs').astype(int)


merseyside_22['Violent crime rate'] = (merseyside_22['Crime type'] == 'Vehicle crime').astype(int)
london_22['Violent crime rate'] = (london_22['Crime type'] == 'Vehicle crime').astype(int)



prosecuted_outcomes = [
    'Suspect charged', 'Action to be taken by another organisation', 
    'Suspect charged as part of another case', 'Offender given penalty notice'
]



merseyside_22['Prosecution rate'] = merseyside_22['Last outcome category'].isin(prosecuted_outcomes).astype(int)
london_22['Prosecution rate'] = london_22['Last outcome category'].isin(prosecuted_outcomes).astype(int)


merseyside_22 = merseyside_22.groupby('LSOA code', as_index=False).agg({
    'LSOA population': 'first',
    'mean_house_price': 'first',
    'Crime type': 'count',
    'Drug crime rate': 'mean',
    'Violent crime rate': 'mean',
    'Prosecution rate': 'mean'})
london_22 = london_22.groupby('LSOA code', as_index=False).agg({
    'LSOA population': 'first',
    'mean_house_price': 'first',
    'Crime type': 'count',
    'Drug crime rate': 'mean',
    'Violent crime rate': 'mean',
    'Prosecution rate': 'mean'})







merseyside_22['Crime rate'] = merseyside_22['Crime type'] / merseyside_22['LSOA population']
london_22['Crime rate'] = london_22['Crime type'] / london_22['LSOA population']


merseyside_22 = merseyside_22.drop(columns={'Crime type'})
london_22 = london_22.drop(columns={'Crime type'})



merseyside_22[['Drug crime rate', 
    'Violent crime rate', 
    'Prosecution rate', 
    'Crime rate']] = round(merseyside_22[['Drug crime rate', 
                               'Violent crime rate', 
                               'Prosecution rate', 
                               'Crime rate']] *1000,2)
london_22[['Drug crime rate', 
    'Violent crime rate', 
    'Prosecution rate', 
    'Crime rate']] = round(london_22[['Drug crime rate', 
                               'Violent crime rate', 
                               'Prosecution rate', 
                               'Crime rate']] *1000,2)



display(london_22)
display(merseyside_22)


Unnamed: 0,LSOA code,LSOA population,mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,E01000001,1573.0,942221.0,55.78,131.47,11.95,159.57
1,E01000002,1407.0,1035931.0,30.73,13.97,0.00,254.44
2,E01000003,1610.0,605185.0,15.38,61.54,0.00,80.75
3,E01000005,1104.0,358847.0,66.67,48.98,5.44,665.76
4,E01000006,1829.0,277604.0,107.69,161.54,23.08,71.08
...,...,...,...,...,...,...,...
7872,W01001870,1420.0,226517.0,0.00,0.00,0.00,1.41
7873,W01001897,1854.0,363184.0,0.00,0.00,0.00,1.08
7874,W01001940,1049.0,178192.0,0.00,0.00,0.00,0.95
7875,W01001953,1495.0,298163.0,0.00,0.00,0.00,0.67


Unnamed: 0,LSOA code,LSOA population,mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,E01006225,1557.0,196182.0,500.00,0.00,0.0,1.28
1,E01006264,1237.0,239444.0,0.00,333.33,0.0,2.43
2,E01006346,2077.0,191080.0,0.00,0.00,0.0,0.48
3,E01006366,1823.0,231052.0,0.00,0.00,0.0,1.65
4,E01006368,1876.0,188987.0,250.00,0.00,0.0,2.13
...,...,...,...,...,...,...,...
965,E01034836,,,222.22,0.00,0.0,
966,E01034837,,,200.00,0.00,0.0,
967,E01034838,,,285.71,142.86,0.0,
968,E01034839,,,181.82,0.00,0.0,


In [33]:
# stop_counts = london_ss_df['LSOA code'].value_counts().reset_index()
# stop_counts.columns = ['LSOA21CD', 'stop_counts']  # Rename columns for clarity

# # Step 2: Merge with population data
# lsoas_population = london[['LSOA code', 'LSOA population']].rename(columns={'LSOA code': 'LSOA21CD'})
# merged_lsoas = pd.merge(stop_counts, lsoas_population, on='LSOA21CD', how='left')

# # Step 3: Calculate stops per area (stops per population)
# merged_lsoas['stops_per_area'] = merged_lsoas['stop_counts'] / merged_lsoas['LSOA population'] * 100

# merged_lsoas
# london_ss_df

In [None]:
london_ss_df['Longitude']
london_ss_df['Latitude']



np.float64(51.506255)

# Adding LSOA to Stop and Search

In [None]:
# import geopandas as gpd
# from shapely.geometry import Point

# # Load LSOA shapefile
# lsoa_gdf = gpd.read_file("../data/mapping_csvs/lsoa_shapefiles/LSOA_2011_EW_BGC_V3.shp")

# # Convert your coordinates into a shapely Point
# test_point = Point(-0.095, 51.517)

# # Ensure the point has the same CRS as the LSOA shapefile
# =
# lsoa_crs = lsoa_gdf.crs  # Get CRS of LSOA shapefile
# test_point_gdf = gpd.GeoDataFrame(geometry=[test_point], crs="EPSG:4326")  # Default CRS of Point is EPSG:4326

# # Reproject the point to the LSOA CRS
# test_point_gdf = test_point_gdf.to_crs(lsoa_crs)

# # Find the LSOA that contains the point
# matching_lsoa = lsoa_gdf[lsoa_gdf.contains(test_point_gdf.geometry.iloc[0])]

# matching_lsoa['LSOA11CD']


1    E01000002
Name: LSOA11CD, dtype: object

In [None]:
# coords = london_ss_df[['Longitude', 'Latitude']].iloc[:100]

In [None]:
# import geopandas as gpd
# from shapely.geometry import Point

# # https://geoportal.statistics.gov.uk/datasets/02e8d336d6804fbeabe6c972e5a27b16_0/explore?location=53.390230%2C-2.976211%2C12.31&showTable=true
# lsoa_gdf = gpd.read_file("../data/mapping_csvs/lsoa_shapefiles/LSOA_2011_EW_BGC_V3.shp")


# coords = merseyside_ss_df[['Longitude', 'Latitude']]
# coords = london_ss_df[['Longitude', 'Latitude']]


# # Convert the Longitude and Latitude columns into a list of Point geometries
# points = [Point(lon, lat) for lon, lat in zip(coords['Longitude'], coords['Latitude'])]

# # Convert the points into a GeoDataFrame
# points_gdf = gpd.GeoDataFrame(coords, geometry=points, crs="EPSG:4326")  # CRS is set to WGS84

# # Load the LSOA shapefile (ensure this is already loaded as lsoa_gdf)
# # Reproject points to the same CRS as the LSOA shapefile
# lsoa_crs = lsoa_gdf.crs
# points_gdf = points_gdf.to_crs(lsoa_crs)

# # Perform the spatial join to find which LSOAs contain each point
# result = gpd.sjoin(points_gdf, lsoa_gdf, how="left", predicate='within')

# # The result now contains LSOA information for each point, such as LSOA11CD
# result[['Longitude', 'Latitude', 'LSOA11CD']]
# # result

# merseyside_ss_df['LSOA code'] = result[['LSOA11CD']]
# merseyside_ss_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merseyside_ss_df['LSOA code'] = result[['LSOA11CD']]


Unnamed: 0,Longitude,Latitude,LSOA code
0,,,
1,,,
2,-2.983878,53.406618,
3,,,
4,-2.981499,53.403914,E01032739
...,...,...,...
53342,-2.921443,53.466101,E01001717
53343,-2.976511,53.463911,E01001719
53344,-2.921443,53.466101,E01001717
53345,-3.035339,53.388654,E01033327


In [None]:
# def get_lsoa_codes(coords_df, lsoa_gdf, crs="EPSG:4326"):
#     """
#     Converts coordinates to points, reprojects to LSOA CRS, 
#     and performs a spatial join to return the corresponding LSOA codes.
    
#     Parameters:
#     - coords_df: DataFrame containing 'Longitude' and 'Latitude' columns
#     - lsoa_gdf: GeoDataFrame containing the LSOA shapefile
#     - crs: Coordinate reference system for the points (default is WGS84)

#     Returns:
#     - Updated DataFrame with 'LSOA code' column added
#     """
#     # Convert the Longitude and Latitude columns into a list of Point geometries
#     points = [Point(lon, lat) for lon, lat in zip(coords_df['Longitude'], coords_df['Latitude'])]

#     # Convert the points into a GeoDataFrame
#     points_gdf = gpd.GeoDataFrame(coords_df, geometry=points, crs=crs)  # CRS is set to WGS84

#     # Reproject points to the same CRS as the LSOA shapefile
#     lsoa_crs = lsoa_gdf.crs
#     points_gdf = points_gdf.to_crs(lsoa_crs)

#     # Perform the spatial join to find which LSOAs contain each point
#     result = gpd.sjoin(points_gdf, lsoa_gdf, how="left", predicate='within')

#     # Add LSOA code to the original DataFrame using .loc to avoid the SettingWithCopyWarning
#     # coords_df.loc[:, 'LSOA code'] = result['LSOA11CD']

#     return result['LSOA11CD']



# # # Load the LSOA shapefile (this is done once)
# # lsoa_gdf = gpd.read_file("../data/mapping_csvs/lsoa_shapefiles/LSOA_2011_EW_BGC_V3.shp")

# # # Get LSOA codes for Merseyside
# # merseyside_ss_df = get_lsoa_codes(merseyside_ss_df[['Longitude', 'Latitude']], lsoa_gdf)

# # # Get LSOA codes for London
# # london_ss_df = get_lsoa_codes(london_ss_df[['Longitude', 'Latitude']], lsoa_gdf)

# # # The updated dataframes now contain 'LSOA code' for both Merseyside and London
# # # display(merseyside_ss_df[['Longitude', 'Latitude', 'LSOA code']])
# # # display(london_ss_df[['Longitude', 'Latitude', 'LSOA code']])

# # merseyside_ss_df

In [None]:
# get_lsoa_codes(merseyside_ss_df[['Longitude', 'Latitude']], lsoa_gdf)

0              NaN
1              NaN
2        E01033760
3              NaN
4        E01033756
           ...    
53342    E01006660
53343    E01007051
53344    E01006660
53345    E01007129
53346    E01032508
Name: LSOA11CD, Length: 53347, dtype: object

In [121]:
LSOA_df = pd.read_csv('../data/mapping_csvs/LSOA_to_coords.csv')
LSOA_df

Unnamed: 0,FID,LSOA01CD,LSOA01NM,GlobalID,x,y
0,1,E01000001,City of London 001A,3f778391-d557-47f0-91ab-257ee265c5c6,532182,181785
1,2,E01000002,City of London 001B,1cdc7e6c-a108-4410-80d5-dc6623582676,532434,181810
2,3,E01000003,City of London 001C,95f673aa-8bc2-447d-88e6-f15d77bd5492,532199,182064
3,4,E01000004,City of London 001D,408281cc-db8a-44c3-a611-d7497ef3545e,532107,181183
4,5,E01000005,City of London 001E,03ed3f20-2f4d-4870-bb54-5bd372c80e68,533678,181182
...,...,...,...,...,...,...
34373,34374,W01001892,Cardiff 020D,5b54ed80-cdb0-445f-9f1c-b389ed8b046b,316112,179404
34374,34375,W01001893,Cardiff 010B,b6beb227-3850-4ac1-a5c0-c0cabf9d236d,313519,182234
34375,34376,W01001894,Cardiff 010C,468e8bc5-d07d-4ecf-9c59-3b5f96abec6c,314661,181119
34376,34377,W01001895,Cardiff 010D,37741ec6-5ed5-4526-b5a3-8235720d3903,314674,180614


### Convert Ordnance Survey National Grid to longitude/latitude

In [None]:
LSOA_df = pd.read_csv('../data/mapping_csvs/LSOA_to_coords.csv')
LSOA_df

# import numpy as np
from pyproj import Transformer

# Define transformer from OSGB36 (EPSG:27700) to WGS84 (EPSG:4326)
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

def convert_uk_coords(eastings, northings):
    """Vectorised function to convert Eastings/Northings to Longitude/Latitude."""
    longitudes, latitudes = transformer.transform(eastings, northings)
    return longitudes, latitudes


df = pd.DataFrame()

# Example DataFrame
LSOA_df['Longitude'], LSOA_df['Latitude'] = convert_uk_coords(LSOA_df['x'], LSOA_df['y'])


LSOA_df = LSOA_df[['LSOA01CD', 'Longitude', 'Latitude']]
LSOA_df

Unnamed: 0,LSOA01CD,Longitude,Latitude
0,E01000001,-0.096266,51.519526
1,E01000002,-0.092626,51.519692
2,E01000003,-0.095916,51.522029
3,E01000004,-0.097571,51.514134
4,E01000005,-0.074945,51.513756
...,...,...,...
34373,W01001892,-3.210109,51.507316
34374,W01001893,-3.248157,51.532365
34375,W01001894,-3.231427,51.522516
34376,W01001895,-3.231117,51.517978


here is the issue

# Assign each Coordinate in Stop and search data to LSOA

using geopandas

In [131]:


# Convert Merseyside crime DataFrame to GeoDataFrame
gdf_ss_m = gpd.GeoDataFrame(merseyside_ss_df, 
                              geometry=gpd.points_from_xy(merseyside_ss_df['Longitude'], 
                                                          merseyside_ss_df['Latitude']),
                              crs="EPSG:4326")  # WGS84 (standard lat/lon)
# Convert London crime DataFrame to GeoDataFrame
gdf_ss_l = gpd.GeoDataFrame(london_ss_df, 
                              geometry=gpd.points_from_xy(london_ss_df['Longitude'], 
                                                          london_ss_df['Latitude']),
                              crs="EPSG:4326")  # WGS84 (standard lat/lon)



# Convert LSOA centroids DataFrame to GeoDataFrame
gdf_lsoa = gpd.GeoDataFrame(LSOA_df, 
                            geometry=gpd.points_from_xy(LSOA_df['Longitude'], 
                                                        LSOA_df['Latitude']),
                            crs="EPSG:4326")


# Perform spatial join to associate each crime with its nearest LSOA
merged_df_m = gpd.sjoin_nearest(gdf_ss_m, gdf_lsoa, how="left")
merged_df_l = gpd.sjoin_nearest(gdf_ss_l, gdf_lsoa, how="left")



# Drop unnecessary columns and check results
merged_df_m = merged_df_m.drop(columns=['geometry', 'Longitude_right', 'Latitude_right'])  # Remove unnecessary columns if not needed
merged_df_l = merged_df_l.drop(columns=['geometry', 'Longitude_right', 'Latitude_right'])  # Remove unnecessary columns if not needed


merged_df_m = merged_df_m.rename(columns={'LSOA01CD': 'LSOA code',
                                      'Latitude_left': 'Latitude',
                                      'Longitude_left': 'Longitude'})
merged_df_l = merged_df_l.rename(columns={'LSOA01CD': 'LSOA code',
                                      'Latitude_left': 'Latitude',
                                      'Longitude_left': 'Longitude'})



# merge the stats
merged_df_m.merge(merseyside_22,on='LSOA code')
merged_df_l.merge(london_22,on='LSOA code')



display(merged_df_m)
display(merged_df_l)






Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code
0,Person search,2022-01-01 00:04:00,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,
1,Person search,2022-01-01 00:07:45,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,
2,Person search,2022-01-01 00:12:22,53.406618,-2.983878,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,6649.0,E01006650
3,Person search,2022-01-01 00:17:54,,,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,,
4,Person search,2022-01-01 00:22:40,53.403914,-2.981499,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,False,True,6510.0,E01006511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53342,Person search,2022-12-31 23:50:36,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False,6657.0,E01006658
53343,Person and Vehicle search,2022-12-31 23:55:04,53.463911,-2.976511,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,7050.0,E01007051
53344,Person search,2022-12-31 23:55:52,53.466101,-2.921443,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,False,6657.0,E01006658
53345,Person search,2022-12-31 23:56:03,53.388654,-3.035339,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,False,7128.0,E01007129


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4024.0,E01004025
3,Person search,2022-01-01 01:48:59,51.508066,-0.087780,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,3980.0,E01003981
4,Person search,2022-01-01 02:49:39,51.517680,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,0.0,0.0,4306.0,E01004307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182559,Person and Vehicle search,2022-12-17 21:35:00,51.631560,-0.195095,Male,25-34,Asian/Asian British - Any other Asian background,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,278.0,E01000279
182560,Person search,2022-12-17 21:57:00,,,Male,25-34,Other ethnic group - Not stated,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,Community resolution,,,,
182561,Person search,2022-12-17 22:00:00,51.514365,-0.143408,Female,over 34,White - Any other White background,White,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,4764.0,E01004765
182562,Person search,2022-12-17 22:16:00,,,Male,10-17,Black/African/Caribbean/Black British - Caribbean,Black,Police and Criminal Evidence Act 1984 (section 1),Stolen goods,A no further action disposal,,,,


### Combine LSOA stats with stop and search

In [135]:
# merseyside_df = merged_df_m.merge(merseyside_22, on='LSOA code' )
merseyside_df = merged_df_m.merge(merseyside_22, on='LSOA code', how='left')

# london_df = merged_df_l.merge(london_22, on='LSOA code' )
london_df = merged_df_m.merge(london_22, on='LSOA code', how='left')


merseyside_df = merseyside_df[['Date', 'LSOA code', 'Latitude', 'Longitude', 'Type', 'Gender', 'Age range', 
               'Self-defined ethnicity', 'Officer-defined ethnicity', 
               'Legislation', 'Object of search', 'Outcome' , 'LSOA population',
               'mean_house_price', 'Drug crime rate', 'Violent crime rate',
               'Prosecution rate', 'Crime rate']]
london_df = london_df[['Date', 'LSOA code', 'Latitude', 'Longitude', 'Type', 'Gender', 'Age range', 
               'Self-defined ethnicity', 'Officer-defined ethnicity', 
               'Legislation', 'Object of search', 'Outcome' , 'LSOA population',
               'mean_house_price', 'Drug crime rate', 'Violent crime rate',
               'Prosecution rate', 'Crime rate']]

display(london_df)
display(merseyside_df)

Unnamed: 0,Date,LSOA code,Latitude,Longitude,Type,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,LSOA population,mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,2022-01-01 00:04:00,,,,Person search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
1,2022-01-01 00:07:45,,,,Person search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
2,2022-01-01 00:12:22,E01006650,53.406618,-2.983878,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
3,2022-01-01 00:17:54,,,,Person search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
4,2022-01-01 00:22:40,E01006511,53.403914,-2.981499,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53342,2022-12-31 23:50:36,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,,,,,
53343,2022-12-31 23:55:04,E01007051,53.463911,-2.976511,Person and Vehicle search,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
53344,2022-12-31 23:55:52,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,,,,,,
53345,2022-12-31 23:56:03,E01007129,53.388654,-3.035339,Person search,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,


Unnamed: 0,Date,LSOA code,Latitude,Longitude,Type,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,LSOA population,mean_house_price,Drug crime rate,Violent crime rate,Prosecution rate,Crime rate
0,2022-01-01 00:04:00,,,,Person search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
1,2022-01-01 00:07:45,,,,Person search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
2,2022-01-01 00:12:22,E01006650,53.406618,-2.983878,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
3,2022-01-01 00:17:54,,,,Person search,,,,,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,,,,,,
4,2022-01-01 00:22:40,E01006511,53.403914,-2.981499,Person search,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,Arrest,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53342,2022-12-31 23:50:36,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1408.0,106175.0,109.95,15.71,15.71,135.65
53343,2022-12-31 23:55:04,E01007051,53.463911,-2.976511,Person and Vehicle search,Male,25-34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1315.0,121833.0,96.26,21.39,10.70,142.21
53344,2022-12-31 23:55:52,E01006658,53.466101,-2.921443,Person search,Male,10-17,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,1408.0,106175.0,109.95,15.71,15.71,135.65
53345,2022-12-31 23:56:03,E01007129,53.388654,-3.035339,Person search,Male,over 34,White - English/Welsh/Scottish/Northern Irish/...,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,A no further action disposal,1454.0,125266.0,145.51,55.73,12.38,222.15


# Adding distance to station

https://github.com/davwheat/uk-railway-stations/blob/main/stations.csv

In [None]:

stations_df = pd.read_csv('https://raw.githubusercontent.com/davwheat/uk-railway-stations/refs/heads/main/stations.csv')

stations_df.columns = ['Name', 'Latitude', 'Longitude', 'code#1', 'code#2']
stations_df = stations_df[['Longitude', 'Latitude']]

stations_df

## Mapping the nearest station to the stop 

In [None]:
# Step 1: Convert Merseyside points to a GeoDataFrame
merseyside_map = gpd.GeoDataFrame(merseyside_df, 
                        geometry=gpd.points_from_xy(merseyside_df['Longitude'], 
                                                    merseyside_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)
london_map = gpd.GeoDataFrame(london_df, 
                        geometry=gpd.points_from_xy(london_df['Longitude'], 
                                                    london_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)


# Step 2: Convert Stations points to a GeoDataFrame
map_to_station = gpd.GeoDataFrame(stations_df, 
                        geometry=gpd.points_from_xy(stations_df['Longitude'], 
                                                    stations_df['Latitude']),
                        crs="EPSG:4326")  # WGS84 (lat/lon)

# Step 3: Reproject to British National Grid (EPSG:27700) for accurate distance measurements
merseyside_map = merseyside_map.to_crs("EPSG:27700")
london_map = london_map.to_crs("EPSG:27700")
map_to_station = map_to_station.to_crs("EPSG:27700")

# Step 4: Perform spatial join to map nearest train station to each Merseyside point
merseyside_results = gpd.sjoin_nearest(merseyside_map, map_to_station, how="left", distance_col="distance", lsuffix='_merseyside', rsuffix='_station')

london_results = gpd.sjoin_nearest(london_map, map_to_station, how="left", distance_col="distance", lsuffix='_london', rsuffix='_station')


# Step 5: Extract the list of distances
m_distances_list = merseyside_results['distance']
l_distances_list = london_results['distance']



In [None]:
merseyside_df['Distance to station'] = m_distances_list
london_df['Distance to station'] = l_distances_list

display(london_df)
display(merseyside_df)

# Save to CSV

In [None]:
london_df.to_csv('../data/london2022.csv')
merseyside_df.to_csv('../data/merseyside2022.csv')

In [None]:
import geopandas as gpd
from shapely.geometry import Point
# https://geoportal.statistics.gov.uk/datasets/02e8d336d6804fbeabe6c972e5a27b16_0/explore?location=53.390230%2C-2.976211%2C12.31&showTable=true

# Load LSOA shapefile (replace with your file path)
lsoa_gdf = gpd.read_file("../data/mapping_csvs/lsoa_shapefiles/LSOA_2011_EW_BGC_V3.shp")

# Convert your coordinates into a shapely Point
test_point = Point(-0.095, 51.517)

# Find the LSOA that contains the point
matching_lsoa = lsoa_gdf[lsoa_gdf.contains(test_point)]

print(matching_lsoa[["LSOA11CD", "LSOA11NM"]])


Empty DataFrame
Columns: [LSOA11CD, LSOA11NM]
Index: []
