# Creating the London and Merseyside CSVs

Load in libraries:

In [None]:
import os
import pandas as pd 
from pyproj import Transformer
import geopandas as gpd

### Load the data

https://data.police.uk/data/

In [None]:
path = '../data/police_zips'    

# find all csv file paths
csv_names = []
for folder in os.listdir(path):
        
            folder_contents = os.listdir(f'{path}/{folder}')
            
            for file in folder_contents:
                
                    
                csv_names.append(f'{path}/{folder}/{file}')



print(f'There are {len(csv_names)} different CSVs')
        

There are 108 different CSVs


### Extract the CSV's

In [None]:
crimes, outcomes, stops = [], [], []

# loop through the csvs
for csv in csv_names:
    
    if "stop-and-search.csv" in csv:
        stops.append(pd.read_csv(csv))


    elif "street.csv" in csv:
        crimes.append(pd.read_csv(csv))

    elif "outcomes.csv" in csv:
        outcomes.append(pd.read_csv(csv))


outcomes_df = pd.concat(outcomes, ignore_index=True)
crimes_df = pd.concat(crimes, ignore_index=True)
stops_df = pd.concat(stops, ignore_index=True)


### Combine the crime and outcomes by 'CRIME ID'

In [None]:
crimes_df = crimes_df.merge(outcomes_df[['Crime ID', 'Outcome type']] , on='Crime ID', how='left')
display(crimes_df.head(5))

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,Outcome type
0,f344b946a36b4dc1db0a4bb889e8ec0fd23ab65aa2bf39...,2022-01,City of London Police,City of London Police,-0.106453,51.518207,On or near Charterhouse Street,E01000916,Camden 027B,Burglary,Status update unavailable,,
1,e74962917ce995fa9e52623b6fe0c218619b79d4a22550...,2022-01,City of London Police,City of London Police,-0.113256,51.516824,On or near Old Square,E01000914,Camden 028B,Other theft,Investigation complete; no suspect identified,,Investigation complete; no suspect identified
2,067092d6822753127ce767d011ea5c5b4375de6f5a3c48...,2022-01,City of London Police,City of London Police,-0.1161,51.51847,On or near Supermarket,E01000914,Camden 028B,Other theft,Status update unavailable,,
3,,2022-01,City of London Police,City of London Police,-0.097601,51.520699,On or near Carthusian Street,E01000001,City of London 001A,Anti-social behaviour,,,
4,,2022-01,City of London Police,City of London Police,-0.095914,51.520348,On or near Beech Street,E01000001,City of London 001A,Anti-social behaviour,,,


### Convert to datetime


In [5]:
crimes_df['Date'] = pd.to_datetime(crimes_df['Month'], format= '%Y-%m')
stops_df['Date'] = pd.to_datetime(stops_df['Date']).dt.tz_convert(None) # convert to datetime and remove timezone
stops_df.drop(columns=['Part of a policing operation', 'Policing operation'], inplace=True) # remove empty columns
display(stops_df.head(5))

Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
3,Person search,2022-01-01 01:48:59,51.508066,-0.08778,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False
4,Person search,2022-01-01 02:49:39,51.51768,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False


## Map LSOA to coords

In [6]:
LSOA_df = pd.read_csv('../data/mapping_csvs/LSOA_to_coords.csv')

# Define transformer from OSGB36 (EPSG:27700) to WGS84 (EPSG:4326)
transformer = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

def convert_uk_coords(eastings, northings):
    """Vectorised function to convert Eastings/Northings to Longitude/Latitude."""
    longitudes, latitudes = transformer.transform(eastings, northings)
    return longitudes, latitudes



LSOA_df['Longitude'], LSOA_df['Latitude'] = convert_uk_coords(LSOA_df['x'], LSOA_df['y'])


LSOA_df = LSOA_df[['LSOA01CD', 'Longitude', 'Latitude']]
LSOA_df

Unnamed: 0,LSOA01CD,Longitude,Latitude
0,E01000001,-0.096266,51.519526
1,E01000002,-0.092626,51.519692
2,E01000003,-0.095916,51.522029
3,E01000004,-0.097571,51.514134
4,E01000005,-0.074945,51.513756
...,...,...,...
34373,W01001892,-3.210109,51.507316
34374,W01001893,-3.248157,51.532365
34375,W01001894,-3.231427,51.522516
34376,W01001895,-3.231117,51.517978


### Geo map the LSOA codes

In [7]:
# Convert crime DataFrames to GeoDataFrames
gdf_ss = gpd.GeoDataFrame(stops_df, 
                            geometry=gpd.points_from_xy(stops_df['Longitude'], 
                                                        stops_df['Latitude']),
                            crs="EPSG:4326")  # WGS84 (standard lat/lon)

# Convert LSOA DataFrame to GeoDataFrame
gdf_lsoa = gpd.GeoDataFrame(LSOA_df, 
                            geometry=gpd.points_from_xy(LSOA_df['Longitude'], 
                                                        LSOA_df['Latitude']),
                            crs="EPSG:4326")

# Reproject to British National Grid (EPSG:27700) for accurate distance calculations
gdf_ss = gdf_ss.to_crs("EPSG:27700")
gdf_lsoa = gdf_lsoa.to_crs("EPSG:27700")

# Perform spatial join with corrected CRS
stops_df = gpd.sjoin_nearest(gdf_ss, gdf_lsoa, how="left", distance_col="distance")


# Convert back to EPSG:4326 (optional, if needed for mapping or output)
stops_df = stops_df.to_crs('EPSG:4326')

# Drop unnecessary columns if they exist
cols_to_drop = ['geometry', 'Longitude_right', 'Latitude_right']
stops_df = stops_df.drop(columns=[col for col in cols_to_drop if col in stops_df.columns])

# Rename columns to maintain consistency
stops_df = stops_df.rename(columns={'LSOA01CD': 'LSOA code',
                                          'Latitude_left': 'Latitude',
                                          'Longitude_left': 'Longitude'})

display(stops_df.head(5))


Unnamed: 0,Type,Date,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing,index_right,LSOA code,distance
0,Person search,2022-01-01 01:14:23,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4024.0,E01004025,414.636814
1,Person search,2022-01-01 01:20:32,51.506255,-0.074901,Male,10-17,Other ethnic group - Not stated,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4024.0,E01004025,414.636814
2,Person search,2022-01-01 01:28:56,51.506255,-0.074901,Male,10-17,Asian/Asian British - Bangladeshi,Asian,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4024.0,E01004025,414.636814
3,Person search,2022-01-01 01:48:59,51.508066,-0.08778,Male,,,,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,3934.0,E01003935,566.702621
4,Person search,2022-01-01 02:49:39,51.51768,-0.078484,Male,18-24,White - English/Welsh/Scottish/Northern Irish/...,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,A no further action disposal,False,False,4309.0,E01004310,319.655331


## Mapping the nearest station to the stop 


### Trainline GitHub Dataset
Locating the locations of all the train stations in the UK

https://github.com/trainline-eu



In [None]:
stations = pd.read_csv('https://raw.githubusercontent.com/trainline-eu/stations/refs/heads/master/stations.csv', 
                       sep=';',
                       low_memory=False)

stations = stations[stations['country'] == 'GB']
stations_df = stations[['latitude', 'longitude']].reset_index(drop=True)

stations_df

Unnamed: 0,latitude,longitude
0,51.531921,-0.126361
1,50.907742,-1.413983
2,53.405250,-2.977841
3,53.605507,-0.267936
4,53.574219,-0.409679
...,...,...
2783,52.191230,-2.222310
2784,53.046000,-2.993000
2785,50.945200,-2.637000
2786,51.128079,1.315000


In [None]:
stops_map = gpd.GeoDataFrame(stops_df, 
                             geometry=gpd.points_from_xy(stops_df['Longitude'],
                                                         stops_df['Latitude']),
                                                         crs="EPSG:4326")  # WGS84 (lat/lon)

map_to_station = gpd.GeoDataFrame(stations_df, 
                                  geometry=gpd.points_from_xy(stations_df['longitude'], 
                                                              stations_df['latitude']),
                                                              crs="EPSG:4326")  # WGS84 (lat/lon)



map_to_station = map_to_station.to_crs("EPSG:27700")
stops_map = stops_map.to_crs("EPSG:27700")

results = gpd.sjoin_nearest(stops_map, map_to_station, how="left", distance_col="distance2station", lsuffix='_stops', rsuffix='_station')


results = results.drop_duplicates(subset=stops_map.columns)


# stops_df['distance2station'] = round(results['distance2station'])

# display(stops_df)
# round(results['distance'])
# display(results)
print(len(stops_df))
print(len(stops_map))
print(len(map_to_station))
len(results)





235911
235911
2788


226041

In [None]:
stops_df['coordinates'] = list(zip(stops_df['Longitude'], stops_df['Latitude']))

stations_df['coordinates'] = list(zip(stations_df['longitude'], stations_df['latitude']))


In [None]:
from scipy.spatial.distance import cdist


# Remove rows with NaN coordinates in either stops or stations
stops_df_clean = stops_df.dropna(subset=['coordinates'])
stations_df_clean = stations_df.dropna(subset=['coordinates'])

# Reconvert coordinates into NumPy arrays
stop_coords = np.array(stops_df_clean['coordinates'].tolist())
station_coords = np.array(stations_df_clean['coordinates'].tolist())

# Compute the pairwise distances between all stops and stations using cdist
distances = cdist(stop_coords, station_coords, metric='euclidean')

# Find the index of the closest station for each stop
nearest_station_indices = np.argmin(distances, axis=1)

# Get the minimum distances
min_distances = distances[np.arange(distances.shape[0]), nearest_station_indices]

# Add the minimum distances to your stops DataFrame
stops_df_clean['distance_to_nearest_station'] = min_distances

# You can now merge this back to the original stops_df to preserve rows with NaN if needed
stops_df = stops_df.merge(stops_df_clean[['coordinates', 'distance_to_nearest_station']], on='coordinates', how='left')

# Display the updated stops DataFrame
print(stops_df[['coordinates', 'distance_to_nearest_station']])


# Group LSOA Statistics

In [148]:

dataset = stops_df.groupby('LSOA code').agg(
   
    # White_Officer_stops=('Officer-defined ethnicity', lambda x: (x.eq('White').sum())),
    # White_Stops=('Self-defined ethnicity', lambda x: ((x.str.contains("White", case=False, na=False).sum()))),
    Stop_count_drugs=('Object of search', lambda x: (x.str.contains("drugs", case=False, na=False).sum())), 
    Arrest_outcome=('Outcome', lambda x: (x.str.contains('Arrest', case=False, na=False).sum())),
    Train_distance=('distance', lambda x: x.mean()),
    Stop_Count=('Date', 'count'),


).round(2).reset_index()



dataset


Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count
0,E01000001,54,10,367.21,94
1,E01000002,93,22,240.85,143
2,E01000003,12,7,531.66,35
3,E01000004,205,75,299.58,423
4,E01000005,282,92,339.53,476
...,...,...,...,...,...
5781,W01000382,1,0,1393.00,1
5782,W01000413,9,4,1635.44,9
5783,W01000414,1,1,1932.00,1
5784,W01000421,4,0,814.00,4


## Clean dual jurisdiction cases 
### Add missing LSOAs with no Stop and Search DATA

https://geoportal.statistics.gov.uk/datasets/ons::lsoa-2021-to-local-authority-districts-april-2023-best-fit-lookup-in-ew/explore


In [149]:
LSOA_names = pd.read_csv('../data/mapping_csvs/LSOA_names.csv')

# Define a list of Merseyside boroughs 
#https://www.ons.gov.uk/visualisations/areas/E11000002/ 

# Define a list of London boroughs 
# https://www.ons.gov.uk/visualisations/areas/E12000007/


boroughs = [
    "Barking and Dagenham", "Barnet", "Bexley", "Brent", "Bromley", "Camden",
    "Croydon", "Ealing", "Enfield", "Greenwich", "Hackney", "Hammersmith and Fulham",
    "Haringey", "Harrow", "Havering", "Hillingdon", "Hounslow", "Islington",
    "Kensington and Chelsea", "Kingston upon Thames", "Lambeth", "Lewisham",
    "Merton", "Newham", "Redbridge", "Richmond upon Thames", "Southwark",
    "Sutton", "Tower Hamlets", "Waltham Forest", "Wandsworth", "Westminster",
    "City of London" , 
    'Liverpool', 'Wirral', 'Sefton', 'Knowsley', 'St. Helens'
]

LSOAs = LSOA_names[LSOA_names['LAD23NM'].isin(boroughs)]
LSOAs = LSOAs[['LSOA21CD', 'LAD23NM']]
LSOAs.columns = ['LSOA code', 'Borough']
LSOAs

dataset = dataset.merge(LSOAs, on='LSOA code', how='right')
dataset

Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley
...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster
5913,E01004661,66.0,34.0,455.35,154.0,Westminster
5914,E01004662,12.0,6.0,263.32,25.0,Westminster
5915,E01004663,44.0,12.0,548.45,95.0,Westminster


## LSOA % non-white
https://www.nomisweb.co.uk/sources/census_2021_bulk

In [150]:
LSOA_ethnic = pd.read_csv('../data/LSOA_data/2021census_ethnic.csv') 

nonWhite =  (1- LSOA_ethnic['Ethnic group: White'] / LSOA_ethnic.iloc[:,3])*100
LSOA_ethnic
LSOA_ethnic = LSOA_ethnic.iloc[:, [2,3]]
LSOA_ethnic['nonWhite_pop'] = round(nonWhite,2)
LSOA_ethnic.columns = ['LSOA code', 'population', 'nonWhite_pop_percentage']
LSOA_ethnic

dataset = dataset.merge(LSOA_ethnic, on='LSOA code', how='left')
dataset

Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough,population,nonWhite_pop_percentage
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley,1518,1.91
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley,1524,3.35
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley,1457,3.29
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley,1387,3.03
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley,1153,5.29
...,...,...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster,1430,41.54
5913,E01004661,66.0,34.0,455.35,154.0,Westminster,1998,49.80
5914,E01004662,12.0,6.0,263.32,25.0,Westminster,1318,31.64
5915,E01004663,44.0,12.0,548.45,95.0,Westminster,1417,37.47


## Income Domain Score

https://www.gov.uk/government/statistics/indices-of-deprivation-2019-income-and-employment-domains-combined-for-england-and-wales

In [138]:
LSOA_IDS = pd.read_excel('../data/LSOA_data/Income_domain_scores.ods', sheet_name=1, engine="odf")
LSOA_IDS = LSOA_IDS[['LSOA Code (2011)', 'Income Domain Score']]
LSOA_IDS.columns = ['LSOA code', 'Income Domain Score']

LSOA_IDS

Unnamed: 0,LSOA code,Income Domain Score
0,E01000001,0.007
1,E01000002,0.034
2,E01000003,0.086
3,E01000005,0.211
4,E01000006,0.117
...,...,...
34748,W01001954,0.049
34749,W01001955,0.420
34750,W01001956,0.038
34751,W01001957,0.234


In [151]:
dataset = dataset.merge(LSOA_IDS, on='LSOA code', how='left')
dataset

Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough,population,nonWhite_pop_percentage,Income Domain Score
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley,1518,1.91,0.361
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley,1524,3.35,0.370
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley,1457,3.29,0.384
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley,1387,3.03,0.326
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley,1153,5.29,0.099
...,...,...,...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster,1430,41.54,0.035
5913,E01004661,66.0,34.0,455.35,154.0,Westminster,1998,49.80,0.126
5914,E01004662,12.0,6.0,263.32,25.0,Westminster,1318,31.64,0.043
5915,E01004663,44.0,12.0,548.45,95.0,Westminster,1417,37.47,0.092


## Crime Data

In [152]:

crime_counts = crimes_df['LSOA code'].value_counts().reset_index()
drug_counts = crimes_df.loc[crimes_df['Crime type'] == 'Drugs', 'LSOA code'].value_counts().reset_index()

drug_counts.columns = ['LSOA code', 'drug_crime_sum']
crime_counts.columns = ['LSOA code', 'crime_sum']


dataset = dataset.merge(crime_counts, on='LSOA code', how='left')
dataset = dataset.merge(drug_counts, on='LSOA code', how='left')
dataset

Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough,population,nonWhite_pop_percentage,Income Domain Score,crime_sum,drug_crime_sum
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley,1518,1.91,0.361,234.0,27.0
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley,1524,3.35,0.370,150.0,18.0
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley,1457,3.29,0.384,317.0,59.0
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley,1387,3.03,0.326,941.0,77.0
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley,1153,5.29,0.099,92.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster,1430,41.54,0.035,374.0,1.0
5913,E01004661,66.0,34.0,455.35,154.0,Westminster,1998,49.80,0.126,788.0,17.0
5914,E01004662,12.0,6.0,263.32,25.0,Westminster,1318,31.64,0.043,331.0,2.0
5915,E01004663,44.0,12.0,548.45,95.0,Westminster,1417,37.47,0.092,350.0,16.0



## House Prices
https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/meanpricepaidbylowerlayersuperoutputareahpssadataset47



In [153]:
# Read the 5th sheet (index 4, since indexing starts from 0)
xls = pd.read_excel('../data/LSOA_data/LSOA_house_prices.xls', 
                    sheet_name=5, 
                    engine='xlrd')

headings = xls.iloc[4]
house_prices = xls.iloc[5:]

# headings
house_prices.columns = headings

house_prices = house_prices[['Local authority code','LSOA code', 
                             'Year ending Mar 2022', 'Year ending Jun 2022', 'Year ending Sep 2022', 'Year ending Dec 2022']]


house_prices['mean_house_price'] = (
    house_prices.iloc[:, 2:]
    .apply(pd.to_numeric, errors='coerce')  # Convert non-numeric values to NaN
    .mean(axis=1)
    .round()
)

house_prices = house_prices[['LSOA code', 'mean_house_price']]
house_prices 

dataset = dataset.merge(house_prices, on='LSOA code', how='left')
dataset


Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough,population,nonWhite_pop_percentage,Income Domain Score,crime_sum,drug_crime_sum,mean_house_price
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley,1518,1.91,0.361,234.0,27.0,123955.0
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley,1524,3.35,0.370,150.0,18.0,134664.0
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley,1457,3.29,0.384,317.0,59.0,111733.0
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley,1387,3.03,0.326,941.0,77.0,119648.0
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley,1153,5.29,0.099,92.0,7.0,331221.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster,1430,41.54,0.035,374.0,1.0,1655186.0
5913,E01004661,66.0,34.0,455.35,154.0,Westminster,1998,49.80,0.126,788.0,17.0,1283005.0
5914,E01004662,12.0,6.0,263.32,25.0,Westminster,1318,31.64,0.043,331.0,2.0,1427386.0
5915,E01004663,44.0,12.0,548.45,95.0,Westminster,1417,37.47,0.092,350.0,16.0,1817948.0


# Fill NA Stops with 0


In [161]:
dataset['Stop_Count'] = dataset['Stop_Count'].fillna(0)
sum(dataset['Stop_Count'].isna())

0

# Split Data into Cities

In [162]:
m_boroughs = ['Liverpool', 'Wirral', 'Sefton', 'Knowsley', 'St. Helens']


merseyside = dataset.loc[dataset['Borough'].isin(m_boroughs)]
london = dataset.loc[~dataset['Borough'].isin(m_boroughs)]


In [163]:
display(london, merseyside)

Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough,population,nonWhite_pop_percentage,Income Domain Score,crime_sum,drug_crime_sum,mean_house_price
923,E01000001,54.0,10.0,367.21,94.0,City of London,1474,19.88,0.007,251.0,14.0,966662.0
924,E01000002,93.0,22.0,240.85,143.0,City of London,1386,21.79,0.034,358.0,11.0,1050363.0
925,E01000003,12.0,7.0,531.66,35.0,City of London,1612,25.87,0.086,130.0,2.0,592861.0
926,E01000005,282.0,92.0,339.53,476.0,City of London,1101,61.49,0.211,735.0,49.0,
927,E01032739,,,,0.0,City of London,1620,32.22,0.014,6242.0,359.0,1078073.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster,1430,41.54,0.035,374.0,1.0,1655186.0
5913,E01004661,66.0,34.0,455.35,154.0,Westminster,1998,49.80,0.126,788.0,17.0,1283005.0
5914,E01004662,12.0,6.0,263.32,25.0,Westminster,1318,31.64,0.043,331.0,2.0,1427386.0
5915,E01004663,44.0,12.0,548.45,95.0,Westminster,1417,37.47,0.092,350.0,16.0,1817948.0


Unnamed: 0,LSOA code,Stop_count_drugs,Arrest_outcome,Train_distance,Stop_Count,Borough,population,nonWhite_pop_percentage,Income Domain Score,crime_sum,drug_crime_sum,mean_house_price
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley,1518,1.91,0.361,234.0,27.0,123955.0
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley,1524,3.35,0.370,150.0,18.0,134664.0
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley,1457,3.29,0.384,317.0,59.0,111733.0
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley,1387,3.03,0.326,941.0,77.0,119648.0
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley,1153,5.29,0.099,92.0,7.0,331221.0
...,...,...,...,...,...,...,...,...,...,...,...,...
918,E01034836,,,,0.0,Wirral,1253,7.10,,9.0,2.0,
919,E01034837,,,,0.0,Wirral,1638,6.59,,10.0,2.0,
920,E01034838,,,,0.0,Wirral,1022,8.41,,7.0,2.0,
921,E01034839,,,,0.0,Wirral,1043,7.86,,11.0,2.0,


### Rename columns for ease

In [164]:
cols = ['LSOA21CD', 'StopCountDrugs', 'ArrestOutcome', 'DistToTrainStation',
       'StopCount', 'Borough', 'Population', 'NonWhitePopulationPercentage',
       'IncomeDomainScore', 'CrimeSum', 'DrugCrimeSum',
       'MeanHousePrice']

merseyside.columns = cols
london.columns = cols

display(merseyside, london)

Unnamed: 0,LSOA21CD,StopCountDrugs,ArrestOutcome,DistToTrainStation,StopCount,Borough,Population,NonWhitePopulationPercentage,IncomeDomainScore,CrimeSum,DrugCrimeSum,MeanHousePrice
0,E01006434,99.0,7.0,2017.80,114.0,Knowsley,1518,1.91,0.361,234.0,27.0,123955.0
1,E01006435,91.0,6.0,1844.68,113.0,Knowsley,1524,3.35,0.370,150.0,18.0,134664.0
2,E01006436,200.0,8.0,1446.13,226.0,Knowsley,1457,3.29,0.384,317.0,59.0,111733.0
3,E01006437,560.0,35.0,1164.51,673.0,Knowsley,1387,3.03,0.326,941.0,77.0,119648.0
4,E01006438,43.0,3.0,4154.79,52.0,Knowsley,1153,5.29,0.099,92.0,7.0,331221.0
...,...,...,...,...,...,...,...,...,...,...,...,...
918,E01034836,,,,0.0,Wirral,1253,7.10,,9.0,2.0,
919,E01034837,,,,0.0,Wirral,1638,6.59,,10.0,2.0,
920,E01034838,,,,0.0,Wirral,1022,8.41,,7.0,2.0,
921,E01034839,,,,0.0,Wirral,1043,7.86,,11.0,2.0,


Unnamed: 0,LSOA21CD,StopCountDrugs,ArrestOutcome,DistToTrainStation,StopCount,Borough,Population,NonWhitePopulationPercentage,IncomeDomainScore,CrimeSum,DrugCrimeSum,MeanHousePrice
923,E01000001,54.0,10.0,367.21,94.0,City of London,1474,19.88,0.007,251.0,14.0,966662.0
924,E01000002,93.0,22.0,240.85,143.0,City of London,1386,21.79,0.034,358.0,11.0,1050363.0
925,E01000003,12.0,7.0,531.66,35.0,City of London,1612,25.87,0.086,130.0,2.0,592861.0
926,E01000005,282.0,92.0,339.53,476.0,City of London,1101,61.49,0.211,735.0,49.0,
927,E01032739,,,,0.0,City of London,1620,32.22,0.014,6242.0,359.0,1078073.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5912,E01004660,3.0,1.0,437.26,19.0,Westminster,1430,41.54,0.035,374.0,1.0,1655186.0
5913,E01004661,66.0,34.0,455.35,154.0,Westminster,1998,49.80,0.126,788.0,17.0,1283005.0
5914,E01004662,12.0,6.0,263.32,25.0,Westminster,1318,31.64,0.043,331.0,2.0,1427386.0
5915,E01004663,44.0,12.0,548.45,95.0,Westminster,1417,37.47,0.092,350.0,16.0,1817948.0


### Save to CSV

In [165]:
merseyside.to_csv(f'../data/merseyside{csv_names[1].split('/')[3][:4]}.csv')
london.to_csv(f'../data/london{csv_names[1].split('/')[3][:4]}.csv')