# Imports

In [1]:
import pandas as pd
import geopandas as gpd
import glob
import numpy as np
from functools import reduce

# Load datasets

In [2]:
# Spatial layers are read with geopandas, demographic tables with pandas;
# every path is relative to the notebook's working directory.
LSOAs = gpd.read_file("datasets/LSOAs/LSOA_2021_EW_BSC_V4.shp")
health = pd.read_excel("datasets/Demographic/health.xlsx")
os_greenspace = gpd.read_file("datasets/OSOpenGreenspace/data/TQ_GreenspaceSite.shp")

# Urban Atlas is spread over several shapefiles. Keep only the rows whose
# functional urban area is London and skip files that contribute none.
# Each file is read exactly once (reading it a second time just to test for
# emptiness doubles the I/O), and the paths are sorted so the concatenated
# row order does not depend on the filesystem's listing order.
ua_shapefiles = sorted(glob.glob("datasets/UrbanAtlas/Results/UA_2018_GPKG_NUTS3.shp/*.shp"))
ua_london_parts = (
    gpd.read_file(path).query("fua_name == 'London'") for path in ua_shapefiles
)
ua_greenspace = pd.concat(
    [part for part in ua_london_parts if not part.empty],
    ignore_index=True,
)

age_sex = pd.read_excel("datasets/Demographic/age_sex.xlsx")
ethnicity = pd.read_excel("datasets/Demographic/ethnicity.xlsx")
deprivation = pd.read_excel("datasets/Demographic/deprivation.xlsx")

# Collected so the column-name clean-up can loop over every frame.
dfs = [LSOAs, health, os_greenspace, ua_greenspace, age_sex, ethnicity, deprivation]

# Clean columns

In [3]:
def _tidy_labels(labels):
    """Return the column labels trimmed, with spaces as underscores, lower-cased."""
    trimmed = labels.str.strip()
    return trimmed.str.replace(' ', '_').str.lower()


# Normalise the labels of every loaded frame in place, so the objects held in
# `dfs` and the individually named variables stay in sync.
for df in dfs:
    df.columns = _tidy_labels(df.columns)

# For each table: give the LSOA code column the shared name `lsoa`, then keep
# only the columns used later in the notebook.
LSOAs = LSOAs.rename(columns={'lsoa21cd': 'lsoa'})[['lsoa', 'geometry']]

health = (
    health
    .rename(columns={"lsoa_code": "lsoa"})
    .drop(columns=['local_authority_code', 'local_authority_name'])
)

os_greenspace = os_greenspace[['function', 'geometry']]
ua_greenspace = ua_greenspace[['class_2018', 'geometry']]

age_sex = (
    age_sex
    .rename(columns={"lsoa_2021_code": "lsoa"})
    .drop(columns=['lad_2021_code', 'lad_2021_name', 'lsoa_2021_name'])
)

ethnicity = (
    ethnicity
    .rename(columns={'lsoa_code': 'lsoa', 'other_any_other': 'any_other'})
    .drop(columns=['local_authority_name', 'local_authority_code'])
)

deprivation = deprivation.rename(
    columns={
        'lsoa_code_(2011)': 'lsoa',
        'index_of_multiple_deprivation_(imd)_rank_(where_1_is_most_deprived)': 'imd',
    }
)[['lsoa', 'imd']]

# Align coordinate reference systems

In [4]:
# Put every spatial layer in the same projected CRS (EPSG:3035) before any
# overlay or area calculation; overlays across differing CRSs give wrong results.
LSOAs = LSOAs.to_crs('EPSG:3035')
os_greenspace = os_greenspace.to_crs('EPSG:3035')
# Urban Atlas is expected to be delivered in EPSG:3035 already, in which case
# this leaves its coordinates unchanged. Reprojecting it explicitly means the
# later overlays no longer rely on that assumption.
ua_greenspace = ua_greenspace.to_crs('EPSG:3035')

# Prepare Urban Atlas data

In [5]:
# Urban Atlas 2018 land-use classes that this analysis counts as greenspace.
greenspace_classes = [
    "Arable land (annual crops)",
    "Pastures",
    "Forests",
    "Herbaceous vegetation associations (natural grassland, moors...)",
    "Green urban areas",
    "Wetlands",
    "Permanent crops (vineyards, fruit trees, olive groves)",
]

# Keep only the polygons whose class is in the list above.
is_greenspace = ua_greenspace["class_2018"].isin(greenspace_classes)
ua_greenspace = ua_greenspace.loc[is_greenspace]

# Standardise absolute demographic figures to proportions

In [6]:
def _counts_to_proportions(frame, count_cols, total_col):
    """Divide `count_cols` by `total_col` row-wise and return the frame without `total_col`.

    The count columns are overwritten on `frame` itself. Rows whose total is 0
    get NaN proportions instead of a division by zero.
    """
    denominator = frame[total_col].replace(0, np.nan)
    frame[count_cols] = frame[count_cols].div(denominator, axis=0)
    return frame.drop(columns=[total_col])


# Count columns to convert in each demographic table.
health_cols = ['very_good_health', 'good_health', 'fair_health', 'bad_health', 'very_bad_health']

# Every age/sex column is a count except the LSOA key and the total itself.
age_sex_excluded_cols = ['lsoa', 'total']
age_sex_cols = [name for name in age_sex.columns if name not in age_sex_excluded_cols]

ethnicity_cols = [
    'white_british', 'white_irish', 'white_gypsy/irish_traveller', 'white_roma',
    'white_other', 'mixed_white_and_asian', 'mixed_white_and_black_african',
    'mixed_white_and_black_caribbean', 'mixed_other', 'asian_bangladeshi',
    'asian_chinese', 'asian_indian', 'asian_pakistani', 'asian_other',
    'black_african', 'black_caribbean', 'black_other', 'other_arab',
    'any_other',
]

# Replace absolute counts with shares of each LSOA's population, then drop the
# population column that served as the denominator.
health = _counts_to_proportions(health, health_cols, 'all_usual_residents')
age_sex = _counts_to_proportions(age_sex, age_sex_cols, 'total')
ethnicity = _counts_to_proportions(ethnicity, ethnicity_cols, 'all_usual_residents')

# Combine LSOA and demographic datasets

In [None]:
def _inner_join_on_lsoa(left, right):
    """Inner-join two tables on their shared `lsoa` column."""
    return left.merge(right, on='lsoa', how='inner')


demographic_datasets = [health, age_sex, ethnicity, deprivation]

# Fold the tables together left to right. An inner join keeps only the LSOAs
# present in every table, which removes areas outside London.
# NOTE(review): `deprivation` is keyed on 2011 LSOA codes while `age_sex` is
# keyed on 2021 codes, so LSOAs whose code changed between the two vintages are
# presumably dropped here as well; confirm against the expected LSOA count.
merged_demographic_datasets = reduce(_inner_join_on_lsoa, demographic_datasets)

# Attach the demographics to the boundaries; again only matching LSOAs survive.
LSOAs = LSOAs.merge(merged_demographic_datasets, on='lsoa', how='inner')

# Combine greenspace datasets

In [8]:
# Union overlay: keeps the area covered by either greenspace source and splits
# polygons where the two layers cross, carrying the attributes of both.
combined_greenspace = gpd.overlay(ua_greenspace, os_greenspace, how="union")

# Tidy the resulting column labels: trim whitespace and replace spaces with underscores.
trimmed_labels = combined_greenspace.columns.str.strip()
combined_greenspace.columns = trimmed_labels.str.replace(' ', '_')



# Overlay LSOAs onto greenspace

In [9]:
# Area of each LSOA in the units of the layer's CRS (square metres once the
# layer has been reprojected to EPSG:3035 earlier in the notebook).
LSOAs["total_area"] = LSOAs.geometry.area

# Cut the combined greenspace layer along LSOA boundaries; every resulting
# piece carries the code of the LSOA it falls in.
overlay = gpd.overlay(combined_greenspace, LSOAs, how = "intersection")

# Greenspace polygons can overlap one another (the union overlay does not merge
# overlaps that exist within a single source layer), so adding up piece areas
# would count shared ground more than once and could push the proportion above
# 1. Dissolving first merges each LSOA's pieces into one geometry, so the area
# measured is the ground actually covered. Only the key and geometry are passed
# in, to avoid aggregating the demographic columns.
LSOA_greenspace = overlay[['lsoa', 'geometry']].dissolve(by='lsoa').geometry.area

# LSOAs with no intersecting greenspace are absent from the lookup and get 0.
LSOAs['greenspace_area'] = LSOAs['lsoa'].map(LSOA_greenspace).fillna(0)
# A zero total area yields NaN rather than a division by zero.
LSOAs['greenspace_proportion'] = LSOAs['greenspace_area'] / LSOAs["total_area"].replace(0, np.nan)



In [10]:
# Sanity check of the final table: row count, column count, dtypes and memory use.
LSOAs.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 4659 entries, 0 to 4658
Columns: 212 entries, lsoa to greenspace_proportion
dtypes: float64(209), geometry(1), int64(1), object(1)
memory usage: 7.5+ MB
