# Imports

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

# Load data

In [2]:
# Read the cleaned LSOA layer and the two cleaned greenspace layers from
# their GeoPackage files. The raw_* frames are only read here; later cells
# work on copies.
lsoa_path = "datasets/2_cleaned/cleaned_lsoa.gpkg"
os_greenspace_path = "datasets/2_cleaned/cleaned_os_greenspace.gpkg"
ua_greenspace_path = "datasets/2_cleaned/cleaned_ua_greenspace.gpkg"

raw_lsoa_df = gpd.read_file(lsoa_path)
raw_os_greenspace_df = gpd.read_file(os_greenspace_path)
raw_ua_greenspace_df = gpd.read_file(ua_greenspace_path)

In [3]:
# Take working copies so the raw_* frames stay as loaded and this cell can be
# re-run to reset the working frames without reading the files again.
lsoa_df, os_greenspace_df, ua_greenspace_df = (
    raw_frame.copy()
    for raw_frame in (raw_lsoa_df, raw_os_greenspace_df, raw_ua_greenspace_df)
)

# Combine greenspace datasets

In [4]:
# Stack the geometry columns of both greenspace layers into a single frame;
# every other attribute column is left behind.
greenspace_layers = [
    ua_greenspace_df[['geometry']],
    os_greenspace_df[['geometry']],
]
combined_greenspace = pd.concat(greenspace_layers, ignore_index=True)

# Union everything into one geometry so overlapping shapes are merged, then
# explode that geometry so a multi-part result becomes one row per part.
unioned = combined_greenspace.geometry.union_all()
raw_combined_greenspace = (
    gpd.GeoDataFrame(geometry=[unioned], crs=ua_greenspace_df.crs)
    .explode(index_parts=False)
    .reset_index(drop=True)
)

In [5]:
# Working copy of the merged greenspace layer. Note that this rebinds
# `combined_greenspace`: in the cell above the same name held the plain
# concatenation of both layers (before the union/explode step).
combined_greenspace = raw_combined_greenspace.copy()

# Calculate greenspace proportion per LSOA

In [6]:
# Greenspace proportion per LSOA =
#     (area of greenspace falling inside the LSOA) / (area of the LSOA).
# Both areas are measured in squared CRS units, so the two layers must be in
# the same CRS. gpd.overlay only emits a warning on a CRS mismatch and would
# otherwise intersect incompatible coordinates, so align explicitly first.
if (
    combined_greenspace.crs is not None
    and lsoa_df.crs is not None
    and combined_greenspace.crs != lsoa_df.crs
):
    greenspace_aligned = combined_greenspace.to_crs(lsoa_df.crs)
else:
    greenspace_aligned = combined_greenspace

lsoa_df["total_area"] = lsoa_df.geometry.area

# Only the LSOA key is needed on the clipped pieces; leaving the remaining
# attribute columns out of the overlay keeps the intermediate result small.
lsoa_keys = lsoa_df[["lsoa", lsoa_df.geometry.name]]
overlay = gpd.overlay(greenspace_aligned, lsoa_keys, how="intersection")

# Vectorised area of every clipped piece, summed per LSOA key.
lsoa_greenspace = overlay.geometry.area.groupby(overlay["lsoa"]).sum()

# LSOAs that intersect no greenspace are absent from the overlay -> area 0.
lsoa_df["greenspace_area"] = lsoa_df["lsoa"].map(lsoa_greenspace).fillna(0)

# A zero-area LSOA geometry gives NaN instead of dividing by zero.
lsoa_df["greenspace_proportion"] = (
    lsoa_df["greenspace_area"] / lsoa_df["total_area"].replace(0, np.nan)
)

# Drop unneeded features

In [7]:
# Remove the LSOA key and the two intermediate area columns; the remaining
# columns (including greenspace_proportion) are kept.
# NOTE: this rebinds lsoa_df, so re-running this cell without first re-running
# the cells above raises KeyError because the columns are already gone.
helper_columns = ["lsoa", "total_area", "greenspace_area"]
lsoa_df = lsoa_df.drop(columns=helper_columns)

# Save output

In [8]:
# Write the combined frame to a GeoPackage file.
output_path = "datasets/3_combined/df.gpkg"
lsoa_df.to_file(output_path, driver="GPKG", index=False)