## Post Processing

Match the land-cover schema below to the OS NGD feature descriptions:

* Freshwater -	Lake
* Freshwater -	Pond
* Freshwater -	River
* Freshwater -	Stream
* Freshwater -	Canal
* Urban Greenspace -	Playing fields (i.e. grass without trees)
* Urban Greenspace -	Parks (i.e. grass with some trees)
* Urban Greenspace -	Cemeteries
* Suburban (areas with a mixture of buildings and gardens)
* Inland Rock -	Field boundary dry stone walls
* Grassy Linear Features -	Arable field margins
* Grassy Linear Features -	Railway lines
* Grassy Linear Features -	Road verges

OS NGD collections used, with the code list of description values for each:

* 'buildings': 'bld-fts-building-2' - https://docs.os.uk/osngd/code-lists/code-lists-overview/buildingdescriptionvalue
* 'sites': 'lus-fts-site-1' - https://docs.os.uk/osngd/code-lists/code-lists-overview/sitedescriptionvalue 
* 'railways': 'trn-fts-rail-1' - https://docs.os.uk/osngd/code-lists/code-lists-overview/raildescriptionvalue
* 'land': 'lnd-fts-land-1' - https://docs.os.uk/osngd/code-lists/code-lists-overview/landdescriptionvalue 
* 'roads': 'trn-fts-roadtrackorpath-2' - https://docs.os.uk/osngd/code-lists/code-lists-overview/roadtrackorpathdescriptionvalue
* 'water': 'wtr-fts-water-2' - https://docs.os.uk/osngd/code-lists/code-lists-overview/waterdescriptionvalue 
* 'waterlink': 'wtr-ntwk-waterlink-1' - https://docs.os.uk/osngd/code-lists/code-lists-overview/waterlinksetdescriptionvalue 
* 'field': 'str-fts-fieldboundary-1' - https://docs.os.uk/osngd/code-lists/code-lists-overview/fieldboundarydescriptionvalue 



In [131]:
import os
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString

In [None]:
# Load the JSON mapping used to translate OS descriptions into land cover types.
# TODO: the path needs changing to the new filepath in label_mapping_dicts.
# os-lc-map.json is a JSON file that maps OS Land Cover codes to the corresponding land cover types.
# The encoding is given explicitly so the file is decoded the same way on every
# platform instead of relying on the locale's default encoding.
with open('os-lc-map.json', encoding='utf-8') as f:
    os_lc_map = json.load(f)

In [133]:
# Processing order of the GeoPackage layers.
# Keys are the rank (1 is handled first); each value is a
# (GeoPackage file name, category) pair.
file_order = dict(enumerate(
    (
        ('wtr_fts_water.gpkg', 'water'),
        ('lus_fts_site.gpkg', 'sites'),
        ('trn_fts_rail.gpkg', 'railways'),
        ('trn_fts_roadtrackorpath.gpkg', 'roads'),
        ('bld_fts_building.gpkg', 'buildings'),
        ('lnd_fts_land.gpkg', 'land'),
        ('str_fts_fieldboundary.gpkg', 'field'),
        ('wtr_ntwk_waterlink.gpkg', 'waterlink'),
    ),
    start=1,
))


In [None]:
# Destination files for the merged results: one for line features and one
# for polygon features.
output_shapefile_lines = "os_ngd_lines.shp"
# TODO: the polygon path needs updating to the new filepath.
output_shapefile_polygons = "os_ngd.shp"

In [135]:
# Function to process each geopackage file
def process_gpkg(file_name, category, mapping):
    """Read a GeoPackage layer and label its features with an 'ADAPT' class.

    Parameters
    ----------
    file_name : str
        Path of the GeoPackage to read; only its first layer is loaded.
    category : str
        Name of the category being processed. Used in log messages only.
    mapping : dict
        Maps values of the layer's 'description' column to ADAPT labels.

    Returns
    -------
    GeoDataFrame
        The features whose 'description' is a key of ``mapping``, with an
        added 'ADAPT' column holding the mapped label. If the layer has no
        features it is returned as read, without an 'ADAPT' column.
    """
    print(f"Processing {file_name} for category {category}")
    gdf = gpd.read_file(file_name, layer=0)  # Read the first layer by default
    print(f"Initial number of features: {len(gdf)}")

    if gdf.empty:
        return gdf

    # Report descriptions that have no entry in the mapping; those features
    # are dropped by the filter below.
    unmapped_descriptions = set(gdf['description'].unique()) - set(mapping.keys())
    if unmapped_descriptions:
        print(f"Unmapped descriptions in {file_name}: {unmapped_descriptions}")

    # Keep only mapped descriptions. The explicit copy makes the filtered frame
    # independent of the one that was read, so adding the 'ADAPT' column is a
    # plain assignment rather than a write onto a slice (SettingWithCopyWarning).
    gdf = gdf[gdf['description'].isin(mapping.keys())].copy()
    gdf['ADAPT'] = gdf['description'].map(mapping)
    print(f"Number of features after filtering: {len(gdf)}")
    print(f"Unique ADAPT labels after filtering: {gdf['ADAPT'].unique()}")
    return gdf

In [136]:
# Accumulators for the processed layers: one list collects the polygon
# GeoDataFrames, the other the line GeoDataFrames.
gdfs_polygons, gdfs_lines = [], []

In [137]:
# Process each file according to the defined order
for rank in sorted(file_order.keys()):
    file_name, category = file_order[rank]
    print(f"Processing rank {rank}: file {file_name}, category {category}")

    mapping = os_lc_map.get(category, {})
    if not mapping:
        # Without a mapping every feature would be filtered out anyway; say so
        # explicitly so a misspelt or missing category key does not go unnoticed.
        print(f"Warning: no mapping found for category '{category}'; skipping {file_name}")
        continue

    gdf = process_gpkg(file_name, category, mapping)
    if gdf.empty:
        continue

    # Add a rank column to manage overlap. assign() returns a new frame, so the
    # frame returned by process_gpkg is not modified in place.
    gdf = gdf.assign(rank=rank)

    # Separate polygons and lines
    geom_types = gdf.geometry.geom_type
    is_polygon = geom_types.isin(['Polygon', 'MultiPolygon'])
    is_line = geom_types.isin(['LineString', 'MultiLineString'])
    gdfs_polygons.append(gdf[is_polygon])
    gdfs_lines.append(gdf[is_line])

    # Anything that is neither polygon nor line (including missing geometry)
    # goes to neither list; report it instead of dropping it silently.
    n_other = int((~(is_polygon | is_line)).sum())
    if n_other:
        print(f"Warning: {n_other} features in {file_name} are neither polygons nor lines and were dropped")

Processing rank 1: file wtr_fts_water.gpkg, category water
Processing wtr_fts_water.gpkg for category water
Initial number of features: 2925
Unmapped descriptions in wtr_fts_water.gpkg: {'Leat', 'Collects', 'Reed Bed For Waste Water', 'Open Water Tank', 'Drain', 'Open Tank Reservoir', 'Mill Leat', 'Swimming Pool', 'Lock', 'Overflow', 'Waterfall', 'Spring'}
Number of features after filtering: 2825
Unique ADAPT labels after filtering: ['River' 'Pond' 'Lake' 'Canal']
Processing rank 2: file lus_fts_site.gpkg, category sites
Processing lus_fts_site.gpkg for category sites
Initial number of features: 24646
Unmapped descriptions in lus_fts_site.gpkg: {'Central Government Services', 'Camp Site', 'Youth Organisation Camp Site', 'Commercial Vehicle Park', 'Youth Recreational Or Social Club', 'Athletics Ground', 'Kingdom Hall', 'Hockey Ground', 'Distribution Or Storage Site', 'Football Ground (Spectating)', 'Arboretum', 'Balancing Pond', 'Theatre', 'University', 'Printing Works', 'Art Gallery An

In [138]:
# Merge all polygon GeoDataFrames
# Frames without any rows are skipped first: the list can contain empty frames
# (a layer with no polygon features still appends one), and a list made only of
# those must take the "no data" branch rather than write an empty shapefile.
polygon_frames = [frame for frame in gdfs_polygons if not frame.empty]

if polygon_frames:
    combined_gdf_polygons = gpd.GeoDataFrame(pd.concat(polygon_frames, ignore_index=True))

    # Ensure all geometries are polygons or multipolygons. Other geometry types
    # are filtered out rather than replaced by their convex hull, which would
    # invent polygon area for a non-polygon feature and fail on a missing geometry.
    is_polygon = combined_gdf_polygons.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])
    combined_gdf_polygons = combined_gdf_polygons[is_polygon]

    # Drop all columns except 'ADAPT' and 'geometry'
    combined_gdf_polygons = combined_gdf_polygons[['ADAPT', 'geometry']]

    # Check for unique labels before saving
    print(f"Unique ADAPT labels in polygons before saving: {combined_gdf_polygons['ADAPT'].unique()}")

    combined_gdf_polygons.to_file(output_shapefile_polygons, driver='ESRI Shapefile')
    print(f"Polygon shapefile saved as {output_shapefile_polygons}")
    print(f"Unique ADAPT labels in polygon shapefile: {combined_gdf_polygons['ADAPT'].unique()}")
else:
    print("No polygon data to process and merge.")
    # Keep the name defined so later cells can test `.empty` on it.
    combined_gdf_polygons = gpd.GeoDataFrame()

Unique ADAPT labels in polygons before saving: ['River' 'Pond' 'Lake' 'Canal' 'Suburban' 'Dense urban' 'Cemeteries'
 'Playing fields (i.e. grass without trees)' 'Railway' 'Railway verges'
 'Road' 'Road verges']
Polygon shapefile saved as os_ngd.shp
Unique ADAPT labels in polygon shapefile: ['River' 'Pond' 'Lake' 'Canal' 'Suburban' 'Dense urban' 'Cemeteries'
 'Playing fields (i.e. grass without trees)' 'Railway' 'Railway verges'
 'Road' 'Road verges']


In [139]:
# Merge all line GeoDataFrames, clip them by the merged polygons and save them.
# Frames without any rows are skipped first: the list can contain empty frames
# (a layer with no line features still appends one), and a list made only of
# those must take the "no data" branch rather than write an empty shapefile.
line_frames = [frame for frame in gdfs_lines if not frame.empty]

if line_frames:
    combined_gdf_lines = gpd.GeoDataFrame(pd.concat(line_frames, ignore_index=True))

    # Ensure all geometries are linestrings or multilinestrings. Other geometry
    # types are filtered out: a convex hull of a non-line geometry is not a line,
    # so it cannot be used to coerce features into this layer.
    is_line = combined_gdf_lines.geometry.geom_type.isin(['LineString', 'MultiLineString'])
    combined_gdf_lines = combined_gdf_lines[is_line]

    # Drop all columns except 'ADAPT' and 'geometry'
    combined_gdf_lines = combined_gdf_lines[['ADAPT', 'geometry']]

    # Check for unique labels before saving
    print(f"Unique ADAPT labels in lines before saving: {combined_gdf_lines['ADAPT'].unique()}")

    # Clip lines by the polygons: keep only the parts of each line that fall
    # outside the merged polygon layer.
    if not combined_gdf_polygons.empty:
        combined_gdf_lines = gpd.overlay(combined_gdf_lines, combined_gdf_polygons, how='difference')
        print(f"Lines clipped by polygons. Number of features after clipping: {len(combined_gdf_lines)}")

    combined_gdf_lines.to_file(output_shapefile_lines, driver='ESRI Shapefile')
    print(f"Line shapefile saved as {output_shapefile_lines}")
    print(f"Unique ADAPT labels in line shapefile: {combined_gdf_lines['ADAPT'].unique()}")
else:
    print("No line data to process and merge.")
    # Keep the name defined in this branch too, as the polygon cell does.
    combined_gdf_lines = gpd.GeoDataFrame()

Unique ADAPT labels in lines before saving: ['Field boundary dry stone walls' 'Stream']
Lines clipped by polygons. Number of features after clipping: 58110
Line shapefile saved as os_ngd_lines.shp
Unique ADAPT labels in line shapefile: ['Field boundary dry stone walls' 'Stream']
