### Step 1: Extract power infrastructure data from OSM
Package: osm-flex

`vietnam-latest.osm.pbf` was downloaded on 24-10-2024.

In [None]:
import os,sys
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import Point, LineString, Polygon, mapping
from tqdm import tqdm
from pathlib import Path

# Make a local checkout of osm-flex importable: its `src` directory is resolved
# relative to the notebook's working directory and put at the front of sys.path.
# This has to happen BEFORE the `osm_flex` imports below, so the order of these
# statements matters.
current_dir = os.getcwd()
osm_flex_path = os.path.abspath(os.path.join(current_dir, '../../osm-flex/src'))
sys.path.insert(0, osm_flex_path)

import osm_flex.download as dl
import osm_flex.extract as ex
import osm_flex.config
import osm_flex.simplify as sy

# Print the OSM config file path that osm-flex exposes in its config module.
print(osm_flex.config.OSM_CONFIG_FILE)

In [3]:
# Optional one-off download step, kept disabled because the Vietnam dump has
# already been downloaded (see the note at the top of the notebook).
# NOTE(review): presumably this fetches the country extract from Geofabrik into
# osm-flex's data directory — confirm before re-enabling.
# iso3 = 'VNM'
# dl.get_country_geofabrik(iso3)

In [4]:
# Locate the Vietnam PBF dump inside osm-flex's data directory, then extract
# the features of the 'power' infrastructure type into a GeoDataFrame.
path_vnm_dump = osm_flex.config.OSM_DATA_DIR / 'vietnam-latest.osm.pbf'
gdf_vnm_power = ex.extract_cis(path_vnm_dump, 'power')

extract points: 100%|██████████| 86978/86978 [00:17<00:00, 5019.63it/s]
extract multipolygons: 100%|██████████| 501/501 [00:27<00:00, 18.37it/s]
extract lines: 100%|██████████| 9166/9166 [00:15<00:00, 584.21it/s] 


#### Simplify OSM data

In [5]:
# Run the osm-flex simplification steps one after another, printing the number
# of remaining features after each step.
print(f'Number of results: {len(gdf_vnm_power)}')

simplify_steps = (
    (sy.remove_contained_points,
     'Number of results after removing points contained in polygons'),
    (sy.remove_contained_polys,
     'Number of results after removing polygons contained in larger polygons'),
    (sy.remove_exact_duplicates,
     'Number of results after removing exact geometrical duplicates'),
)

for simplify_step, step_message in simplify_steps:
    gdf_vnm_power = simplify_step(gdf_vnm_power)
    print(f'{step_message}: {len(gdf_vnm_power)}')

Number of results: 96645


Number of results after removing points contained in polygons: 95572
Number of results after removing polygons contained in larger polygons: 95565


  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


Number of results after removing exact geometrical duplicates: 95565


In [6]:
# Write the simplified power features to a GeoPackage file.
power_gpkg_path = "../data/osm/vietnam-latest_power.gpkg"
gdf_vnm_power.to_file(power_gpkg_path, driver="GPKG")

In [7]:
# Select the rows whose 'power' column equals 'plant' and report how many there are.
is_plant = gdf_vnm_power['power'] == 'plant'
osm_plants = gdf_vnm_power[is_plant]
print(len(osm_plants))

453


In [8]:
# Select the rows whose 'power' column equals 'substation'.
# An explicit copy is taken because `subs` is modified later in the notebook
# (its 'osm_id' column is filled in via `.loc`); writing into a plain
# boolean-mask selection of `gdf_vnm_power` triggers pandas'
# SettingWithCopyWarning and leaves it ambiguous which frame is being changed.
subs = gdf_vnm_power[gdf_vnm_power['power'] == 'substation'].copy()
print(len(subs))

# True means at least one substation has no osm_id.
subs['osm_id'].isnull().any()

1108


True

In [9]:
"""
    Problem: There are some substations extracted from osm.pbf file by osm_flex don't have osm_id.
    Solution:
        1. osm_subs: Load the GeoPackage containing substations extracted using QGIS QuickOSM tool - 'power_substation_vietnam.gpkg',
            including substations that are of type 'MultiPolygon'.
        2. Use spatial join to find matching geometries between subs and osm_subs.
        3. Add the matched osm_id from osm_subs to the subs DataFrame
"""
osm_subs = gpd.read_file('../data/osm/power_substation_vietnam.gpkg')

if subs.crs != osm_subs.crs:
    osm_subs = osm_subs.to_crs(subs.crs)

# Use spatial join to find matching geometries between `subs` and `osm_subs`.
matched = gpd.sjoin(subs, osm_subs[['geometry', 'osm_id']], op='intersects')

# Add the matched `osm_id` from `osm_subs` to the `osm_id` column of `subs`.
subs.loc[matched.index, 'osm_id'] = matched['osm_id_right']

  if await self.run_code(code, result, async_=asy):


In [10]:
# Sanity check: displays False when every substation has an osm_id.
subs['osm_id'].isna().any()

False

In [11]:
# Write the substations (with their osm_id column filled in) to a GeoPackage file.
substation_gpkg_path = "../data/osm/vietnam-latest_substation_supplyPolygonID.gpkg"
subs.to_file(substation_gpkg_path, driver="GPKG")

In [12]:
# Select the rows whose 'power' column equals 'line' and report how many there are.
is_line = gdf_vnm_power['power'] == 'line'
lines = gdf_vnm_power[is_line]
print(len(lines))

7776


In [13]:
# Write the power-line features to a GeoPackage file.
lines_gpkg_path = "../data/osm/vietnam-latest_lines.gpkg"
lines.to_file(lines_gpkg_path, driver="GPKG")