In [1]:
import logging

import click
import geopandas as gpd

from deafrica_conflux.cli.logs import logging_setup
from deafrica_conflux.group_polygons import get_polygon_length, split_polygons_by_region
from deafrica_conflux.id_field import guess_id_field

In [2]:
import os

# Default AWS configuration that the Analysis Sandbox sets in the
# environment variables.
aws_default_config = {
    # 'AWS_NO_SIGN_REQUEST': 'YES',  (currently disabled)
    "AWS_SECRET_ACCESS_KEY": "fake",
    "AWS_ACCESS_KEY_ID": "fake",
}

# Public buckets can only be read once these credentials are gone from the
# environment; otherwise the request fails with:
# PermissionError: The AWS Access Key Id you provided does not exist in our records.
for key in aws_default_config:
    # pop() with a default removes the variable when present and is a no-op
    # when it is already absent.
    os.environ.pop(key, None)

In [3]:
# Run parameters for this notebook.

# Verbosity level handed to logging_setup.
verbose = 1

# Product name handed to split_polygons_by_region.
product = "wofs_ls"

# Vector file containing the polygons to split.
polygons_vector_file = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.1/historical_extent/waterbodies.shp"

# Column name offered to guess_id_field as the polygon ID field.
use_id = "UID"

# Output location handed to split_polygons_by_region.
output_directory = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.1/historical_extent_split_by_wofs_region/"

In [4]:
# Set up logging: configure it through the project's logging_setup helper
# using the chosen verbosity, then create the logger used by the cells below.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [5]:
# Load the polygons vector file. If the read fails, log the failure together
# with its traceback and let the same exception propagate.
try:
    polygons_gdf = gpd.read_file(polygons_vector_file)
except Exception:
    _log.exception(f"Could not read file {polygons_vector_file}")
    raise

# Only reached when the file was read successfully.
# Work out which column holds the polygon IDs.
id_field = guess_id_field(polygons_gdf, use_id)
_log.info(f"Guessed ID field: {id_field}")

# Index the polygons by their ID column.
polygons_gdf = polygons_gdf.set_index(id_field)

[2023-11-16 21:23:46,432] {id_field.py:64} INFO - Values in the column UID are unique.
[2023-11-16 21:23:46,433] {558686588.py:10} INFO - Guessed ID field: UID


In [6]:
# Get the original CRS of the polygons.
# This must run before polygons_gdf is reprojected to EPSG:6933, because that
# step rebinds polygons_gdf and its .crs no longer reflects the input file.
original_crs = polygons_gdf.crs
# Get the original count of the polygons (used later to report how many were
# filtered out).
original_count = len(polygons_gdf)

In [7]:
# Reproject to a projected CRS (EPSG:6933) before measuring polygon lengths,
# and fail fast if the resulting CRS is not projected.
# NOTE: this rebinds polygons_gdf; the CRS of the input file is kept in
# original_crs so the data can be reprojected back later.
polygons_gdf = polygons_gdf.to_crs("EPSG:6933")
assert polygons_gdf.crs.is_projected

In [8]:
# Get the length of each polygon by applying get_polygon_length to every
# geometry, and store the result in a new polygon_length_m column on
# polygons_gdf.
# NOTE(review): the column name implies metres; this presumably relies on the
# geometries being in the projected CRS set above — confirm against
# get_polygon_length.
polygons_gdf['polygon_length_m'] = polygons_gdf['geometry'].apply(get_polygon_length)

In [9]:
# Display the polygons table, now including the polygon_length_m column.
polygons_gdf

Unnamed: 0_level_0,area_m2,WB_ID,perim_m,timeseries,geometry,polygon_length_m
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
e9pvzx5t2,11700.0000,0,780.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((-1087200.000 849749.998, -1087080.00...",150.000000
e9pvzxcs1,147599.9999,1,4260.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((-1087140.000 850709.998, -1087080.00...",1015.985711
e9pvzzx9v,6300.0000,2,360.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((-1085580.000 849989.998, -1085460.00...",120.000000
e9pxxzp4y,11700.0000,3,720.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((-1119570.000 888719.998, -1119540.00...",257.935591
e9pxzb5ze,44100.0000,4,1680.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((-1119990.000 889679.998, -1119960.00...",600.000030
...,...,...,...,...,...,...
t4jtr87cd,31500.0000,688491,1080.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((5120310.000 1541849.997, 5120580.000...",330.000000
t4jv01978,36000.0000,688492,1860.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((5122260.000 1537589.997, 5122290.000...",763.953332
t4jv046nu,10800.0000,688493,540.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((5122320.000 1537889.997, 5122350.000...",210.000000
t4jv05973,63000.0000,688494,2100.0,https://deafrica-services.s3.af-south-1.amazon...,"POLYGON ((5122260.000 1538909.997, 5122290.000...",899.999999


In [10]:
# Keep only the polygons whose length does not exceed a single Landsat scene;
# the threshold is 185 * 1000, compared against the polygon_length_m column.
ls_scene_length = 185_000
fits_in_scene = polygons_gdf["polygon_length_m"] <= ls_scene_length
filtered_polygons_gdf = polygons_gdf.loc[fits_in_scene]

# Report how many polygons were removed relative to the count at load time.
_log.info(f"Filtered out {original_count - len(filtered_polygons_gdf)} polygons out of {original_count} polygons")

[2023-11-16 21:25:05,119] {1157269933.py:4} INFO - Filtered out 34 polygons out of 688496 polygons


In [11]:
# Reproject the filtered polygons back to the CRS the input file was read in
# (captured in original_crs before the reprojection to EPSG:6933).
filtered_polygons_gdf = filtered_polygons_gdf.to_crs(original_crs)

In [12]:
# Split the filtered polygons by region.
# Pass filtered_polygons_gdf (length-filtered and already reprojected back to
# the original CRS), not polygons_gdf: the latter still contains the polygons
# longer than a Landsat scene and is still in EPSG:6933, so using it would
# silently discard the filtering and reprojection steps above.
# NOTE(review): the frame still carries the helper column polygon_length_m,
# which will be passed along with it — confirm whether that is wanted.
split_polygons_fps = split_polygons_by_region(  # noqa: F841
    product=product, polygons_gdf=filtered_polygons_gdf, output_directory=output_directory
)

[2023-11-16 21:36:45,999] {credentials.py:611} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2023-11-16 21:36:46,418] {group_polygons.py:107} INFO - Polygons for region 157064 written to s3://deafrica-waterbodies-dev/waterbodies/v0.0.1/historical_extent_split_by_wofs_region/157064.parquet
[2023-11-16 21:36:46,659] {group_polygons.py:107} INFO - Polygons for region 157069 written to s3://deafrica-waterbodies-dev/waterbodies/v0.0.1/historical_extent_split_by_wofs_region/157069.parquet
[2023-11-16 21:36:46,848] {group_polygons.py:107} INFO - Polygons for region 157070 written to s3://deafrica-waterbodies-dev/waterbodies/v0.0.1/historical_extent_split_by_wofs_region/157070.parquet
[2023-11-16 21:36:47,070] {group_polygons.py:107} INFO - Polygons for region 157071 written to s3://deafrica-waterbodies-dev/waterbodies/v0.0.1/historical_extent_split_by_wofs_region/157071.parquet
[2023-11-16 21:36:47,329] {group_polygons.py:107} INFO - Polygons for region 157072 writt