In [8]:
# Imports

import os
import shutil
import re
import yaml

import geopandas as gpd
from vercye_ops.apsim.convert_shapefile_to_geojson import convert_shapefile_to_geojson

In [None]:
# Parameters and paths for this setup run.
# See the documentation for details on how to define the config.

# Shapefile holding the administrative divisions to extract.
SHAPEFILE_PATH = (
    "/home/rohan/nasa-harvest/vercye/data/Ukraine/ukraine_shpfiles/"
    "Ukraine_Second_level_Administrative_Divisions_2015 (old)-20250509T164252Z-1-001/"
    "Ukraine_Second_level_Administrative_Divisions_2015 (old)/"
    "UKR_adm2.shp"
)
# Column of the shapefile that contains the names of the administrative divisions at the level of interest.
ADMIN_COLUMN_NAME = "NAME_2"

# Optional region filter: only regions whose FILTER_COL_NAME value is listed in FILTER_COL_VALUES are kept.
# E.g. FILTER_COL_NAME = 'ADMIN_1' together with FILTER_COL_VALUES = ['Arizona'] keeps only the districts in Arizona.
# The values must be spelled exactly as they appear in the shapefile. Set FILTER_COL_NAME to None to disable filtering.
FILTER_COL_NAME = "NAME_1"
FILTER_COL_VALUES = [
    "Dnipropetrovs'k",
    "Donets'k",
    "Kharkiv",
    "Kherson",
    "Kirovohrad",
    "Luhans'k",
    "Mykolayiv",
    "Odessa",
    "Zaporizhzhya",
    "Crimea",
]

# Intermediate output folder in which the extracted geojsons are saved.
GEOJSONS_FOLDER = "/home/rohan/nasa-harvest/vercye/data/Ukraine/Ukraine_Regions/Vercye/Run1"

# Start from a snakemake config file that is completely filled out except for its "regions" field.
# This notebook fills in the "regions" field with the regions extracted from the shapefile.
# OUTPUT_DIR: directory in which the new head_dir structure and files are created.
# SNAKEFILE_CONFIG: template config file for the snakemake pipeline.
OUTPUT_DIR = "/home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025"
SNAKEFILE_CONFIG = "/home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/config_template.yaml"


# Several APSIM files can be used at once by assigning regions to APSIM files through a shapefile column:
#   - APSIM_TEMPLATE_PATHS_FILTER_COL_NAME names the shapefile column whose value selects the APSIM file.
#   - APSIM_TEMPLATE_PATHS maps each value of that column to the path of the APSIM file to use.
# Ensure that APSIM_TEMPLATE_PATHS has an entry for every column value that occurs in the kept regions.
# When using a single APSIM file, set the column name to None and store the file under the key 'all'.
APSIM_TEMPLATE_PATHS_FILTER_COL_NAME = "NAME_1"

# All regions of this run currently share one APSIM file; point individual entries elsewhere as needed.
_SHARED_APSIM_TEMPLATE = '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx'
APSIM_TEMPLATE_PATHS = {
    "Dnipropetrovs'k": _SHARED_APSIM_TEMPLATE,
    "Donets'k": _SHARED_APSIM_TEMPLATE,
    "Kharkiv": _SHARED_APSIM_TEMPLATE,
    "Kherson": _SHARED_APSIM_TEMPLATE,
    "Kirovohrad": _SHARED_APSIM_TEMPLATE,
    "Luhans'k": _SHARED_APSIM_TEMPLATE,
    "Mykolayiv": _SHARED_APSIM_TEMPLATE,
    "Odessa": _SHARED_APSIM_TEMPLATE,
    "Zaporizhzhya": _SHARED_APSIM_TEMPLATE,
    "Crimea": _SHARED_APSIM_TEMPLATE,
}

# Write a filled-in copy of the template config in which the metadata fields are
# overridden with the values defined above.
#
# Only the file name is changed when deriving the output path: a plain str.replace on the
# whole path would also rewrite any parent directory whose name contains "_template".
_template_dir, _template_name = os.path.split(SNAKEFILE_CONFIG)
NEW_SNAKEFILECONFIG_PATH = os.path.join(_template_dir, _template_name.replace("_template", ""))
if NEW_SNAKEFILECONFIG_PATH == SNAKEFILE_CONFIG:
    # Without the "_template" marker in the file name, input and output are the same file.
    print(f"Note: '{_template_name}' does not contain '_template'; the config at {SNAKEFILE_CONFIG} will be updated in place.")

# Read the template completely (and close it) before anything is written.
with open(SNAKEFILE_CONFIG, 'r') as f:
    config = yaml.safe_load(f)

# yaml.safe_load returns None for an empty file; fail with a clear message instead of a TypeError below.
if not isinstance(config, dict):
    raise ValueError(f"Config template {SNAKEFILE_CONFIG} is empty or does not contain a YAML mapping.")

config['lai_shp_name'] = SHAPEFILE_PATH
config['lai_shp_col'] = ADMIN_COLUMN_NAME
config['lai_shp_filter_col'] = FILTER_COL_NAME
config['lai_shp_filter_values'] = FILTER_COL_VALUES
config['APSIM_TEMPLATE_PATHS'] = APSIM_TEMPLATE_PATHS

# Save the updated config
with open(NEW_SNAKEFILECONFIG_PATH, 'w') as snakemake_config_writer:
    yaml.dump(config, snakemake_config_writer, default_flow_style=False)

In [10]:
# Extracts geojsons in their corresponding vercye-style directories from a shapefile

# Load the filled-in config (NEW_SNAKEFILECONFIG_PATH), not the template: the next cell adds the
# "regions" field to this `config` object and writes it back to NEW_SNAKEFILECONFIG_PATH, so
# loading the template here would discard the metadata overrides that were written earlier.
# A YAML parsing error is allowed to propagate, since nothing below can work without the config.
with open(NEW_SNAKEFILECONFIG_PATH) as snakemake_config_reader:
    config = yaml.safe_load(snakemake_config_reader)

# yaml.safe_load returns None for an empty file.
if not isinstance(config, dict):
    raise ValueError(f"Config file {NEW_SNAKEFILECONFIG_PATH} is empty or does not contain a YAML mapping.")

# CRS passed to the converter as `projection_crs`, taken from the matching parameters of the config.
SHAPEFILE_CENTROID_PROJECTION_CRS = config['matching_params']['target_crs']
convert_shapefile_to_geojson(
    shp_fpath=SHAPEFILE_PATH,
    admin_name_col=ADMIN_COLUMN_NAME,
    projection_crs=SHAPEFILE_CENTROID_PROJECTION_CRS,
    output_head_dir=GEOJSONS_FOLDER,
)



In [11]:
# Selects the regions to process and assigns an APSIM template file to each of them.
#
# Every sub-folder <region> of GEOJSONS_FOLDER is read from "<region>/<region>.geojson".
# A region is skipped when FILTER_COL_NAME is set and its value in that column is not listed
# in FILTER_COL_VALUES. The names of the kept regions are written to the "regions" field of
# the config at NEW_SNAKEFILECONFIG_PATH; review and edit that list there if needed.
# `regions_apsimfile` (region name -> APSIM template path) is used later when the APSIM
# files are copied.

keep_regions = []
regions_apsimfile = {}
for region_name in sorted(os.listdir(GEOJSONS_FOLDER)):
    geojson_folder_path = os.path.join(GEOJSONS_FOLDER, region_name)
    if not os.path.isdir(geojson_folder_path):
        continue

    geojson_path = os.path.join(geojson_folder_path, region_name + ".geojson")
    gdf = gpd.read_file(geojson_path)

    # Exactly one feature per region is expected. An empty file is rejected here as well,
    # because the attribute lookups below need a first row.
    if len(gdf) != 1:
        raise ValueError(
            f"GeoJSON {geojson_path} contains {len(gdf)} features, but exactly one polygon per region is expected. "
            f"Please make sure the shapefile {SHAPEFILE_PATH} has only a single polygon per region."
        )
    region_row = gdf.iloc[0]

    # If the user wants only polygons from specific parent regions, filter
    if FILTER_COL_NAME is not None:
        if FILTER_COL_NAME not in gdf.columns:
            raise ValueError(f"Column {FILTER_COL_NAME} not found in the shapefile.")
        if region_row[FILTER_COL_NAME] not in FILTER_COL_VALUES:
            continue

    # Determine which entry of APSIM_TEMPLATE_PATHS applies to this region
    if APSIM_TEMPLATE_PATHS_FILTER_COL_NAME is not None:
        if APSIM_TEMPLATE_PATHS_FILTER_COL_NAME not in gdf.columns:
            raise ValueError(f"Column {APSIM_TEMPLATE_PATHS_FILTER_COL_NAME} not found in the shapefile.")
        apsim_region_key = region_row[APSIM_TEMPLATE_PATHS_FILTER_COL_NAME]
    else:
        apsim_region_key = 'all'

    if apsim_region_key not in APSIM_TEMPLATE_PATHS:
        raise ValueError(
            f"No APSIM file defined for '{apsim_region_key}' (region '{region_name}'). "
            "Please add a matching entry to APSIM_TEMPLATE_PATHS."
        )

    regions_apsimfile[region_name] = APSIM_TEMPLATE_PATHS[apsim_region_key]
    keep_regions.append(region_name)

print(regions_apsimfile)

# Update the config with the regions that were kept under the regions field.
# The filled-in config is re-read from disk so that the metadata written to it earlier is
# preserved, independent of which file the in-memory `config` was last loaded from.
with open(NEW_SNAKEFILECONFIG_PATH) as snakemake_config_reader:
    config = yaml.safe_load(snakemake_config_reader)

# yaml.safe_load returns None for an empty file.
if not isinstance(config, dict):
    raise ValueError(f"Config file {NEW_SNAKEFILECONFIG_PATH} is empty or does not contain a YAML mapping.")

config['regions'] = keep_regions
with open(NEW_SNAKEFILECONFIG_PATH, 'w') as snakemake_config_writer:
    yaml.dump(config, snakemake_config_writer, default_flow_style=False)

print(f"Updated the config file with the regions: {keep_regions}")
print(f"Please check the config file at {NEW_SNAKEFILECONFIG_PATH} to make sure everything is correct.")

{'alchevska': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'alushtynska': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'amvrosi_vskyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'anan_vskyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'antratsitivska': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'antratsytivskyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'apostolivskyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'arbuzynskyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'artemivska': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'artemivskyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APSIM/Mykolayiv_6_5_25.apsimx', 'artsyzkyi': '/home/rohan/nasa-harvest/vercye/data/Ukraine/APS

In [12]:
# Creates the folder structure OUTPUT_DIR/<year>/<timepoint>/<region> from the config and
# copies each region's geojson into every one of its folders.
# Ensure you have filled in your config completely before running this cell.

# A YAML parsing error is allowed to propagate: printing it and continuing would only
# lead to a less helpful TypeError when the config is accessed below.
with open(NEW_SNAKEFILECONFIG_PATH) as snakemake_config_reader:
    config = yaml.safe_load(snakemake_config_reader)

# yaml.safe_load returns None for an empty file.
if not isinstance(config, dict):
    raise ValueError(f"Config file {NEW_SNAKEFILECONFIG_PATH} is empty or does not contain a YAML mapping.")

years = config['years']
timepoints = config['timepoints']
regions_names = config['regions']

for region_name in regions_names:
    region_file_path = os.path.join(GEOJSONS_FOLDER, region_name, f'{region_name}.geojson')

    # Check the source first so that no empty folders are created for a region without a geojson.
    if not os.path.isfile(region_file_path):
        raise FileNotFoundError(
            f"No geojson found for region '{region_name}' at {region_file_path}. "
            "Please check the 'regions' field of the config."
        )

    for year in years:
        for timepoint in timepoints:
            roi_folder = os.path.join(OUTPUT_DIR, str(year), str(timepoint), region_name)
            os.makedirs(roi_folder, exist_ok=True)

            shutil.copy(region_file_path, roi_folder)

In [13]:
# Copies and adjusts the APSIM file for each region folder. The "Start" and "End" dates in the
# file are replaced with the simulation start and end dates defined in the snakemake config
# for the corresponding year and timepoint.
# Requires `regions_apsimfile` (region name -> APSIM template path) from the region selection cell.

# A YAML parsing error is allowed to propagate: nothing below can work without the config.
with open(NEW_SNAKEFILECONFIG_PATH) as snakemake_config_reader:
    config = yaml.safe_load(snakemake_config_reader)

# yaml.safe_load returns None for an empty file.
if not isinstance(config, dict):
    raise ValueError(f"Config file {NEW_SNAKEFILECONFIG_PATH} is empty or does not contain a YAML mapping.")

years = config['years']
timepoints = config['timepoints']
regions_names = config['regions']
time_bounds = config['apsim_params']['time_bounds']

# The "regions" field of the config may have been edited by hand; report regions without an
# assigned APSIM file up front instead of failing with a bare KeyError halfway through.
regions_without_apsim = [roi for roi in regions_names if roi not in regions_apsimfile]
if regions_without_apsim:
    raise ValueError(
        f"No APSIM file assigned to the regions {regions_without_apsim}. "
        "Please re-run the region selection cell or fix the 'regions' field of the config."
    )

start_pattern = re.compile(r'"Start":\s*"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"')
end_pattern = re.compile(r'"End":\s*"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"')

# Content of each APSIM template, read from disk only once (many regions may share a file).
apsim_template_contents = {}

for year in years:
    year_folder = os.path.join(OUTPUT_DIR, str(year))

    for timepoint in timepoints:
        timepoint_folder = os.path.join(year_folder, str(timepoint))

        # The simulation window depends only on year and timepoint, not on the region.
        start_date = time_bounds[year][timepoint]['sim_start_date']
        end_date = time_bounds[year][timepoint]['sim_end_date']

        for roi in regions_names:
            roi_folder = os.path.join(timepoint_folder, roi)

            apsim_template_path = regions_apsimfile[roi]
            if apsim_template_path not in apsim_template_contents:
                with open(apsim_template_path, "r", encoding="utf-8") as file:
                    apsim_template_contents[apsim_template_path] = file.read()

            # Replace "Start" and "End" dates with new values
            data, n_start_replaced = start_pattern.subn(
                f'"Start": "{start_date}T00:00:00"',
                apsim_template_contents[apsim_template_path]
            )
            data, n_end_replaced = end_pattern.subn(
                f'"End": "{end_date}T00:00:00"',
                data
            )

            # If nothing matched, the copy would silently keep the dates of the template.
            if n_start_replaced == 0 or n_end_replaced == 0:
                raise ValueError(
                    f"Could not find the \"Start\"/\"End\" date entries in {apsim_template_path}; "
                    "the simulation dates cannot be adjusted."
                )

            # Write new file
            print(f'Writing new file for {roi} in {timepoint} of {year} to {roi_folder}')

            new_apsim_path = os.path.join(roi_folder, f'{roi}_template.apsimx')
            with open(new_apsim_path, "w", encoding="utf-8") as new_file:
                new_file.write(data)

Writing new file for alchevska in T-0 of 2024 to /home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/2024/T-0/alchevska
Writing new file for alushtynska in T-0 of 2024 to /home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/2024/T-0/alushtynska
Writing new file for amvrosi_vskyi in T-0 of 2024 to /home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/2024/T-0/amvrosi_vskyi
Writing new file for anan_vskyi in T-0 of 2024 to /home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/2024/T-0/anan_vskyi
Writing new file for antratsitivska in T-0 of 2024 to /home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/2024/T-0/antratsitivska
Writing new file for antratsytivskyi in T-0 of 2024 to /home/rohan/nasa-harvest/vercye/data/Ukraine/vercye_setups/Ukraine_Regions1_09-05-2025/2024/T-0/antratsytivskyi
Writing new file for apostolivskyi

In [14]:
# Now place validation data, if available, in each timepoint folder (see docs for more info).