In [9]:
# Imports

import os
import shutil
import re
import yaml

import geopandas as gpd
from vercye_ops.apsim.convert_shapefile_to_geojson import convert_shapefile_to_geojson

In [2]:
# Define parameters and paths
# Please refer to the documentation to understand how to define the config.

# Path to the shapefile that is split into one GeoJSON per region.
SHAPEFILE_PATH = "/home/rohan/nasa-harvest/vercye/data/morocco/Regions/mar_admbnda_hcp_clipped/shapefile/mar_admbnda_adm3_hcp_20230925_geo2_maClipped.shp"
ADMIN_COLUMN_NAME = "new_index" # Name of the column that contains the administrative division level names for the level of interest in your shapefile.

# Set these if you want to filter the regions in your shapefile. Otherwise set FILTER_COL_NAME to None.
# E.g. you could set FILTER_COL_NAME = 'ADMIN_2' and FILTER_COL_VALUES = ['Arizona'] to only keep districts in Arizona.
# Ensure the values in FILTER_COL_VALUES are spelled exactly as in the shapefile (the comparison is an exact match).

# Here, the regions with more than 5% cropland according to WorldCereal are used for filtering.
FILTER_COL_NAME = "ADM2_FR"
FILTER_COL_VALUES = [
 'Settat', 'Safi', 'Fès', 'Meknès', 'Benslimane',
 'Khouribga', 'El Hajeb', 'Mediouna', 'Essaouira',
 'Marrakech', 'Moulay Yacoub', 'Taounate', 'Al Haouz',
 'Ifrane', 'Nouaceur', 'Sefrou', 'Kénitra', 'Tiznit',
 'Casablanca', 'Azilal', 'Taroudannt', 'Oujda-Angad',
 'Larache', 'Taza', 'Salé', 'Berkane',
 'El Kelâa des Sraghna', 'Sidi Kacem', 'Béni Mellal',
 'El Jadida', 'Khémisset', 'Skhirate- Témara', 'Khénifra',
 'Chichaoua', 'Chtouka- Ait Baha', 'Agadir Ida Ou Tanane', 
 'Inezgane Ait Melloul', 'Mohammadia'
]

# Intermediate output folder in which the extracted GeoJSONs are saved (one sub-folder per region).
GEOJSONS_FOLDER = "/home/rohan/nasa-harvest/vercye/data/morocco/Regions/mar_admbnda_hcp_clipped/Vercye" 

# In the beginning you want to start out with a snakemake config file that is completely filled out except for the "regions" field.
# This notebook fills out the "regions" field with the regions extracted from the shapefile.
OUTPUT_DIR = "/home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco" # Directory to save the new head_dir structure and files
SNAKEFILE_CONFIG = "/home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/config_template.yaml" # Config file for the snakemake pipeline


# If you want to use multiple APSIM files, you can specify here which regions should use which APSIM file.
# This is done by matching the value of a shapefile column to a key of APSIM_TEMPLATE_PATHS.
# APSIM_TEMPLATE_PATHS_FILTER_COL_NAME specifies the shapefile column whose value selects the APSIM file.
# All regions that share a column value use the APSIM file registered under that value.
# Ensure that APSIM_TEMPLATE_PATHS contains every occurring column value as a key, mapped to the APSIM file path.
# If using only one APSIM file, set the column name to None and name the APSIM file's key 'all'.
APSIM_TEMPLATE_PATHS_FILTER_COL_NAME = None
APSIM_TEMPLATE_PATHS = {
    'all': '/home/rohan/nasa-harvest/vercye/data/morocco/APSIM_Templates/Morocco_Wheat_18_3_25.apsimx'
}

# Overwrite the input config with metadata from here

In [3]:
# Extracts geojsons in their corresponding vercye-style directories from a shapefile

# Load the snakemake config template. A parse error is raised (with the file
# path) instead of being printed and ignored: the lines below index into
# `config`, so continuing with `config = None` would only fail later with an
# unhelpful "'NoneType' object is not subscriptable".
with open(SNAKEFILE_CONFIG) as snakemake_config_reader:
    try:
        config = yaml.safe_load(snakemake_config_reader)
    except yaml.YAMLError as e:
        raise ValueError(f"Could not parse the config file {SNAKEFILE_CONFIG}: {e}") from e

# yaml.safe_load returns None for an empty document.
if not isinstance(config, dict):
    raise ValueError(f"The config file {SNAKEFILE_CONFIG} is empty or is not a YAML mapping.")

# The CRS may be wrapped in literal quote characters in the YAML; strip them
# before handing the value to the conversion helper.
SHAPEFILE_CENTROID_PROJECTION_CRS = config['matching_params']['target_crs'].strip('\'"')

# Writes the per-region GeoJSONs below GEOJSONS_FOLDER.
convert_shapefile_to_geojson(
    shp_fpath=SHAPEFILE_PATH,
    admin_name_col=ADMIN_COLUMN_NAME,
    projection_crs=SHAPEFILE_CENTROID_PROJECTION_CRS,
    output_head_dir=GEOJSONS_FOLDER,
)



In [4]:
# Selects the regions to keep and assigns an APSIM template to each of them.
#
# Every sub-folder of GEOJSONS_FOLDER is expected to hold "<folder name>.geojson".
# A region is kept if no filter is configured (FILTER_COL_NAME is None) or if its
# value in FILTER_COL_NAME is listed in FILTER_COL_VALUES.
#
# Results:
#   keep_regions      - names of the kept regions, in sorted folder order
#   regions_apsimfile - region name -> path of the APSIM template to use

keep_regions = []
regions_apsimfile = {}
for region_name in sorted(os.listdir(GEOJSONS_FOLDER)):
    geojson_folder_path = os.path.join(GEOJSONS_FOLDER, region_name)
    if not os.path.isdir(geojson_folder_path):
        continue

    geojson_path = os.path.join(geojson_folder_path, region_name + ".geojson")
    gdf = gpd.read_file(geojson_path)

    # Each region file must describe exactly one feature. The attribute lookups
    # below read the first row, so an empty file is as invalid as one with
    # several rows. The message names the GeoJSON that was actually checked.
    if len(gdf) != 1:
        raise ValueError(
            f"GeoJSON {geojson_path} contains {len(gdf)} features. "
            "Please make sure each region file has exactly one feature."
        )

    # If the user wants only regions with specific attribute values, filter.
    if FILTER_COL_NAME is not None:
        if FILTER_COL_NAME not in gdf.columns:
            raise ValueError(f"Column {FILTER_COL_NAME} not found in {geojson_path}.")

        # .iloc[0] is positional, so it does not depend on the index labels.
        if gdf[FILTER_COL_NAME].iloc[0] not in FILTER_COL_VALUES:
            continue

    # Determine which entry of APSIM_TEMPLATE_PATHS applies to this region.
    if APSIM_TEMPLATE_PATHS_FILTER_COL_NAME is not None:
        if APSIM_TEMPLATE_PATHS_FILTER_COL_NAME not in gdf.columns:
            raise ValueError(f"Column {APSIM_TEMPLATE_PATHS_FILTER_COL_NAME} not found in the shapefile.")

        apsim_region_key = gdf[APSIM_TEMPLATE_PATHS_FILTER_COL_NAME].iloc[0]
    else:
        apsim_region_key = 'all'

    if apsim_region_key not in APSIM_TEMPLATE_PATHS:
        raise ValueError(
            f"No APSIM template is defined for key '{apsim_region_key}' (region {region_name}). "
            f"Available keys in APSIM_TEMPLATE_PATHS: {list(APSIM_TEMPLATE_PATHS)}"
        )

    regions_apsimfile[region_name] = APSIM_TEMPLATE_PATHS[apsim_region_key]
    keep_regions.append(region_name)

In [5]:
# Creates Folder Structure from Config for years/timepoints, copying all regions
# Ensure you have filled in your config completely before running this script
#
# Resulting layout: OUTPUT_DIR/<year>/<timepoint>/<region>/<region>.geojson

# Re-read the config so that edits made to the file since the previous cell are
# picked up. A parse error is raised instead of printed, because `config` is
# indexed right below and must not be None.
with open(SNAKEFILE_CONFIG) as snakemake_config_reader:
    try:
        config = yaml.safe_load(snakemake_config_reader)
    except yaml.YAMLError as e:
        raise ValueError(f"Could not parse the config file {SNAKEFILE_CONFIG}: {e}") from e

# yaml.safe_load returns None for an empty document.
if not isinstance(config, dict):
    raise ValueError(f"The config file {SNAKEFILE_CONFIG} is empty or is not a YAML mapping.")

years = config['years']
timepoints = config['timepoints']
regions_names = keep_regions

for region_name in regions_names:
    # Source GeoJSON as laid out below GEOJSONS_FOLDER: <region>/<region>.geojson
    region_file_path = os.path.join(GEOJSONS_FOLDER, region_name, f'{region_name}.geojson')

    for year in years:
        year_folder = os.path.join(OUTPUT_DIR, str(year))

        for timepoint in timepoints:
            timepoint_folder = os.path.join(year_folder, str(timepoint))

            # exist_ok=True makes the cell safe to re-run on an existing tree.
            roi_folder = os.path.join(timepoint_folder, region_name)
            os.makedirs(roi_folder, exist_ok=True)

            shutil.copy(region_file_path, roi_folder)

In [6]:
# Copies and adjusts APSIM file to each folder. Adjustment applied for start and end dates as defined in snakemake config for the timepoint
#
# For every year/timepoint/region the region's APSIM template is written to
# <region folder>/<region>_template.apsimx with its "Start" and "End" entries
# replaced by the simulation bounds from the config.

# A parse error is raised instead of printed, because `config` is indexed
# right below and must not be None.
with open(SNAKEFILE_CONFIG) as snakemake_config_reader:
    try:
        config = yaml.safe_load(snakemake_config_reader)
    except yaml.YAMLError as e:
        raise ValueError(f"Could not parse the config file {SNAKEFILE_CONFIG}: {e}") from e

# yaml.safe_load returns None for an empty document.
if not isinstance(config, dict):
    raise ValueError(f"The config file {SNAKEFILE_CONFIG} is empty or is not a YAML mapping.")

years = config['years']
timepoints = config['timepoints']
regions_names = keep_regions

# Only entries of the exact form "YYYY-MM-DDTHH:MM:SS" are recognised.
start_pattern = re.compile(r'"Start":\s*"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"')
end_pattern = re.compile(r'"End":\s*"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"')

# Template path -> file content, so that each template is read from disk once
# instead of once per year/timepoint/region.
apsim_template_cache = {}

for year in years:
    year_folder = os.path.join(OUTPUT_DIR, str(year))

    for timepoint in timepoints:
        timepoint_folder = os.path.join(year_folder, str(timepoint))

        # The simulation bounds depend only on year and timepoint.
        # NOTE(review): the values are interpolated with str(); this assumes they
        # render as YYYY-MM-DD (a plain string or a YAML date) - confirm.
        time_bounds = config['apsim_params']['time_bounds'][year][timepoint]
        start_date = time_bounds['sim_start_date']
        end_date = time_bounds['sim_end_date']

        for roi in regions_names:
            roi_folder = os.path.join(timepoint_folder, roi)

            apsim_template_path = regions_apsimfile[roi]
            if apsim_template_path not in apsim_template_cache:
                with open(apsim_template_path, "r", encoding="utf-8") as file:
                    apsim_template_cache[apsim_template_path] = file.read()
            data = apsim_template_cache[apsim_template_path]

            # Replace "Start" and "End" dates with new values. A function is used
            # as replacement so the new text is inserted literally (no backslash
            # or group-reference processing).
            data, n_start = start_pattern.subn(lambda _: f'"Start": "{start_date}T00:00:00"', data)
            data, n_end = end_pattern.subn(lambda _: f'"End": "{end_date}T00:00:00"', data)

            # Without this check a template whose dates use another format would
            # be copied unchanged and silently run with the wrong period.
            if n_start == 0 or n_end == 0:
                raise ValueError(
                    f"Could not find a \"Start\" and an \"End\" date of the form YYYY-MM-DDTHH:MM:SS "
                    f"in the APSIM template {apsim_template_path}."
                )

            # Write new file
            print(f'Writing new file for {roi} in {timepoint} of {year} to {roi_folder}')

            new_apsim_path = os.path.join(roi_folder, f'{roi}_template.apsimx')
            with open(new_apsim_path, "w", encoding="utf-8") as new_file:
                new_file.write(data)

Writing new file for 100 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/100
Writing new file for 1000 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1000
Writing new file for 1001 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1001
Writing new file for 1002 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1002
Writing new file for 1003 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1003
Writing new file for 1004 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1004
Writing new file for 1005 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1005
Writing new file for 1006 in T-0 of 2019 to /home/rohan/nasa-harvest/vercye/data/morocco/vercye_setups/morocco/2019/T-0/1006
Wr

In [7]:
# Update the config with the regions that were kept under the regions field
# and record the notebook parameters that produced them. The result is written
# next to the template, under the template's file name without "_template".

# Strip "_template" from the file name only, so that a directory that happens
# to contain "_template" in its name is left untouched.
config_dir, config_filename = os.path.split(SNAKEFILE_CONFIG)
NEW_SNAKEFILECONFIG_PATH = os.path.join(config_dir, config_filename.replace("_template", ""))

# Stop here instead of failing on the assignments below with a TypeError.
if config is None:
    raise ValueError(f"No config was loaded from {SNAKEFILE_CONFIG}. Please check the path to the config file.")

if NEW_SNAKEFILECONFIG_PATH == SNAKEFILE_CONFIG:
    print(f"Warning: the config file name does not contain '_template'. {SNAKEFILE_CONFIG} will be overwritten.")

config['regions'] = keep_regions

# Metadata describing how the regions were selected.
config['regions_shp_col'] = ADMIN_COLUMN_NAME
config['regions_shp_filter_col'] = FILTER_COL_NAME
config['regions_shp_filter_values'] = FILTER_COL_VALUES
config['APSIM_TEMPLATE_PATHS'] = APSIM_TEMPLATE_PATHS
config['regions_shp_name'] = SHAPEFILE_PATH

# Write the complete config in a single pass.
with open(NEW_SNAKEFILECONFIG_PATH, 'w') as snakemake_config_writer:
    yaml.dump(config, snakemake_config_writer, default_flow_style=False)

print(f"Updated the config file with the regions: {keep_regions}")
# Point the user at the file that was written, not at the input template.
print(f"Please check the config file at {NEW_SNAKEFILECONFIG_PATH} to make sure everything is correct.")

Updated the config file with the regions: ['100', '1000', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '101', '1010', '1011', '1012', '1013', '1014', '1015', '1016', '1017', '1019', '102', '1020', '1021', '1022', '1023', '1024', '1025', '1026', '1027', '1028', '1029', '103', '1030', '1031', '1032', '1033', '1034', '1035', '1036', '1037', '1038', '1039', '104', '1040', '1041', '1042', '1043', '1044', '1045', '1046', '1047', '1048', '1049', '105', '1050', '1055', '1056', '1057', '1058', '1059', '106', '1060', '1061', '1062', '1063', '1064', '1065', '1066', '1067', '1068', '1069', '107', '1070', '1071', '1072', '1073', '1074', '1075', '1076', '108', '109', '110', '1105', '1106', '1107', '111', '112', '1127', '1128', '1129', '113', '1130', '1131', '1132', '114', '115', '116', '117', '118', '1183', '1184', '1185', '1186', '1187', '1188', '1189', '119', '1190', '1191', '1192', '1193', '1194', '1195', '120', '1202', '1203', '1204', '121', '122', '1228', '1229', '123

In [8]:
# Now Place validation data if available in each timepoint (see docs for more info).