From 86ca22833c990beffa807bdaac88eba551c0bb5c Mon Sep 17 00:00:00 2001 From: claudio candelori Date: Mon, 30 Mar 2026 10:45:41 +0200 Subject: [PATCH 1/7] cgi initial commit --- .gitignore | 1 + earthcode/__init__.py | 2 + earthcode/experiment_generator.py | 55 +++++++++++++ earthcode/product_generator.py | 60 ++++++++++++++ earthcode/project_generator.py | 53 +++++++++++++ earthcode/stac_generator.py | 76 ++++++++++++++++++ earthcode/template_generator.py | 78 +++++++++++++++++++ earthcode/templates/experiment.yaml | 51 ++++++++++++ .../templates/experiment_environment.yaml | 2 + earthcode/templates/experiment_input.yaml | 3 + earthcode/templates/product.yaml | 54 +++++++++++++ earthcode/templates/project.yaml | 41 ++++++++++ earthcode/templates/workflow.yaml | 41 ++++++++++ earthcode/workflow_generator.py | 54 +++++++++++++ pyproject.toml | 6 ++ tests/test_stac_generator.py | 40 ++++++++++ tests/test_template_generator.py | 54 +++++++++++++ tests/test_utils.py | 6 ++ 18 files changed, 677 insertions(+) create mode 100644 earthcode/experiment_generator.py create mode 100644 earthcode/product_generator.py create mode 100644 earthcode/project_generator.py create mode 100644 earthcode/stac_generator.py create mode 100644 earthcode/template_generator.py create mode 100644 earthcode/templates/experiment.yaml create mode 100644 earthcode/templates/experiment_environment.yaml create mode 100644 earthcode/templates/experiment_input.yaml create mode 100644 earthcode/templates/product.yaml create mode 100644 earthcode/templates/project.yaml create mode 100644 earthcode/templates/workflow.yaml create mode 100644 earthcode/workflow_generator.py create mode 100644 tests/test_stac_generator.py create mode 100644 tests/test_template_generator.py create mode 100644 tests/test_utils.py diff --git a/.gitignore b/.gitignore index 0f764f2..c0e2324 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # pixi environments .pixi +.idea *.egg-info *__pycache__* *.json diff --git 
a/earthcode/__init__.py b/earthcode/__init__.py index e69de29..844e727 100644 --- a/earthcode/__init__.py +++ b/earthcode/__init__.py @@ -0,0 +1,2 @@ +from .stac_generator import generate_stac +from .template_generator import generate_template diff --git a/earthcode/experiment_generator.py b/earthcode/experiment_generator.py new file mode 100644 index 0000000..c5a780c --- /dev/null +++ b/earthcode/experiment_generator.py @@ -0,0 +1,55 @@ +import os +import json +from datetime import datetime +import logging +import sys + +import yaml + +from earthcode.static import create_experiment_record, ExperimentMetadata + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + + +def create_experiment_stac_from_template(experiment_yaml, target): + with open(experiment_yaml, 'r') as file: + data = yaml.safe_load(file) + + for k, v in data.items(): + if v is None: + log.error(f"The Project YAML contains an empty value for the following field: {k}") + raise Exception(f"The Project YAML contains an empty value for the following field: {k}") + + temporal_extent = data.get('temporal_extent', None) + if temporal_extent is not None: + temporal_start = datetime.strptime(temporal_extent['start'], "%Y-%m-%dT%H:%M:%SZ") + temporal_end = datetime.strptime(temporal_extent['end'], "%Y-%m-%dT%H:%M:%SZ") + else: + temporal_start = temporal_end = None + + experiment_metadata = ExperimentMetadata( + experiment_id=data['id'], + experiment_title=data['title'], + experiment_description=data['description'], + experiment_license=data['license'], + experiment_keywords=data['keywords'], + experiment_formats=data['formats'], + experiment_themes=data['themes'], + experiment_input_parameters_link=data['link_params'], + experiment_enviroment_link=data['link_env'], + workflow_id=data['workflow'], + workflow_title=data['workflow-title'], + product_id=data['product'], + product_title=data['product-title'], + contacts=data.get('contacts', None), + 
experiment_bbox=data.get('spatial_extent', None), + experiment_start_datetime=temporal_start, + experiment_end_datetime=temporal_end, + ) + + experiment_record = create_experiment_record(experiment_metadata) + + # save this file and copy it to the catalog/experiments/{experiment-id}/record.json + with open(os.path.join(target, 'experiment_record.json'), 'w') as f: + json.dump(experiment_record, f, indent=2) diff --git a/earthcode/product_generator.py b/earthcode/product_generator.py new file mode 100644 index 0000000..615b452 --- /dev/null +++ b/earthcode/product_generator.py @@ -0,0 +1,60 @@ +import os +from datetime import datetime +import sys +import logging + +import pystac +import yaml + +from earthcode.static import create_product_collection, ProductCollectionMetadata + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + + +def create_product_stac_from_template(project_yaml, target): + with (open(project_yaml, 'r') as file): + data = yaml.safe_load(file) + + for k, v in data.items(): + if v is None: + log.error(f"The Project YAML contains an empty value for the following field: {k}") + raise Exception(f"The Project YAML contains an empty value for the following field: {k}") + + # Define spatial and temporal extent + spatial_extent = pystac.SpatialExtent(data['extent']['spatial']['bbox']).bboxes + temporal_start = datetime.strptime(data['extent']['temporal']['start'], "%Y-%m-%dT%H:%M:%SZ") + temporal_end = datetime.strptime(data['extent']['temporal']['end'], "%Y-%m-%dT%H:%M:%SZ") + + # optional + product_license = data['license'] + if product_license == 'other': + product_license_link = data['license_link'] + else: + product_license_link = None + + product_metadata = ProductCollectionMetadata( + product_id=data['id'], + product_title=data['title'], + product_description=data['description'], + product_keywords=data['keywords'], + product_status=data['status'], + product_region=data['region'], + 
product_themes=data['themes'], + product_missions=data['missions'], + product_variables=data['variables'], + project_id=data['project'], + project_title=data['project-title'], + product_parameters=data['cf_parameters'], + product_doi=data['sci:doi'], + product_bbox=spatial_extent, + product_start_datetime=temporal_start, + product_end_datetime=temporal_end, + product_license=product_license, + license_link=product_license_link, + ) + + product_collection = create_product_collection(product_metadata) + + # save this file and copy it to the catalog/products/{product_id}/collection.json + product_collection.save_object(dest_href=os.path.join(target, 'product_collection.json')) diff --git a/earthcode/project_generator.py b/earthcode/project_generator.py new file mode 100644 index 0000000..0756eef --- /dev/null +++ b/earthcode/project_generator.py @@ -0,0 +1,53 @@ +import os +from datetime import datetime +import logging +import sys + +import pystac +import yaml + +from earthcode.static import create_project_collection, ProjectCollectionMetadata + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + + +def create_project_stac_from_template(project_yaml, target): + with open(project_yaml, 'r') as file: + data = yaml.safe_load(file) + + for k, v in data.items(): + if v is None: + log.error(f"The Project YAML contains an empty value for the following field: {k}") + raise Exception(f"The Project YAML contains an empty value for the following field: {k}") + + # read spatial and temporal extent + spatial_extent = pystac.SpatialExtent(data['extent']['spatial']['bbox']).bboxes + temporal_start = datetime.strptime(data['extent']['temporal']['start'], "%Y-%m-%dT%H:%M:%SZ") + temporal_end = datetime.strptime(data['extent']['temporal']['end'], "%Y-%m-%dT%H:%M:%SZ") + + # read consortium contacts + project_cms = [] + [project_cms.append((member['name'], member['email'])) for member in data['consortium_members']] + + collection = 
create_project_collection( + ProjectCollectionMetadata( + project_id=data['id'] , + project_title=data['title'], + project_description=data['description'], + project_status=data['status'], + project_license=data['license'], + project_bbox=spatial_extent, + project_start_datetime=temporal_start, + project_end_datetime=temporal_end, + project_themes=data['themes'], + to_name=data['to_name'], + to_email=data['to_email'], + consortium_members=project_cms, + website_link=data['link_website'], + eo4society_link=data['link_eo4society'] + ) + ) + + # save this file and copy it to the catalog/projects/{project}/collection.json + collection.save_object(dest_href=os.path.join(target, 'project_collection.json')) diff --git a/earthcode/stac_generator.py b/earthcode/stac_generator.py new file mode 100644 index 0000000..f44214c --- /dev/null +++ b/earthcode/stac_generator.py @@ -0,0 +1,76 @@ +import argparse +import os +import shutil +from importlib import resources + +from earthcode import project_generator, product_generator, workflow_generator, experiment_generator +import logging +import sys + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + + +def generate_stac(project=None, workflow=None, experiment=None, product=None, target=os.getcwd()): + """ + Generates the requested STAC json files at the desired target directory. + If the folder does not exist it will be created. + If no folder is specified the PWD where the program is run will be selected. + + :param project: Path to the Project YAML template, if empty no Project STAC will be generated + :param workflow: Path to the Workflow YAML template, if empty no Workflow STAC will be generated + :param experiment: Path to the Experiment YAML template, if empty no Experiment STAC will be generated + :param product: Path to the Product YAML template, if empty no Product STAC will be generated + :param target: target directory where the STAC json will be created. 
+ """ + if target is None: + log.warning("No target folder specified, the STAC jsons will be generated in the PWD") + target = os.getcwd() + + # Create target directory if it doesn't exist + if not os.path.isdir(target): + os.makedirs(target, exist_ok=True) + + if project is not None: + log.info("Generating Project STAC json at \"" + target + "\"") + project_generator.create_project_stac_from_template(project, target) + if workflow is not None: + log.info("Generating Workflow STAC json at \"" + target + "\"") + workflow_generator.create_workflow_stac_from_template(workflow, target) + if experiment is not None: + log.info("Generating Experiment STAC json and relative YAML files at \"" + target + "\"") + experiment_generator.create_experiment_stac_from_template(experiment, target) + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment_environment.yaml")) as path: + shutil.copy(path, target) + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment_input.yaml")) as path: + shutil.copy(path, target) + if product is not None: + log.info("Generating Product STAC json at \"" + target + "\"") + product_generator.create_product_stac_from_template(product, target) + + if project is None and workflow is None and experiment is None and product is None: + log.warning("No template provided." + "Run again with at least a provided template to produce the relative STAC json." 
+ "For additional help invoke with -h.") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-p", "--project", type=str, + help="Project YAML template location") + parser.add_argument("-w", "--workflow", type=str, + help="Workflow YAML template location") + parser.add_argument("-e", "--experiment", type=str, + help="Experiment YAML template location") + parser.add_argument("-o", "--product", type=str, + help="Product YAML template location") + parser.add_argument("-t", "--target", type=str, + help="The target location where the STAC jsons will be created.") + + args = parser.parse_args() + + generate_stac(args.project, args.workflow, args.experiment, args.product, args.target) + + +if __name__ == "__main__": + main() diff --git a/earthcode/template_generator.py b/earthcode/template_generator.py new file mode 100644 index 0000000..4f7bcf7 --- /dev/null +++ b/earthcode/template_generator.py @@ -0,0 +1,78 @@ +import argparse +import os +import shutil +from importlib import resources + +import logging +import sys + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + +def generate_template(project=False, workflow=False, experiment=False, product=False, target=os.getcwd()): + """ + Creates requested yaml templates at the desired target folder. + If the folder does not exist it will be created. + If no folder is specified the PWD where the program is run will be selected. + + :param project: If True: generates the Project yaml template + :param workflow: If True: generates the Workflow yaml template + :param experiment: If True: generates the Experiment yaml template + :param product: If True: generates the Product yaml template + :param target: target directory where the templates will be generated. 
+ """ + # If empty use PWD as target directory + if target is None: + log.warning("No target folder specified, the templates will be generated in the PWD") + target = os.getcwd() + + # Create target directory if it doesn't exist + if not os.path.isdir(target): + os.makedirs(target, exist_ok=True) + + if project: + log.info("Generating Project template at \""+target+"\"") + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("project.yaml")) as path: + shutil.copy(path, target) + + if workflow: + log.info("Generating Workflow template at \""+target+"\"") + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("workflow.yaml")) as path: + shutil.copy(path, target) + + if experiment: + log.info("Generating Experiment template at \""+target+"\"") + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment.yaml")) as path: + shutil.copy(path, target) + + if product: + log.info("Generating Product template at \""+target+"\"") + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("product.yaml")) as path: + shutil.copy(path, target) + + if not project and not workflow and not experiment and not product: + log.warning("No options selected." + "Run again with at least one option to produce the templates." 
+ "For additional help invoke with -h") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-p", "--project", action='store_true', + help="If present generate a project template") + parser.add_argument("-w", "--workflow", action='store_true', + help="If present generate a workflow template") + parser.add_argument("-e", "--experiment", action='store_true', + help="If present generate an experiment template") + parser.add_argument("-o", "--product", action='store_true', + help="If present generate a product template") + parser.add_argument("-t", "--target", type=str, + help="The target location where the templates will be generated.") + + args = parser.parse_args() + + generate_template(args.project, args.workflow, args.experiment, args.product, args.target) + + +if __name__ == "__main__": + main() diff --git a/earthcode/templates/experiment.yaml b/earthcode/templates/experiment.yaml new file mode 100644 index 0000000..09b3765 --- /dev/null +++ b/earthcode/templates/experiment.yaml @@ -0,0 +1,51 @@ +# Define experiment properties +id: cool-project-experiment +title: Experiment related to Cool Project +description: The first experiment with data from the Cool Project # Short and meaningful experiment description. +license: proprietary # should be one of https:#github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json +keywords: # experiment keywords (to enhance the findability of the experiment) + - agriculture + - crops +formats: # format of experiment output + - GeoTIFF + - PNG +workflow: cool-project-workflow # id of the workflow used for this experiment +workflow-title: Workflow to analyze Cool Project # title of the workflow used for this experiment +product: cool-project-output # id of the output product produced by this experiment +product-title: CoolProjectOutput # title of the output product produced by this experiment + +# Define links, link relations and link titles: e.g. 
link to service used to run the experiment etc. +link_env: https://myplatform.com +link_params: https://github.com/MyExperiment/parameters.git + +# Define experiment themes. The fields are restricted to the themes available in the OCS and having at least one theme is mandatory. Check available themes here: https:#opensciencedata.esa.int/themes/catalog +themes: + - land + - atmosphere + +# Optional contacts +contacts: +- name: Technical Officer + position: researcher + roles: + - technical_officer + organization: CGI + links: + - https://www.cgi.com + - https://git.com/users/technical-officer + contact_instructions: Contact preferably through project support page + +- name: Junior Staff + roles: + - assistant + +# Optional extent +spatial_extent: + - - -180 + - -90 + - 180 + - 90 + +temporal_extent: + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' \ No newline at end of file diff --git a/earthcode/templates/experiment_environment.yaml b/earthcode/templates/experiment_environment.yaml new file mode 100644 index 0000000..eae2688 --- /dev/null +++ b/earthcode/templates/experiment_environment.yaml @@ -0,0 +1,2 @@ +$comment: >- + This file should contain information about the execution environment in the future... \ No newline at end of file diff --git a/earthcode/templates/experiment_input.yaml b/earthcode/templates/experiment_input.yaml new file mode 100644 index 0000000..1d5a68f --- /dev/null +++ b/earthcode/templates/experiment_input.yaml @@ -0,0 +1,3 @@ +$comment: >- + This file should contain information about the input parameters of the experiment in the future... +parameters: [] \ No newline at end of file diff --git a/earthcode/templates/product.yaml b/earthcode/templates/product.yaml new file mode 100644 index 0000000..3243e15 --- /dev/null +++ b/earthcode/templates/product.yaml @@ -0,0 +1,54 @@ +id: cool-project-product # This is the unique id of the product. 
Typically, contains the dataset title+project name (or acronym) +description: output product results for Cool Project # Short, but meaningful product description. It should provide enough information to the external users on the specific product. +title: Cool Project Product # Product Title +license: proprietary # license of the product. Should be one of https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json +#license_link: https://github.com/cool-license # If the license is not available put license='other' and fill this field with the license link. +status: completed # Product status +region: cool-land # Text description of the study area +project: cool-project # Unique id of the OSC project this product is related to. It must be the id provided in the ./project/ +project-title: Cool Project # title of the related project + +# Define project Themes. The fields are restricted to the themes available in the OCS and having at least one theme is mandatory. +# Check available themes here: https://opensciencedata.esa.int/themes/catalog and https://github.com/ESA-EarthCODE/open-science-catalog-metadata/main/themes +themes: + - land + - oceans + +# List of Keywords associated with the product. These are expected to be inline with the description. +keywords: + - agriculture + - crops + +# Array of Variables related to the product. This array of values is mandatory and limited to variables already existing in the OSC +# in this list https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables. +# If you would like to associate your product to a variable that is not on the list, create variable entry first. +variables: + - cool-stuff + - cool-things + +# Array of CF Parameters: see https://github.com/stac-extensions/cf for more details +cf_parameters: + - cool-stuff + - cool-tools + +# Array of ESA missions related to the product. 
This array of values is mandatory and limited to missions already existing in the OSC +# in this list: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions. +# If you would like to associate your product to a mission that is not on the list, create an eo-mission entry first. +missions: + - cool-mission + - freezing-mission + +# The Temporal and Spatial Extent of the product +extent: + spatial: + bbox: + - - -180 + - -90 + - 180 + - 90 + temporal: + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' + +# DOI reference +sci:doi: https://doi.org/10.12345/abc-r4nd0mn \ No newline at end of file diff --git a/earthcode/templates/project.yaml b/earthcode/templates/project.yaml new file mode 100644 index 0000000..811dd37 --- /dev/null +++ b/earthcode/templates/project.yaml @@ -0,0 +1,41 @@ +--- +# Define id, title, description, project status, license +id: cool-project-id # This is your project id. Please make sure to use unique id name for your project! The parent folder of the collection.json should have the same name as this id (not displayed in the browser). +title: CoolProjectTitle # Title of your project. Official acronym of the project may be used as well (this will be displayed to public) +description: Brief description of the project +status: completed # status of the project - Select from: completed, ongoing, scheduled +license: proprietary # Top level license of project outcomes. Should be one of https://github.com/ESA-EarthCODE/open-science-catalog-validation/blob/main/schemas/license.json + +# Define spatial and temporal extent: the study area of the project and its planned duration. +extent: + spatial: + bbox: + - - -180 + - -90 + - 180 + - 90 + temporal: + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' + +# Define links, link relations and link titles. This is a list of links to the project websites. These are mandatory and you have to adapt them to your project. 
+link_website: https://cool-project.com +link_eo4society: https://eo4society.esa.int/projects + +# Define project themes. The fields are restricted to the themes available in the OCS and having at least one theme is mandatory. Check available themes here: https://opensciencedata.esa.int/themes/catalog +themes: +- land +- atmosphere +- cryosphere +- magnetosphere-ionosphere +- oceans +- solid-earth + +# Define contacts, list of consortium members working on the project and contact to ESA TO following the project. This field is required. +to_name: Technical Officer +to_email: t_o_mail@mail.com +consortium_members: + - name: Consortium Member 1 + email: cm1@mail.com + - name: Consortium Member 2 + email: cm2@mail.com \ No newline at end of file diff --git a/earthcode/templates/workflow.yaml b/earthcode/templates/workflow.yaml new file mode 100644 index 0000000..c0b8dac --- /dev/null +++ b/earthcode/templates/workflow.yaml @@ -0,0 +1,41 @@ +--- +# Unique identifier for the workflow STAC item +id: cool-project-workflow + +# Core workflow metadata +title: Workflow to analyze Cool Project # A concise, descriptive title of the workflow +description: The first workflow able to process images from Cool Project # A summary of what the workflow does +project: cool-project-id # Project ID associated with the workflow +project-title: CoolProjectTitle # Title of associated project +formats: # Output formats of the workflow (e.g., GeoTIFF). +- GeoTIFF +- PNG +keywords: # Array of keywords to support discoverability (e.g., agriculture, crops). +- keyword1 +- keyword2 +- keyword3... +license: proprietary # License for the workflow +sci:doi: https://doi.org/10.12345/abc-r4nd0mn # DOI reference + +# Array of themes the workflow relates to. Each entry includes a concepts array with IDs (e.g., 'land') and a scheme URL. 
+themes: + - land + - atmosphere + - cryosphere + - magnetosphere-ionosphere + - oceans + - solid-earth + +# Define links, link relations and link titles: links to public repository where the workflow codebase can be found, to service used to run the workflow etc. +link_code: https://raw.githubusercontent.com/MyProject/My-Project/refs/tags/.. + +# Optional +spatial_extent: # BBOX + - - -180 + - -90 + - 180 + - 90 + +temporal_extent: # TIME + start: '2021-01-01T00:00:00Z' + end: '2021-12-31T23:59:59Z' \ No newline at end of file diff --git a/earthcode/workflow_generator.py b/earthcode/workflow_generator.py new file mode 100644 index 0000000..06f6cb9 --- /dev/null +++ b/earthcode/workflow_generator.py @@ -0,0 +1,54 @@ +import os +import json +from datetime import datetime +import logging +import sys + +import yaml + +from earthcode.static import create_workflow_record, WorkflowMetadata + +logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +log = logging.getLogger() + + +def create_workflow_stac_from_template(project_yaml, target): + with open(project_yaml, 'r') as file: + data = yaml.safe_load(file) + + for k, v in data.items(): + if v is None: + log.error(f"The Project YAML contains an empty value for the following field: {k}") + raise Exception(f"The Project YAML contains an empty value for the following field: {k}") + + # read optional spatial and temporal extent + spatial_extent = data.get('spatial_extent', None) + temporal_extent = data.get('temporal_extent', None) + if temporal_extent is not None: + temporal_start = datetime.strptime(temporal_extent['start'], "%Y-%m-%dT%H:%M:%SZ") + temporal_end = datetime.strptime(temporal_extent['end'], "%Y-%m-%dT%H:%M:%SZ") + else: + temporal_start = temporal_end = None + + workflow_metadata = WorkflowMetadata( + workflow_id=data['id'], + workflow_title=data['title'], + workflow_description=data['description'], + workflow_license=data['license'], + workflow_keywords=data['keywords'], + 
workflow_formats=data['formats'], + workflow_themes=data['themes'], + codeurl=data['link_code'], + project_id=data['project'], + project_title=data['project-title'], + workflow_doi=data['sci:doi'], + workflow_bbox=spatial_extent, + workflow_start_datetime=temporal_start, + workflow_end_datetime=temporal_end + ) + + workflow_record = create_workflow_record(workflow_metadata) + + # save this file and copy it to the catalog/workflows/{workflow-id}/record.json + with open(os.path.join(target, 'workflow_record.json'), 'w') as f: + json.dump(workflow_record, f, indent=2) diff --git a/pyproject.toml b/pyproject.toml index 56a9ba4..07bb600 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ license-files = ["LICENSE"] authors = [ { name = "Krasen Samardzhiev", email = "krasensam@gmail.com" }, { name = "Deyan Samardzhiev", email = "dean@lampata.co.uk" }, + { name = "Claudio Candelori", email = "claudio.candelori@cgi.com"}, ] keywords = [ "earth observation", @@ -47,6 +48,7 @@ dependencies = [ "pydantic>=2.12.5,<3", "requests>=2.32.5,<3", "tokenizers>=0.22.2,<1", + "PyYAML" ] [project.urls] @@ -73,3 +75,7 @@ exclude = ["earthcode/models/**"] [tool.hatch.build.targets.wheel] packages = ["earthcode"] + +[project.scripts] +earthcode_template_gen = "earthcode.template_generator:main" +earthcode_stac_gen = "earthcode.stac_generator:main" \ No newline at end of file diff --git a/tests/test_stac_generator.py b/tests/test_stac_generator.py new file mode 100644 index 0000000..8ae8a33 --- /dev/null +++ b/tests/test_stac_generator.py @@ -0,0 +1,40 @@ +import unittest +import tempfile +import shutil +import os +from importlib import resources + +from earthcode.stac_generator import generate_stac +from test_utils import assertIsFile + + +class TestStacGenerator(unittest.TestCase): + def setUp(self): + # Create a temporary directory + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + # Remove the directory after the test + shutil.rmtree(self.test_dir) + + def 
test_generate_stac(self): + with (resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("project.yaml")) as project_template, + resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("workflow.yaml")) as workflow_template, + resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment.yaml")) as experiment_template, + resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("product.yaml")) as product_template): + generate_stac(project=project_template, workflow=workflow_template, experiment=experiment_template, product=product_template, target=self.test_dir) + + project = os.path.join(self.test_dir, "project_collection.json") + workflow = os.path.join(self.test_dir, "workflow_record.json") + experiment = os.path.join(self.test_dir, "experiment_record.json") + product = os.path.join(self.test_dir, "product_collection.json") + + assertIsFile(project) + assertIsFile(workflow) + assertIsFile(experiment) + assertIsFile(product) + + def test_generate_template_with_no_template_selected(self): + with self.assertLogs(level='WARNING') as log: + generate_stac(project=None, workflow=None, experiment=None, product=None, target=self.test_dir) + self.assertIn("No template provided.", log.output[0]) diff --git a/tests/test_template_generator.py b/tests/test_template_generator.py new file mode 100644 index 0000000..7ab568c --- /dev/null +++ b/tests/test_template_generator.py @@ -0,0 +1,54 @@ +import unittest +import tempfile +import shutil +import os +import filecmp +from importlib import resources + +from earthcode.template_generator import generate_template +from test_utils import assertIsFile + + +def assertIsFile(path): + if not pathlib.Path(path).resolve().is_file(): + raise AssertionError("File does not exist: %s" % str(path)) + + +class TestTemplateGenerator(unittest.TestCase): + def setUp(self): + # Create a temporary directory + self.test_dir = tempfile.mkdtemp() + + def 
tearDown(self): + # Remove the directory after the test + shutil.rmtree(self.test_dir) + + def test_generate_template(self): + generate_template(project=True, workflow=True, experiment=True, product=True, target=self.test_dir) + + project = os.path.join(self.test_dir, "project.yaml") + workflow = os.path.join(self.test_dir, "workflow.yaml") + experiment = os.path.join(self.test_dir, "experiment.yaml") + product = os.path.join(self.test_dir, "product.yaml") + + assertIsFile(project) + assertIsFile(workflow) + assertIsFile(experiment) + assertIsFile(product) + + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("project.yaml")) as expected_project: + self.assertTrue(filecmp.cmp(project, expected_project), "The project template is different from the expected one") + + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("workflow.yaml")) as expected_workflow: + self.assertTrue(filecmp.cmp(workflow, expected_workflow), "The workflow template is different from the expected one") + + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment.yaml")) as expected_experiment: + self.assertTrue(filecmp.cmp(experiment, expected_experiment), "The experiment template is different from the expected one") + + with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("product.yaml")) as expected_product: + self.assertTrue(filecmp.cmp(product, expected_product), "The product template is different from the expected one") + + def test_generate_template_with_no_template_selected(self): + with self.assertLogs(level='WARNING') as log: + generate_template(project=False, workflow=False, experiment=False, product=False, target=self.test_dir) + self.assertIn("No options selected.", log.output[0]) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..c5f861d --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,6 @@ +import pathlib + + 
+def assertIsFile(path): + if not pathlib.Path(path).resolve().is_file(): + raise AssertionError("File does not exist: %s" % str(path)) From b9bb921989888b93df91bf56bab37f84c0a10b51 Mon Sep 17 00:00:00 2001 From: claudio candelori Date: Tue, 31 Mar 2026 12:49:00 +0200 Subject: [PATCH 2/7] Specified PyYAML version --- pixi.toml | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index 269677b..50bd4c7 100644 --- a/pixi.toml +++ b/pixi.toml @@ -29,6 +29,7 @@ pillow = ">=10.3,<12" onnxruntime = ">=1.24.1,<2" platformdirs = ">=4.5.0,<5" tokenizers = ">=0.22.2,<1" +PyYAML = ">=6.0.3" [target.osx-arm64.dependencies] python = ">=3.12,<3.14" diff --git a/pyproject.toml b/pyproject.toml index 07bb600..216a983 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "pydantic>=2.12.5,<3", "requests>=2.32.5,<3", "tokenizers>=0.22.2,<1", - "PyYAML" + "PyYAML>=6.0.3", ] [project.urls] From cac9ac231b5ff840c71383fb5fc57514c331c5f8 Mon Sep 17 00:00:00 2001 From: claudio candelori Date: Wed, 1 Apr 2026 16:04:04 +0200 Subject: [PATCH 3/7] implemented review comments 1 --- earthcode/experiment_generator.py | 10 ++--- earthcode/product_generator.py | 9 ++-- earthcode/project_generator.py | 13 +++--- earthcode/stac_generator.py | 43 ++++++------------- .../templates/experiment_environment.yaml | 2 - earthcode/templates/experiment_input.yaml | 3 -- earthcode/workflow_generator.py | 10 ++--- pixi.toml | 2 +- pyproject.toml | 2 +- tests/test_stac_generator.py | 22 +--------- 10 files changed, 36 insertions(+), 80 deletions(-) delete mode 100644 earthcode/templates/experiment_environment.yaml delete mode 100644 earthcode/templates/experiment_input.yaml diff --git a/earthcode/experiment_generator.py b/earthcode/experiment_generator.py index c5a780c..c9b51bc 100644 --- a/earthcode/experiment_generator.py +++ b/earthcode/experiment_generator.py @@ -1,5 +1,4 @@ -import os -import json +from pathlib import 
Path from datetime import datetime import logging import sys @@ -7,12 +6,13 @@ import yaml from earthcode.static import create_experiment_record, ExperimentMetadata +from earthcode.git_add import save_experiment_record_to_osc logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() -def create_experiment_stac_from_template(experiment_yaml, target): +def create_experiment_stac_from_template(experiment_yaml, osc_path): with open(experiment_yaml, 'r') as file: data = yaml.safe_load(file) @@ -50,6 +50,4 @@ def create_experiment_stac_from_template(experiment_yaml, target): experiment_record = create_experiment_record(experiment_metadata) - # save this file and copy it to the catalog/experiments/{experiment-id}/record.json - with open(os.path.join(target, 'experiment_record.json'), 'w') as f: - json.dump(experiment_record, f, indent=2) + save_experiment_record_to_osc(experiment_record, Path(osc_path)) diff --git a/earthcode/product_generator.py b/earthcode/product_generator.py index 615b452..b4908ea 100644 --- a/earthcode/product_generator.py +++ b/earthcode/product_generator.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path from datetime import datetime import sys import logging @@ -7,12 +7,14 @@ import yaml from earthcode.static import create_product_collection, ProductCollectionMetadata +from earthcode.git_add import save_product_collection_to_catalog + logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() -def create_product_stac_from_template(project_yaml, target): +def create_product_stac_from_template(project_yaml, osc_path): with (open(project_yaml, 'r') as file): data = yaml.safe_load(file) @@ -56,5 +58,4 @@ def create_product_stac_from_template(project_yaml, target): product_collection = create_product_collection(product_metadata) - # save this file and copy it to the catalog/products/{product_id}/collection.json - 
product_collection.save_object(dest_href=os.path.join(target, 'product_collection.json')) + save_product_collection_to_catalog(product_collection, Path(osc_path)) diff --git a/earthcode/project_generator.py b/earthcode/project_generator.py index 0756eef..086b8a0 100644 --- a/earthcode/project_generator.py +++ b/earthcode/project_generator.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path from datetime import datetime import logging import sys @@ -7,12 +7,13 @@ import yaml from earthcode.static import create_project_collection, ProjectCollectionMetadata +from earthcode.git_add import save_project_collection_to_osc logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() -def create_project_stac_from_template(project_yaml, target): +def create_project_stac_from_template(project_yaml, osc_path): with open(project_yaml, 'r') as file: data = yaml.safe_load(file) @@ -30,8 +31,7 @@ def create_project_stac_from_template(project_yaml, target): project_cms = [] [project_cms.append((member['name'], member['email'])) for member in data['consortium_members']] - collection = create_project_collection( - ProjectCollectionMetadata( + project_metadata = ProjectCollectionMetadata( project_id=data['id'] , project_title=data['title'], project_description=data['description'], @@ -47,7 +47,6 @@ def create_project_stac_from_template(project_yaml, target): website_link=data['link_website'], eo4society_link=data['link_eo4society'] ) - ) + project_collection = create_project_collection(project_metadata) - # save this file and copy it to the catalog/projects/{project}/collection.json - collection.save_object(dest_href=os.path.join(target, 'project_collection.json')) + save_project_collection_to_osc(project_collection, Path(osc_path)) diff --git a/earthcode/stac_generator.py b/earthcode/stac_generator.py index f44214c..939ab05 100644 --- a/earthcode/stac_generator.py +++ b/earthcode/stac_generator.py @@ -1,7 +1,5 @@ import argparse import os 
-import shutil -from importlib import resources from earthcode import project_generator, product_generator, workflow_generator, experiment_generator import logging @@ -11,42 +9,29 @@ log = logging.getLogger() -def generate_stac(project=None, workflow=None, experiment=None, product=None, target=os.getcwd()): +def generate_stac(osc_path, project=None, workflow=None, experiment=None, product=None): """ - Generates the requested STAC json files at the desired target directory. - If the folder does not exist it will be created. - If no folder is specified the PWD where the program is run will be selected. + Generates the requested STAC json files at the specified OSC repo. + :param osc_path: OSC repo where the STAC json will be created. :param project: Path to the Project YAML template, if empty no Project STAC will be generated :param workflow: Path to the Workflow YAML template, if empty no Workflow STAC will be generated :param experiment: Path to the Experiment YAML template, if empty no Experiment STAC will be generated :param product: Path to the Product YAML template, if empty no Product STAC will be generated - :param target: target directory where the STAC json will be created. 
""" - if target is None: - log.warning("No target folder specified, the STAC jsons will be generated in the PWD") - target = os.getcwd() - - # Create target directory if it doesn't exist - if not os.path.isdir(target): - os.makedirs(target, exist_ok=True) if project is not None: - log.info("Generating Project STAC json at \"" + target + "\"") - project_generator.create_project_stac_from_template(project, target) + log.info("Generating Project STAC json in OSC @ \"" + osc_path + "\"") + project_generator.create_project_stac_from_template(project, osc_path) if workflow is not None: - log.info("Generating Workflow STAC json at \"" + target + "\"") - workflow_generator.create_workflow_stac_from_template(workflow, target) + log.info("Generating Workflow STAC json in OSC @ \"" + osc_path + "\"") + workflow_generator.create_workflow_stac_from_template(workflow, osc_path) if experiment is not None: - log.info("Generating Experiment STAC json and relative YAML files at \"" + target + "\"") - experiment_generator.create_experiment_stac_from_template(experiment, target) - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment_environment.yaml")) as path: - shutil.copy(path, target) - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment_input.yaml")) as path: - shutil.copy(path, target) + log.info("Generating Experiment STAC json in OSC @ \"" + osc_path + "\"") + experiment_generator.create_experiment_stac_from_template(experiment, osc_path) if product is not None: - log.info("Generating Product STAC json at \"" + target + "\"") - product_generator.create_product_stac_from_template(product, target) + log.info("Generating Product STAC json in OSC @ \"" + osc_path + "\"") + product_generator.create_product_stac_from_template(product, osc_path) if project is None and workflow is None and experiment is None and product is None: log.warning("No template provided." 
@@ -64,12 +49,12 @@ def main(): help="Experiment YAML template location") parser.add_argument("-o", "--product", type=str, help="Product YAML template location") - parser.add_argument("-t", "--target", type=str, - help="The target location where the STAC jsons will be created.") + parser.add_argument("-m", "--oscm", type=str, + help="The target OSC location where the STAC jsons will be created.") args = parser.parse_args() - generate_stac(args.project, args.workflow, args.experiment, args.product, args.target) + generate_stac(args.oscm, args.project, args.workflow, args.experiment, args.product) if __name__ == "__main__": diff --git a/earthcode/templates/experiment_environment.yaml b/earthcode/templates/experiment_environment.yaml deleted file mode 100644 index eae2688..0000000 --- a/earthcode/templates/experiment_environment.yaml +++ /dev/null @@ -1,2 +0,0 @@ -$comment: >- - This file should contain information about the execution environment in the future... \ No newline at end of file diff --git a/earthcode/templates/experiment_input.yaml b/earthcode/templates/experiment_input.yaml deleted file mode 100644 index 1d5a68f..0000000 --- a/earthcode/templates/experiment_input.yaml +++ /dev/null @@ -1,3 +0,0 @@ -$comment: >- - This file should contain information about the input parameters of the experiment in the future... 
-parameters: [] \ No newline at end of file diff --git a/earthcode/workflow_generator.py b/earthcode/workflow_generator.py index 06f6cb9..9c70fab 100644 --- a/earthcode/workflow_generator.py +++ b/earthcode/workflow_generator.py @@ -1,18 +1,18 @@ -import os -import json +from pathlib import Path from datetime import datetime import logging import sys import yaml +from earthcode.git_add import save_workflow_record_to_osc from earthcode.static import create_workflow_record, WorkflowMetadata logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() -def create_workflow_stac_from_template(project_yaml, target): +def create_workflow_stac_from_template(project_yaml, osc_path): with open(project_yaml, 'r') as file: data = yaml.safe_load(file) @@ -49,6 +49,4 @@ def create_workflow_stac_from_template(project_yaml, target): workflow_record = create_workflow_record(workflow_metadata) - # save this file and copy it to the catalog/workflows/{workflow-id}/record.json - with open(os.path.join(target, 'workflow_record.json'), 'w') as f: - json.dump(workflow_record, f, indent=2) + save_workflow_record_to_osc(workflow_record, Path(osc_path)) diff --git a/pixi.toml b/pixi.toml index 50bd4c7..d09156f 100644 --- a/pixi.toml +++ b/pixi.toml @@ -29,7 +29,7 @@ pillow = ">=10.3,<12" onnxruntime = ">=1.24.1,<2" platformdirs = ">=4.5.0,<5" tokenizers = ">=0.22.2,<1" -PyYAML = ">=6.0.3" +PyYAML = ">=6.0.3,<7" [target.osx-arm64.dependencies] python = ">=3.12,<3.14" diff --git a/pyproject.toml b/pyproject.toml index 216a983..9674299 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "pydantic>=2.12.5,<3", "requests>=2.32.5,<3", "tokenizers>=0.22.2,<1", - "PyYAML>=6.0.3", + "PyYAML>=6.0.3,<7", ] [project.urls] diff --git a/tests/test_stac_generator.py b/tests/test_stac_generator.py index 8ae8a33..c220e2b 100644 --- a/tests/test_stac_generator.py +++ b/tests/test_stac_generator.py @@ -1,11 +1,8 @@ import unittest import 
tempfile import shutil -import os -from importlib import resources from earthcode.stac_generator import generate_stac -from test_utils import assertIsFile class TestStacGenerator(unittest.TestCase): @@ -17,24 +14,7 @@ def tearDown(self): # Remove the directory after the test shutil.rmtree(self.test_dir) - def test_generate_stac(self): - with (resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("project.yaml")) as project_template, - resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("workflow.yaml")) as workflow_template, - resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment.yaml")) as experiment_template, - resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("product.yaml")) as product_template): - generate_stac(project=project_template, workflow=workflow_template, experiment=experiment_template, product=product_template, target=self.test_dir) - - project = os.path.join(self.test_dir, "project_collection.json") - workflow = os.path.join(self.test_dir, "workflow_record.json") - experiment = os.path.join(self.test_dir, "experiment_record.json") - product = os.path.join(self.test_dir, "product_collection.json") - - assertIsFile(project) - assertIsFile(workflow) - assertIsFile(experiment) - assertIsFile(product) - def test_generate_template_with_no_template_selected(self): with self.assertLogs(level='WARNING') as log: - generate_stac(project=None, workflow=None, experiment=None, product=None, target=self.test_dir) + generate_stac(project=None, workflow=None, experiment=None, product=None, osc_path=self.test_dir) self.assertIn("No template provided.", log.output[0]) From bf6cc9572a733d0235d96f7810305db390086e57 Mon Sep 17 00:00:00 2001 From: sunnydean Date: Wed, 1 Apr 2026 15:59:41 +0100 Subject: [PATCH 4/7] pixi lock fix, organise under folder --- earthcode/__init__.py | 4 ++-- earthcode/generators/__init__.py | 6 ++++++ .../{ => 
generators}/experiment_generator.py | 0 earthcode/{ => generators}/product_generator.py | 0 earthcode/{ => generators}/project_generator.py | 0 earthcode/{ => generators}/stac_generator.py | 17 ++++++++++------- .../{ => generators}/template_generator.py | 0 .../{ => generators}/workflow_generator.py | 0 pixi.lock | 3 ++- pyproject.toml | 4 ++-- tests/test_stac_generator.py | 2 +- tests/test_template_generator.py | 3 ++- 12 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 earthcode/generators/__init__.py rename earthcode/{ => generators}/experiment_generator.py (100%) rename earthcode/{ => generators}/product_generator.py (100%) rename earthcode/{ => generators}/project_generator.py (100%) rename earthcode/{ => generators}/stac_generator.py (81%) rename earthcode/{ => generators}/template_generator.py (100%) rename earthcode/{ => generators}/workflow_generator.py (100%) diff --git a/earthcode/__init__.py b/earthcode/__init__.py index 844e727..6ea25f1 100644 --- a/earthcode/__init__.py +++ b/earthcode/__init__.py @@ -1,2 +1,2 @@ -from .stac_generator import generate_stac -from .template_generator import generate_template +from .generators.stac_generator import generate_stac +from .generators.template_generator import generate_template diff --git a/earthcode/generators/__init__.py b/earthcode/generators/__init__.py new file mode 100644 index 0000000..e97811e --- /dev/null +++ b/earthcode/generators/__init__.py @@ -0,0 +1,6 @@ +from .experiment_generator import create_experiment_stac_from_template +from .product_generator import create_product_stac_from_template +from .project_generator import create_project_stac_from_template +from .stac_generator import generate_stac +from .template_generator import generate_template +from .workflow_generator import create_workflow_stac_from_template diff --git a/earthcode/experiment_generator.py b/earthcode/generators/experiment_generator.py similarity index 100% rename from earthcode/experiment_generator.py 
rename to earthcode/generators/experiment_generator.py diff --git a/earthcode/product_generator.py b/earthcode/generators/product_generator.py similarity index 100% rename from earthcode/product_generator.py rename to earthcode/generators/product_generator.py diff --git a/earthcode/project_generator.py b/earthcode/generators/project_generator.py similarity index 100% rename from earthcode/project_generator.py rename to earthcode/generators/project_generator.py diff --git a/earthcode/stac_generator.py b/earthcode/generators/stac_generator.py similarity index 81% rename from earthcode/stac_generator.py rename to earthcode/generators/stac_generator.py index 939ab05..13f285f 100644 --- a/earthcode/stac_generator.py +++ b/earthcode/generators/stac_generator.py @@ -1,9 +1,12 @@ +import logging +import sys import argparse import os -from earthcode import project_generator, product_generator, workflow_generator, experiment_generator -import logging -import sys +from .experiment_generator import create_experiment_stac_from_template +from .product_generator import create_product_stac_from_template +from .project_generator import create_project_stac_from_template +from .workflow_generator import create_workflow_stac_from_template logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) log = logging.getLogger() @@ -22,16 +25,16 @@ def generate_stac(osc_path, project=None, workflow=None, experiment=None, produc if project is not None: log.info("Generating Project STAC json in OSC @ \"" + osc_path + "\"") - project_generator.create_project_stac_from_template(project, osc_path) + create_project_stac_from_template(project, osc_path) if workflow is not None: log.info("Generating Workflow STAC json in OSC @ \"" + osc_path + "\"") - workflow_generator.create_workflow_stac_from_template(workflow, osc_path) + create_workflow_stac_from_template(workflow, osc_path) if experiment is not None: log.info("Generating Experiment STAC json in OSC @ \"" + osc_path + "\"") - 
experiment_generator.create_experiment_stac_from_template(experiment, osc_path) + create_experiment_stac_from_template(experiment, osc_path) if product is not None: log.info("Generating Product STAC json in OSC @ \"" + osc_path + "\"") - product_generator.create_product_stac_from_template(product, osc_path) + create_product_stac_from_template(product, osc_path) if project is None and workflow is None and experiment is None and product is None: log.warning("No template provided." diff --git a/earthcode/template_generator.py b/earthcode/generators/template_generator.py similarity index 100% rename from earthcode/template_generator.py rename to earthcode/generators/template_generator.py diff --git a/earthcode/workflow_generator.py b/earthcode/generators/workflow_generator.py similarity index 100% rename from earthcode/workflow_generator.py rename to earthcode/generators/workflow_generator.py diff --git a/pixi.lock b/pixi.lock index 5b06723..6c3aaf2 100644 --- a/pixi.lock +++ b/pixi.lock @@ -4059,7 +4059,7 @@ packages: - pypi: ./ name: earthcode version: 0.1.7 - sha256: 6ec5b0cc1cd4e44fe0ddb5a810fa0f46abe68a296d494670a68ea6981bea69c5 + sha256: 86ade06f59e2da2c6dc7fcaa56947bb5586079cfb6bdd95c8b46791fe5480d20 requires_dist: - pystac>=1.14.1,<2 - xarray>=2025.12.0,<2026 @@ -4083,6 +4083,7 @@ packages: - pydantic>=2.12.5,<3 - requests>=2.32.5,<3 - tokenizers>=0.22.2,<1 + - pyyaml>=6.0.3,<7 - jupyter-book>=1.0.4,<2 ; extra == 'dev' - jupyterlab>=4.5.0,<5 ; extra == 'dev' - papermill>=2.7.0,<3 ; extra == 'dev' diff --git a/pyproject.toml b/pyproject.toml index 9674299..30a9421 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,5 +77,5 @@ exclude = ["earthcode/models/**"] packages = ["earthcode"] [project.scripts] -earthcode_template_gen = "earthcode.template_generator:main" -earthcode_stac_gen = "earthcode.stac_generator:main" \ No newline at end of file +earthcode_template_gen = "earthcode.generators.template_generator:main" +earthcode_stac_gen = 
"earthcode.generators.stac_generator:main" diff --git a/tests/test_stac_generator.py b/tests/test_stac_generator.py index c220e2b..81f364a 100644 --- a/tests/test_stac_generator.py +++ b/tests/test_stac_generator.py @@ -2,7 +2,7 @@ import tempfile import shutil -from earthcode.stac_generator import generate_stac +from earthcode.generators.stac_generator import generate_stac class TestStacGenerator(unittest.TestCase): diff --git a/tests/test_template_generator.py b/tests/test_template_generator.py index 7ab568c..4755980 100644 --- a/tests/test_template_generator.py +++ b/tests/test_template_generator.py @@ -2,10 +2,11 @@ import tempfile import shutil import os +import pathlib import filecmp from importlib import resources -from earthcode.template_generator import generate_template +from earthcode.generators.template_generator import generate_template from test_utils import assertIsFile From ac02a89698beab37745ee29fffb2de311cf5c3b4 Mon Sep 17 00:00:00 2001 From: sunnydean Date: Wed, 1 Apr 2026 16:43:34 +0100 Subject: [PATCH 5/7] adding end-to-end integration test for generation, integration, validation - and making corrections to yaml templates --- earthcode/generators/product_generator.py | 57 +++++++----- earthcode/templates/experiment.yaml | 9 +- earthcode/templates/product.yaml | 15 ++-- earthcode/templates/project.yaml | 2 +- tests/test_validation_templates.py | 101 ++++++++++++++++++++++ 5 files changed, 148 insertions(+), 36 deletions(-) create mode 100644 tests/test_validation_templates.py diff --git a/earthcode/generators/product_generator.py b/earthcode/generators/product_generator.py index b4908ea..55e1305 100644 --- a/earthcode/generators/product_generator.py +++ b/earthcode/generators/product_generator.py @@ -10,50 +10,61 @@ from earthcode.git_add import save_product_collection_to_catalog -logging.basicConfig(stream=sys.stdout, encoding='utf-8', level=logging.INFO) +logging.basicConfig(stream=sys.stdout, encoding="utf-8", level=logging.INFO) log = 
logging.getLogger() def create_product_stac_from_template(project_yaml, osc_path): - with (open(project_yaml, 'r') as file): + with open(project_yaml, "r") as file: data = yaml.safe_load(file) for k, v in data.items(): if v is None: - log.error(f"The Project YAML contains an empty value for the following field: {k}") - raise Exception(f"The Project YAML contains an empty value for the following field: {k}") + log.error( + f"The Project YAML contains an empty value for the following field: {k}" + ) + raise Exception( + f"The Project YAML contains an empty value for the following field: {k}" + ) # Define spatial and temporal extent - spatial_extent = pystac.SpatialExtent(data['extent']['spatial']['bbox']).bboxes - temporal_start = datetime.strptime(data['extent']['temporal']['start'], "%Y-%m-%dT%H:%M:%SZ") - temporal_end = datetime.strptime(data['extent']['temporal']['end'], "%Y-%m-%dT%H:%M:%SZ") + spatial_extent = pystac.SpatialExtent(data["extent"]["spatial"]["bbox"]).bboxes + temporal_start = datetime.strptime( + data["extent"]["temporal"]["start"], "%Y-%m-%dT%H:%M:%SZ" + ) + temporal_end = datetime.strptime( + data["extent"]["temporal"]["end"], "%Y-%m-%dT%H:%M:%SZ" + ) # optional - product_license = data['license'] - if product_license == 'other': - product_license_link = data['license_link'] + product_license = data["license"] + if product_license == "other": + product_license_link = data["license_link"] else: product_license_link = None + via_link = data.get("via_link", None) + product_metadata = ProductCollectionMetadata( - product_id=data['id'], - product_title=data['title'], - product_description=data['description'], - product_keywords=data['keywords'], - product_status=data['status'], - product_region=data['region'], - product_themes=data['themes'], - product_missions=data['missions'], - product_variables=data['variables'], - project_id=data['project'], - project_title=data['project-title'], - product_parameters=data['cf_parameters'], - 
product_doi=data['sci:doi'], + product_id=data["id"], + product_title=data["title"], + product_description=data["description"], + product_keywords=data["keywords"], + product_status=data["status"], + product_region=data["region"], + product_themes=data["themes"], + product_missions=data["missions"], + product_variables=data["variables"], + project_id=data["project"], + project_title=data["project-title"], + product_parameters=data["cf_parameters"], + product_doi=data["sci:doi"], product_bbox=spatial_extent, product_start_datetime=temporal_start, product_end_datetime=temporal_end, product_license=product_license, license_link=product_license_link, + access_link=via_link, ) product_collection = create_product_collection(product_metadata) diff --git a/earthcode/templates/experiment.yaml b/earthcode/templates/experiment.yaml index 09b3765..aa15e19 100644 --- a/earthcode/templates/experiment.yaml +++ b/earthcode/templates/experiment.yaml @@ -11,8 +11,8 @@ formats: # format of experiment output - PNG workflow: cool-project-workflow # id of the workflow used for this experiment workflow-title: Workflow to analyze Cool Project # title of the workflow used for this experiment -product: cool-project-output # id of the output product produced by this experiment -product-title: CoolProjectOutput # title of the output product produced by this experiment +product: cool-project-product # id of the output product produced by this experiment +product-title: Cool Project Product # title of the output product produced by this experiment # Define links, link relations and link titles: e.g. link to service used to run the experiment etc. 
link_env: https://myplatform.com @@ -31,8 +31,9 @@ contacts: - technical_officer organization: CGI links: - - https://www.cgi.com - - https://git.com/users/technical-officer + - rel: about + type: text/html + href: https://cgi.com/ contact_instructions: Contact preferably through project support page - name: Junior Staff diff --git a/earthcode/templates/product.yaml b/earthcode/templates/product.yaml index 3243e15..9828ddb 100644 --- a/earthcode/templates/product.yaml +++ b/earthcode/templates/product.yaml @@ -6,7 +6,7 @@ license: proprietary # license of the product status: completed # Product status region: cool-land # Text description of the study area project: cool-project # Unique id of the OSC project this product is related to. It must be the id provided in the ./project/ -project-title: Cool Project # title of the related project +project-title: CoolProjectTitle # title of the related project # Define project Themes. The fields are restricted to the themes available in the OCS and having at least one theme is mandatory. # Check available themes here: https://opensciencedata.esa.int/themes/catalog and https://github.com/ESA-EarthCODE/open-science-catalog-metadata/main/themes @@ -23,20 +23,17 @@ keywords: # in this list https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/variables. # If you would like to associate your product to a variable that is not on the list, create variable entry first. variables: - - cool-stuff - - cool-things + - lightning # Array of CF Parameters: see https://github.com/stac-extensions/cf for more details cf_parameters: - - cool-stuff - - cool-tools + - lightning # Array of ESA missions related to the product. This array of values is mandatory and limited to missions already existing in the OSC # in this list: https://github.com/ESA-EarthCODE/open-science-catalog-metadata/tree/main/eo-missions. # If you would like to associate your product to a mission that is not on the list, create an eo-mission entry first. 
missions: - - cool-mission - - freezing-mission + - sentinel-2 # The Temporal and Spatial Extent of the product extent: @@ -51,4 +48,6 @@ extent: end: '2021-12-31T23:59:59Z' # DOI reference -sci:doi: https://doi.org/10.12345/abc-r4nd0mn \ No newline at end of file +sci:doi: https://doi.org/10.12345/abc-r4nd0mn + +via_link: https://myplatform.com/products/cool-project-product \ No newline at end of file diff --git a/earthcode/templates/project.yaml b/earthcode/templates/project.yaml index 811dd37..86dd450 100644 --- a/earthcode/templates/project.yaml +++ b/earthcode/templates/project.yaml @@ -1,6 +1,6 @@ --- # Define id, title, description, project status, license -id: cool-project-id # This is your project id. Please make sure to use unique id name for your project! The parent folder of the collection.json should have the same name as this id (not displayed in the browser). +id: cool-project # This is your project id. Please make sure to use unique id name for your project! The parent folder of the collection.json should have the same name as this id (not displayed in the browser). title: CoolProjectTitle # Title of your project. 
Official acronym of the project may be used as well (this will be displayed to public) description: Brief description of the project status: completed # status of the project - Select from: completed, ongoing, scheduled diff --git a/tests/test_validation_templates.py b/tests/test_validation_templates.py new file mode 100644 index 0000000..1ad6eaa --- /dev/null +++ b/tests/test_validation_templates.py @@ -0,0 +1,101 @@ +import shutil +from pathlib import Path +from uuid import uuid4 +import filecmp +import pytest +import pystac +import json + +from earthcode.validator import validate_catalog +from earthcode.static import ( + generate_OSC_dummy_entries, + add_item_link_to_product_collection, + create_item, +) +from earthcode.git_add import ( + save_product_collection_to_catalog, + save_workflow_record_to_osc, + save_project_collection_to_osc, + save_experiment_record_to_osc, + save_item_to_product_collection, + _add_link_if_missing, +) +from earthcode.static import create_item +from earthcode.metadata_input_definitions import ItemMetadata +from earthcode.generators import generate_template, generate_stac + + +### asummes a error free catalog +SOURCE_CATALOG = Path("../open-science-catalog-metadata/").resolve() + + +@pytest.fixture() +def catalog_root(tmp_path: Path) -> Path: + + if not SOURCE_CATALOG.exists(): + pytest.skip(f"Missing source catalog at {SOURCE_CATALOG}") + + target = tmp_path / "open-science-catalog-metadata" + shutil.copytree(SOURCE_CATALOG, target, ignore=shutil.ignore_patterns(".*")) + return target + + +def get_source_files(): + # return all files but ignore anything that starts with a .(dot) + source_files = { + f.relative_to(SOURCE_CATALOG) + for f in SOURCE_CATALOG.rglob("*") + if f.is_file() + and not any( + part.startswith(".") for part in f.relative_to(SOURCE_CATALOG).parts + ) + } + return source_files + + +def test_creation_and_validation(catalog_root: Path): + + generate_template( + project=True, + workflow=True, + experiment=True, + 
product=True, + target=str(catalog_root.parent), + ) + + generate_stac( + project=f"{catalog_root.parent / 'project.yaml'}", + experiment=f"{catalog_root.parent / 'experiment.yaml'}", + workflow=f"{catalog_root.parent / 'workflow.yaml'}", + product=f"{catalog_root.parent / 'product.yaml'}", + osc_path=str(catalog_root), + ) + + # assert that everything passes validation + errors, error_files = validate_catalog(catalog_root) + assert len(errors) == 0 + assert len(error_files) == 0 + + # count updated , deleted and created files + source_files = get_source_files() + target_files = { + f.relative_to(catalog_root) for f in catalog_root.rglob("*") if f.is_file() + } + + created_files = target_files - source_files + deleted_files = source_files - target_files + common_files = source_files & target_files + modified_files = set() + + for rel_path in common_files: + src_file = SOURCE_CATALOG / rel_path + tgt_file = catalog_root / rel_path + + # Setting shallow=False forces Python to compare the actual file contents + # rather than just checking OS metadata like modification times. 
+ if not filecmp.cmp(src_file, tgt_file, shallow=False): + modified_files.add(rel_path) + + assert len(created_files) == 4 + assert len(deleted_files) == 0 + assert len(modified_files) == 8 From e3c6bcd452a667f02c65448d1e719976060ba4b0 Mon Sep 17 00:00:00 2001 From: sunnydean Date: Wed, 1 Apr 2026 17:02:00 +0100 Subject: [PATCH 6/7] refactoring tests in one file --- earthcode/generators/template_generator.py | 8 +- .../templates/experiment.yaml | 0 .../{ => generators}/templates/product.yaml | 0 .../{ => generators}/templates/project.yaml | 0 .../{ => generators}/templates/workflow.yaml | 0 tests/test_stac_generator.py | 20 -- tests/test_template_generator.py | 55 ------ tests/test_utils.py | 6 - tests/test_valid_generators.py | 179 ++++++++++++++++++ tests/test_validation_templates.py | 101 ---------- 10 files changed, 183 insertions(+), 186 deletions(-) rename earthcode/{ => generators}/templates/experiment.yaml (100%) rename earthcode/{ => generators}/templates/product.yaml (100%) rename earthcode/{ => generators}/templates/project.yaml (100%) rename earthcode/{ => generators}/templates/workflow.yaml (100%) delete mode 100644 tests/test_stac_generator.py delete mode 100644 tests/test_template_generator.py delete mode 100644 tests/test_utils.py create mode 100644 tests/test_valid_generators.py delete mode 100644 tests/test_validation_templates.py diff --git a/earthcode/generators/template_generator.py b/earthcode/generators/template_generator.py index 4f7bcf7..02cda80 100644 --- a/earthcode/generators/template_generator.py +++ b/earthcode/generators/template_generator.py @@ -32,22 +32,22 @@ def generate_template(project=False, workflow=False, experiment=False, product=F if project: log.info("Generating Project template at \""+target+"\"") - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("project.yaml")) as path: + with resources.as_file(resources.files("earthcode.generators").joinpath("templates").joinpath("project.yaml")) as 
path: shutil.copy(path, target) if workflow: log.info("Generating Workflow template at \""+target+"\"") - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("workflow.yaml")) as path: + with resources.as_file(resources.files("earthcode.generators").joinpath("templates").joinpath("workflow.yaml")) as path: shutil.copy(path, target) if experiment: log.info("Generating Experiment template at \""+target+"\"") - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment.yaml")) as path: + with resources.as_file(resources.files("earthcode.generators").joinpath("templates").joinpath("experiment.yaml")) as path: shutil.copy(path, target) if product: log.info("Generating Product template at \""+target+"\"") - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("product.yaml")) as path: + with resources.as_file(resources.files("earthcode.generators").joinpath("templates").joinpath("product.yaml")) as path: shutil.copy(path, target) if not project and not workflow and not experiment and not product: diff --git a/earthcode/templates/experiment.yaml b/earthcode/generators/templates/experiment.yaml similarity index 100% rename from earthcode/templates/experiment.yaml rename to earthcode/generators/templates/experiment.yaml diff --git a/earthcode/templates/product.yaml b/earthcode/generators/templates/product.yaml similarity index 100% rename from earthcode/templates/product.yaml rename to earthcode/generators/templates/product.yaml diff --git a/earthcode/templates/project.yaml b/earthcode/generators/templates/project.yaml similarity index 100% rename from earthcode/templates/project.yaml rename to earthcode/generators/templates/project.yaml diff --git a/earthcode/templates/workflow.yaml b/earthcode/generators/templates/workflow.yaml similarity index 100% rename from earthcode/templates/workflow.yaml rename to earthcode/generators/templates/workflow.yaml diff --git 
a/tests/test_stac_generator.py b/tests/test_stac_generator.py deleted file mode 100644 index 81f364a..0000000 --- a/tests/test_stac_generator.py +++ /dev/null @@ -1,20 +0,0 @@ -import unittest -import tempfile -import shutil - -from earthcode.generators.stac_generator import generate_stac - - -class TestStacGenerator(unittest.TestCase): - def setUp(self): - # Create a temporary directory - self.test_dir = tempfile.mkdtemp() - - def tearDown(self): - # Remove the directory after the test - shutil.rmtree(self.test_dir) - - def test_generate_template_with_no_template_selected(self): - with self.assertLogs(level='WARNING') as log: - generate_stac(project=None, workflow=None, experiment=None, product=None, osc_path=self.test_dir) - self.assertIn("No template provided.", log.output[0]) diff --git a/tests/test_template_generator.py b/tests/test_template_generator.py deleted file mode 100644 index 4755980..0000000 --- a/tests/test_template_generator.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest -import tempfile -import shutil -import os -import pathlib -import filecmp -from importlib import resources - -from earthcode.generators.template_generator import generate_template -from test_utils import assertIsFile - - -def assertIsFile(path): - if not pathlib.Path(path).resolve().is_file(): - raise AssertionError("File does not exist: %s" % str(path)) - - -class TestTemplateGenerator(unittest.TestCase): - def setUp(self): - # Create a temporary directory - self.test_dir = tempfile.mkdtemp() - - def tearDown(self): - # Remove the directory after the test - shutil.rmtree(self.test_dir) - - def test_generate_template(self): - generate_template(project=True, workflow=True, experiment=True, product=True, target=self.test_dir) - - project = os.path.join(self.test_dir, "project.yaml") - workflow = os.path.join(self.test_dir, "workflow.yaml") - experiment = os.path.join(self.test_dir, "experiment.yaml") - product = os.path.join(self.test_dir, "product.yaml") - - 
assertIsFile(project) - assertIsFile(workflow) - assertIsFile(experiment) - assertIsFile(product) - - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("project.yaml")) as expected_project: - self.assertTrue(filecmp.cmp(project, expected_project), "The project template is different from the expected one") - - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("workflow.yaml")) as expected_workflow: - self.assertTrue(filecmp.cmp(workflow, expected_workflow), "The workflow template is different from the expected one") - - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("experiment.yaml")) as expected_experiment: - self.assertTrue(filecmp.cmp(experiment, expected_experiment), "The experiment template is different from the expected one") - - with resources.as_file(resources.files("earthcode").joinpath("templates").joinpath("product.yaml")) as expected_product: - self.assertTrue(filecmp.cmp(product, expected_product), "The product template is different from the expected one") - - def test_generate_template_with_no_template_selected(self): - with self.assertLogs(level='WARNING') as log: - generate_template(project=False, workflow=False, experiment=False, product=False, target=self.test_dir) - self.assertIn("No options selected.", log.output[0]) diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index c5f861d..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,6 +0,0 @@ -import pathlib - - -def assertIsFile(path): - if not pathlib.Path(path).resolve().is_file(): - raise AssertionError("File does not exist: %s" % str(path)) diff --git a/tests/test_valid_generators.py b/tests/test_valid_generators.py new file mode 100644 index 0000000..db179c7 --- /dev/null +++ b/tests/test_valid_generators.py @@ -0,0 +1,179 @@ +import shutil +import tempfile +from pathlib import Path +from importlib import resources +import filecmp +import pytest +import os + 
+from earthcode.validator import validate_catalog +from earthcode.generators import generate_template, generate_stac + + +### assumes an error-free catalog +SOURCE_CATALOG = Path("../open-science-catalog-metadata/").resolve() + + +@pytest.fixture() +def catalog_root(tmp_path: Path) -> Path: + + if not SOURCE_CATALOG.exists(): + pytest.skip(f"Missing source catalog at {SOURCE_CATALOG}") + + target = tmp_path / "open-science-catalog-metadata" + shutil.copytree(SOURCE_CATALOG, target, ignore=shutil.ignore_patterns(".*")) + return target + + +def get_source_files(): + # return all files but ignore anything that starts with a .(dot) + source_files = { + f.relative_to(SOURCE_CATALOG) + for f in SOURCE_CATALOG.rglob("*") + if f.is_file() + and not any( + part.startswith(".") for part in f.relative_to(SOURCE_CATALOG).parts + ) + } + return source_files + + +def assertIsFile(path): + if not Path(path).resolve().is_file(): + raise AssertionError("File does not exist: %s" % str(path)) + + +def test_creation_and_validation(catalog_root: Path): + + generate_template( + project=True, + workflow=True, + experiment=True, + product=True, + target=str(catalog_root.parent), + ) + + generate_stac( + project=f"{catalog_root.parent / 'project.yaml'}", + experiment=f"{catalog_root.parent / 'experiment.yaml'}", + workflow=f"{catalog_root.parent / 'workflow.yaml'}", + product=f"{catalog_root.parent / 'product.yaml'}", + osc_path=str(catalog_root), + ) + + # assert that everything passes validation + errors, error_files = validate_catalog(catalog_root) + assert len(errors) == 0 + assert len(error_files) == 0 + + # count updated, deleted and created files + source_files = get_source_files() + target_files = { + f.relative_to(catalog_root) for f in catalog_root.rglob("*") if f.is_file() + } + + created_files = target_files - source_files + deleted_files = source_files - target_files + common_files = source_files & target_files + modified_files = set() + + for rel_path in common_files: +
src_file = SOURCE_CATALOG / rel_path + tgt_file = catalog_root / rel_path + + # Setting shallow=False forces Python to compare the actual file contents + # rather than just checking OS metadata like modification times. + if not filecmp.cmp(src_file, tgt_file, shallow=False): + modified_files.add(rel_path) + + assert len(created_files) == 4 + assert len(deleted_files) == 0 + assert len(modified_files) == 8 + + +def test_generate_template(): + test_dir = tempfile.mkdtemp() + try: + generate_template( + project=True, workflow=True, experiment=True, product=True, target=test_dir + ) + + project = os.path.join(test_dir, "project.yaml") + workflow = os.path.join(test_dir, "workflow.yaml") + experiment = os.path.join(test_dir, "experiment.yaml") + product = os.path.join(test_dir, "product.yaml") + + assertIsFile(project) + assertIsFile(workflow) + assertIsFile(experiment) + assertIsFile(product) + + with resources.as_file( + resources.files("earthcode.generators") + .joinpath("templates") + .joinpath("project.yaml") + ) as expected_project: + assert filecmp.cmp(project, expected_project), ( + "The project template is different from the expected one" + ) + + with resources.as_file( + resources.files("earthcode.generators") + .joinpath("templates") + .joinpath("workflow.yaml") + ) as expected_workflow: + assert filecmp.cmp(workflow, expected_workflow), ( + "The workflow template is different from the expected one" + ) + + with resources.as_file( + resources.files("earthcode.generators") + .joinpath("templates") + .joinpath("experiment.yaml") + ) as expected_experiment: + assert filecmp.cmp(experiment, expected_experiment), ( + "The experiment template is different from the expected one" + ) + + with resources.as_file( + resources.files("earthcode.generators") + .joinpath("templates") + .joinpath("product.yaml") + ) as expected_product: + assert filecmp.cmp(product, expected_product), ( + "The product template is different from the expected one" + ) + finally: + 
shutil.rmtree(test_dir) + + +def test_generate_template_with_no_template_selected_logs_warning(caplog): + test_dir = tempfile.mkdtemp() + try: + with caplog.at_level("WARNING"): + generate_template( + project=False, + workflow=False, + experiment=False, + product=False, + target=test_dir, + ) + assert "No options selected." in caplog.text + finally: + shutil.rmtree(test_dir) + + +def test_generate_stac_with_no_template_selected_logs_warning(caplog): + test_dir = tempfile.mkdtemp() + try: + with caplog.at_level("WARNING"): + generate_stac( + project=None, + workflow=None, + experiment=None, + product=None, + osc_path=test_dir, + ) + assert "No template provided." in caplog.text + finally: + shutil.rmtree(test_dir) diff --git a/tests/test_validation_templates.py b/tests/test_validation_templates.py deleted file mode 100644 index 1ad6eaa..0000000 --- a/tests/test_validation_templates.py +++ /dev/null @@ -1,101 +0,0 @@ -import shutil -from pathlib import Path -from uuid import uuid4 -import filecmp -import pytest -import pystac -import json - -from earthcode.validator import validate_catalog -from earthcode.static import ( - generate_OSC_dummy_entries, - add_item_link_to_product_collection, - create_item, -) -from earthcode.git_add import ( - save_product_collection_to_catalog, - save_workflow_record_to_osc, - save_project_collection_to_osc, - save_experiment_record_to_osc, - save_item_to_product_collection, - _add_link_if_missing, -) -from earthcode.static import create_item -from earthcode.metadata_input_definitions import ItemMetadata -from earthcode.generators import generate_template, generate_stac - - -### asummes a error free catalog -SOURCE_CATALOG = Path("../open-science-catalog-metadata/").resolve() - - -@pytest.fixture() -def catalog_root(tmp_path: Path) -> Path: - - if not SOURCE_CATALOG.exists(): - pytest.skip(f"Missing source catalog at {SOURCE_CATALOG}") - - target = tmp_path / "open-science-catalog-metadata" - shutil.copytree(SOURCE_CATALOG, target, 
ignore=shutil.ignore_patterns(".*")) - return target - - -def get_source_files(): - # return all files but ignore anything that starts with a .(dot) - source_files = { - f.relative_to(SOURCE_CATALOG) - for f in SOURCE_CATALOG.rglob("*") - if f.is_file() - and not any( - part.startswith(".") for part in f.relative_to(SOURCE_CATALOG).parts - ) - } - return source_files - - -def test_creation_and_validation(catalog_root: Path): - - generate_template( - project=True, - workflow=True, - experiment=True, - product=True, - target=str(catalog_root.parent), - ) - - generate_stac( - project=f"{catalog_root.parent / 'project.yaml'}", - experiment=f"{catalog_root.parent / 'experiment.yaml'}", - workflow=f"{catalog_root.parent / 'workflow.yaml'}", - product=f"{catalog_root.parent / 'product.yaml'}", - osc_path=str(catalog_root), - ) - - # assert that everything passes validation - errors, error_files = validate_catalog(catalog_root) - assert len(errors) == 0 - assert len(error_files) == 0 - - # count updated , deleted and created files - source_files = get_source_files() - target_files = { - f.relative_to(catalog_root) for f in catalog_root.rglob("*") if f.is_file() - } - - created_files = target_files - source_files - deleted_files = source_files - target_files - common_files = source_files & target_files - modified_files = set() - - for rel_path in common_files: - src_file = SOURCE_CATALOG / rel_path - tgt_file = catalog_root / rel_path - - # Setting shallow=False forces Python to compare the actual file contents - # rather than just checking OS metadata like modification times. 
- if not filecmp.cmp(src_file, tgt_file, shallow=False): - modified_files.add(rel_path) - - assert len(created_files) == 4 - assert len(deleted_files) == 0 - assert len(modified_files) == 8 From fc5c7a9e3f9be572a1e7e5388d97da0fbc17dd53 Mon Sep 17 00:00:00 2001 From: sunnydean Date: Wed, 1 Apr 2026 23:19:37 +0100 Subject: [PATCH 7/7] fixing the way we use schema files as resources to resolve correctly and be included in builds --- earthcode/validator.py | 29 +++++++++++++++-------------- pixi.lock | 2 +- pyproject.toml | 27 +++++++++++++++++++++++++++ tests/test_valid_generators.py | 7 +++++++ 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/earthcode/validator.py b/earthcode/validator.py index 341563a..dadf603 100644 --- a/earthcode/validator.py +++ b/earthcode/validator.py @@ -1,6 +1,7 @@ import json import os import re +from importlib import resources from pathlib import Path from typing import Dict, List, Any, Optional import pystac @@ -465,21 +466,21 @@ def _validate_experiment(ctx): def _validate_relative_schema(ctx, schema_file): + schema_resource = resources.files("earthcode").joinpath(*Path(schema_file).parts) + with resources.as_file(schema_resource) as schema_path: + with open(schema_path, 'r', encoding='utf-8') as f: + schema = json.load(f) - schema_file = Path(__file__).resolve().parent / schema_file - with open(schema_file, 'r', encoding='utf-8') as f: - schema = json.load(f) - - with open(ctx['file_path'], 'r', encoding='utf-8') as f: - data = json.load(f) + with open(ctx['file_path'], 'r', encoding='utf-8') as f: + data = json.load(f) - # Create a base URI for the folder containing the schema - base_uri = Path(schema_file).absolute().parent.as_uri() + "/" - resolver = RefResolver(base_uri=base_uri, referrer=schema) - try: - validate(instance=data, schema=schema, resolver=resolver) - except Exception as e: - ctx['errors'].append(e) + # Create a base URI for the folder containing the schema + base_uri = 
schema_path.absolute().parent.as_uri() + "/" + resolver = RefResolver(base_uri=base_uri, referrer=schema) + try: + validate(instance=data, schema=schema, resolver=resolver) + except Exception as e: + ctx['errors'].append(e) #TODO: Implement Item checks @@ -602,4 +603,4 @@ def validate_catalog(root_path): errors.append(file_errors) error_files.append(full_path) - return errors, error_files \ No newline at end of file + return errors, error_files diff --git a/pixi.lock b/pixi.lock index 6c3aaf2..4a79176 100644 --- a/pixi.lock +++ b/pixi.lock @@ -4059,7 +4059,7 @@ packages: - pypi: ./ name: earthcode version: 0.1.7 - sha256: 86ade06f59e2da2c6dc7fcaa56947bb5586079cfb6bdd95c8b46791fe5480d20 + sha256: 334a97b40d7f802bd26f7065e94c887678e16ee20bdb3b8ca51b473f23b85d87 requires_dist: - pystac>=1.14.1,<2 - xarray>=2025.12.0,<2026 diff --git a/pyproject.toml b/pyproject.toml index 30a9421..4bf7826 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,33 @@ build-backend = "hatchling.build" [tool.hatch.build] exclude = ["earthcode/models/**"] +artifacts = [ + "earthcode/schemas/catalog.json", + "earthcode/schemas/collection.json", + "earthcode/schemas/contacts.json", + "earthcode/schemas/eo-missions/children.json", + "earthcode/schemas/eo-missions/parent.json", + "earthcode/schemas/experiments/children.json", + "earthcode/schemas/experiments/parent.json", + "earthcode/schemas/license.json", + "earthcode/schemas/osc.json", + "earthcode/schemas/products/children.json", + "earthcode/schemas/products/parent.json", + "earthcode/schemas/projects/children.json", + "earthcode/schemas/projects/parent.json", + "earthcode/schemas/records.json", + "earthcode/schemas/themes.json", + "earthcode/schemas/themes/children.json", + "earthcode/schemas/themes/parent.json", + "earthcode/schemas/variables/children.json", + "earthcode/schemas/variables/parent.json", + "earthcode/schemas/workflows/children.json", + "earthcode/schemas/workflows/parent.json", + 
"earthcode/generators/templates/experiment.yaml", + "earthcode/generators/templates/product.yaml", + "earthcode/generators/templates/project.yaml", + "earthcode/generators/templates/workflow.yaml", +] [tool.hatch.build.targets.wheel] packages = ["earthcode"] diff --git a/tests/test_valid_generators.py b/tests/test_valid_generators.py index db179c7..d4e4d23 100644 --- a/tests/test_valid_generators.py +++ b/tests/test_valid_generators.py @@ -147,6 +147,13 @@ def test_generate_template(): shutil.rmtree(test_dir) +def test_packaged_schema_resources_exist(): + with resources.as_file( + resources.files("earthcode").joinpath("schemas").joinpath("catalog.json") + ) as schema_path: + assert schema_path.is_file() + + def test_generate_template_with_no_template_selected_logs_warning(caplog): test_dir = tempfile.mkdtemp() try: