diff --git a/utils/README.md b/utils/README.md index 2ad9e5e..263edda 100644 --- a/utils/README.md +++ b/utils/README.md @@ -1,19 +1,18 @@ # OPL YAML utils -This folder contains utility scripts for working with the YAML format to describe problems in context of OPL. They are mainly intended to be run automatically via GitHub Actions to make collaboration easier. +This folder contains utility scripts for working with the YAML format to describe problems in the context of OPL. Some of them are mainly intended to be run automatically via GitHub Actions to make collaboration easier; others are utility scripts for maintainers. The intended way of adding a new problem to the repository is thus as follows: -* Create a file in 'utils/new_problem.yaml' based on the template (see below). +* Create a new YAML file based on the template (see below). +* Run the [merge script](merge_yaml.py) locally to update the [problems.yaml](../problems.yaml) file and check that the formatting is correct. * Create a PR with the changes (for example with a fork). What happens in the background then is: -* On PR creation and commits to the PR, the [validate_yaml.py](validate_yaml.py) script is run to check that the YAML file is valid and consistent. It is expecting the changes to be in the [new_problem.yaml](new_problem.yaml) file. +* On PR creation and commits to the PR, the [validate_yaml.py](validate_yaml.py) script is run to check that the [problems.yaml](../problems.yaml) file is still valid and consistent. * Then the PR should be reviewed manually. -* When the PR is merged into the main branch, a second script runs (which doesn't exist yet), that adds the content of [new_problem.yaml](new_problem.yaml) to the [problems.yaml](../problems.yaml) file, and reverts the changes to the new_problem.yaml. - -:warning: Note that the GitHubActions do not exist yet either, this is a WIP. +* When the PR is merged into the main branch with changes to problems.yaml, the checks are run again. 
"""Merge a YAML file of new problem descriptions into the main problems file.

Usage (from the repository root)::

    python utils/merge_yaml.py <new_problems.yaml> <problems.yaml>

NOTE(review): the README states that this script deletes the separate YAML
file after merging; the code only reports that the file *can* be deleted.
Confirm which behavior is intended and align the README or the script.
"""

import yaml
import sys
from pathlib import Path
from typing import List, Dict

# Add parent directory to sys.path so that `utils.validate_yaml` resolves when
# this file is executed as a script from the repository root.
parent = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(parent))

# `validate_yaml` stays imported for backward compatibility but is deliberately
# not called below: it always terminates the process via `sys.exit`.
from utils.validate_yaml import read_data, validate_data, validate_yaml


def write_data(filepath: str, data: List[Dict]) -> bool:
    """Serialize ``data`` as YAML and write it to ``filepath``.

    Args:
        filepath: Destination file; it is overwritten.
        data: List of problem entries to dump (key order is preserved).

    Returns:
        True when the file was written, False when serialization or the
        write failed (an ``::error::`` line is printed in that case).
    """
    try:
        # Serialize before opening the file: opening with "w" truncates the
        # target, so a dumper failure would otherwise leave it half-written.
        text = yaml.safe_dump(data, sort_keys=False)
    except yaml.YAMLError as e:
        print(f"::error::YAML syntax error: {e}")
        return False

    try:
        with open(filepath, "w") as f:
            f.write(text)
    except FileNotFoundError:
        print(f"::error::File not found: {filepath}")
        return False
    except OSError as e:
        print(f"::error::Error writing file {filepath}: {e}")
        return False

    print(f"::notice::Wrote data to {filepath}.")
    return True


def update_existing_data(
    existing_data: List[Dict], new_data: List[Dict], out_file: str
) -> bool:
    """Append ``new_data`` to ``existing_data`` and write the result.

    The combined list is validated first. When validation fails, nothing is
    written and ``existing_data`` is left unmodified.

    Args:
        existing_data: Entries already present; extended in place on success.
        new_data: Entries to append.
        out_file: Path the merged list is written to.

    Returns:
        True when the merged data is valid and was written, False otherwise.
    """
    # Validate a merged copy so a rejected merge does not alter the caller's list.
    merged = [*existing_data, *new_data]
    if not validate_data(merged):
        print(f"::error::Merged data is not valid, cannot write to {out_file}.")
        return False
    existing_data.extend(new_data)
    return write_data(out_file, existing_data)


def merge_new_problems(new_problems_yaml_path: str, big_yaml_path: str) -> bool:
    """Merge the problems in ``new_problems_yaml_path`` into ``big_yaml_path``.

    Steps: read and validate the new entries, read the existing entries,
    validate and write the merged list, then re-read the written file and
    validate it once more.

    Args:
        new_problems_yaml_path: YAML file containing the new problem entries.
        big_yaml_path: Main problems YAML file; overwritten on success.

    Returns:
        True when the merge succeeded, False otherwise (an ``::error::`` line
        describing the failing step is printed).
    """
    # Read and validate new data
    new_data_status, new_data = read_data(new_problems_yaml_path)
    if new_data_status != 0 or new_data is None:
        print(
            f"::error::New problems data could not be read from {new_problems_yaml_path}."
        )
        return False
    if not validate_data(new_data):
        print(f"::error::New problems data in {new_problems_yaml_path} is not valid.")
        return False

    # Read existing data
    existing_data_status, existing_data = read_data(big_yaml_path)
    if existing_data_status != 0 or existing_data is None:
        print(
            f"::error::Existing problems data could not be read from {big_yaml_path}."
        )
        return False

    # All valid, we can now just merge the lists
    if not update_existing_data(existing_data, new_data, big_yaml_path):
        print(f"::error::Failed to update existing problems data in {big_yaml_path}.")
        return False

    # Validate the file as written. `validate_yaml` cannot be used here: it
    # always calls `sys.exit`, so control would never come back to this
    # function and the success notice below would be unreachable.
    final_status, final_data = read_data(big_yaml_path)
    if final_status != 0 or final_data is None or not validate_data(final_data):
        print(
            f"::error::Merged data in {big_yaml_path} is not valid after merging new problems."
        )
        return False

    print(
        f"::notice::Merged {len(new_data)} new problems into {big_yaml_path}. {new_problems_yaml_path} can now be deleted."
    )
    return True


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python merge_yaml.py <new_problems.yaml> <problems.yaml>")
        sys.exit(1)
    new_problems_yaml_path = sys.argv[1]
    big_yaml_path = sys.argv[2]
    status = merge_new_problems(new_problems_yaml_path, big_yaml_path)
    sys.exit(0 if status else 1)
def validate_yaml(filepath: str) -> None:
    """Validate the YAML file at ``filepath`` and terminate the process.

    Exits with status 1 when the file cannot be read or fails
    ``validate_data``, and with status 0 when validation succeeds. This
    function always ends in ``sys.exit`` and never returns to its caller.
    """
    status, data = read_data(filepath)
    readable = status == 0 and data is not None
    if not readable:
        sys.exit(1)
    # Map the boolean validation result onto the process exit status.
    sys.exit(0 if validate_data(data) else 1)