Skip to content

Commit

Permalink
Add a general-config feature to add all countries to RegionCodeList (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann committed Jul 13, 2023
1 parent 0a99ba8 commit 7613986
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 10 deletions.
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ API documentation

api/nomenclature
api/datastructuredefinition
api/datastructureconfig
api/codelist
api/regionprocessor
api/testing
7 changes: 7 additions & 0 deletions docs/api/datastructureconfig.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.. currentmodule:: nomenclature.config

**DataStructureConfig**
=======================

.. autoclass:: DataStructureConfig
   :members: from_file
2 changes: 1 addition & 1 deletion docs/api/datastructuredefinition.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
===========================

.. autoclass:: DataStructureDefinition
:members:
:members: validate, check_aggregate, to_excel
4 changes: 4 additions & 0 deletions docs/user_guide/directory-structure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This is the directory structure for validation and region processing:
.
├── definitions
│ ├── config.yaml
│ ├── region
│ │ ├── regions.yaml
│ │ └── ...
Expand All @@ -32,5 +33,8 @@ The :class:`DataStructureDefinition` reads the codelists from the *definitions*
object is initialized, all files in a dimension folder are combined into a single
:class:`CodeList` object for that dimension.

* General configuration options for the :class:`DataStructureDefinition` can be
  specified via an optional *config.yaml* file in the *definitions* folder.

The :class:`RegionProcessor` reads model-specific region-mappings from the *mappings*
folder. If the project has no model specific mappings, this folder can also be omitted.
2 changes: 2 additions & 0 deletions nomenclature/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ class RegionCode(Code):
Name of the RegionCode
hierarchy : str
Hierarchy of the RegionCode
iso3_codes : str or list of str
ISO3 codes of countries in that region
"""

Expand Down
66 changes: 62 additions & 4 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,43 @@
from pyam.utils import write_sheet
from pydantic import BaseModel, validator

from pycountry import countries

from nomenclature.code import Code, MetaCode, RegionCode, VariableCode
from nomenclature.config import DataStructureConfig
from nomenclature.error.codelist import DuplicateCodeError
from nomenclature.error.variable import (
MissingWeightError,
VariableRenameArgError,
VariableRenameTargetError,
)


# The RegionCodeList can (optionally) be populated with all countries and their
# ISO3 codes as provided by pycountry.
# Several official pycountry names are replaced by shorter "standard" names for
# readability and in line with conventions of the IAMC community.
# Keys are the names used by pycountry, values are the names used in the codelist.
# Please keep this list in sync with `templates/model-registration-template.xlsx`
PYCOUNTRY_NAME_OVERRIDE = {
    "Bolivia, Plurinational State of": "Bolivia",
    "Holy See (Vatican City State)": "Vatican",
    "Micronesia, Federated States of": "Micronesia",
    "Congo, The Democratic Republic of the": "Democratic Republic of the Congo",
    "Iran, Islamic Republic of": "Iran",
    "Korea, Republic of": "South Korea",
    "Korea, Democratic People's Republic of": "North Korea",
    "Lao People's Democratic Republic": "Laos",
    "Syrian Arab Republic": "Syria",
    "Moldova, Republic of": "Moldova",
    "Tanzania, United Republic of": "Tanzania",
    "Venezuela, Bolivarian Republic of": "Venezuela",
    "Palestine, State of": "Palestine",
    "Taiwan, Province of China": "Taiwan",
}

# Names added to the country list in addition to the entries of pycountry
PYCOUNTRY_NAME_ADD = ["Kosovo"]


here = Path(__file__).parent.absolute()


Expand Down Expand Up @@ -164,7 +193,13 @@ def _parse_and_replace_tags(
return codes_without_tags + codes_with_tags

@classmethod
def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
def from_directory(
cls,
name: str,
path: Path,
config: DataStructureConfig = None,
file_glob_pattern: str = "**/*",
):
"""Initialize a CodeList from a directory with codelist files
Parameters
Expand All @@ -173,12 +208,14 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
Name of the CodeList
path : :class:`pathlib.Path` or path-like
Directory with the codelist files
config : :class:`DataStructureConfig`, optional
Attributes for configuring the CodeList
file_glob_pattern : str, optional
Pattern to downselect codelist files by name
Returns
-------
instance of cls (CodeList if not inherited)
instance of cls (:class:`CodeList` if not inherited)
"""
code_list: List[Code] = []
Expand Down Expand Up @@ -511,7 +548,13 @@ class RegionCodeList(CodeList):
validation_schema: ClassVar[str] = "region"

@classmethod
def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
def from_directory(
cls,
name: str,
path: Path,
config: DataStructureConfig = None,
file_glob_pattern: str = "**/*",
):
"""Initialize a RegionCodeList from a directory with codelist files
Parameters
Expand All @@ -520,6 +563,8 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
Name of the CodeList
path : :class:`pathlib.Path` or path-like
Directory with the codelist files
config : :class:`DataStructureConfig`, optional
Attributes for configuring the CodeList
file_glob_pattern : str, optional
Pattern to downselect codelist files by name, default: "**/*" (i.e. all
files in all sub-folders)
Expand All @@ -529,9 +574,22 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
RegionCodeList
"""
mapping: Dict[str, RegionCode] = {}

code_list: List[RegionCode] = []

if config is not None and config.region is not None:
if config.region.country is True:
for i in countries:
code_list.append(
RegionCode(
name=PYCOUNTRY_NAME_OVERRIDE.get(i.name, i.name),
iso3_codes=i.alpha_3,
hierarchy="Country",
)
)
for c in PYCOUNTRY_NAME_ADD:
code_list.append(RegionCode(name=c, hierarchy="Country"))

for yaml_file in (
f
for f in path.glob(file_glob_pattern)
Expand Down
39 changes: 39 additions & 0 deletions nomenclature/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from pathlib import Path
from typing import Dict, Optional
from pydantic import BaseModel

import yaml


class RegionCodeListConfig(BaseModel):
    """Configuration options for the RegionCodeList

    Attributes
    ----------
    country : bool, optional
        If True, the countries provided by pycountry are added to the RegionCodeList
    """

    # Explicit default: only pydantic v1 treats a bare `Optional[...]` annotation
    # as "not required", so spell out the default to keep the field omittable.
    country: Optional[bool] = None


class DataStructureConfig(BaseModel):
    """A class for configuration of a DataStructureDefinition

    Attributes
    ----------
    region : RegionCodeListConfig, optional
        Attributes for configuring the RegionCodeList
    """

    # Explicit default so that `DataStructureConfig()` (no config file present)
    # is valid irrespective of the pydantic version.
    region: Optional[RegionCodeListConfig] = None

    @classmethod
    def from_file(cls, path: Path, file: str):
        """Read a DataStructureConfig from a file

        Parameters
        ----------
        path : :class:`pathlib.Path` or path-like
            `definitions` directory
        file : str
            File name

        Returns
        -------
        DataStructureConfig
            Configuration read from the file; an empty file or a file without a
            `region` section yields a configuration with `region=None`
        """
        # cast to Path so that a plain string is accepted as documented
        with open(Path(path) / file, "r", encoding="utf-8") as stream:
            # yaml parses an empty document as None
            config = yaml.safe_load(stream) or {}

        # the `region` section is optional (missing or explicitly null)
        region = config.get("region")
        if region is None:
            return cls()

        return cls(region=RegionCodeListConfig(**region))
17 changes: 12 additions & 5 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
VariableCodeList,
MetaCodeList,
)
from nomenclature.config import DataStructureConfig
from nomenclature.validation import validate

logger = logging.getLogger(__name__)
Expand All @@ -37,19 +38,25 @@ def __init__(self, path, dimensions=None):
from a sub-folder of `path` of that name.
"""

if dimensions is None:
dimensions = ["region", "variable"]

if not isinstance(path, Path):
path = Path(path)

if not path.is_dir():
raise NotADirectoryError(f"Definitions directory not found: {path}")

self.dimensions = dimensions
if (path / "config.yaml").exists():
self.config = DataStructureConfig.from_file(
path=path,
file="config.yaml",
)
else:
self.config = DataStructureConfig()

self.dimensions = dimensions or ["region", "variable"]
for dim in self.dimensions:
codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
self.__setattr__(
dim, SPECIAL_CODELIST.get(dim, CodeList).from_directory(dim, path / dim)
dim, codelist_cls.from_directory(dim, path / dim, self.config)
)

empty = [d for d in self.dimensions if not self.__getattribute__(d)]
Expand Down
Binary file modified templates/model-registration-template.xlsx
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/data/general-config-definitions/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
region:
country: true
2 changes: 2 additions & 0 deletions tests/data/general-config-definitions/region/regions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- common:
- World
16 changes: 16 additions & 0 deletions tests/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ def test_empty_codelist_raises():
DataStructureDefinition(TEST_DATA_DIR / "simple_codelist")


def test_definition_from_general_config():
    """Check that regions from `config.yaml` and from the codelist files are loaded"""
    definition = DataStructureDefinition(
        TEST_DATA_DIR / "general-config-definitions", dimensions=["region"]
    )

    expected_regions = (
        # explicitly defined in `general-config-definitions/region/regions.yaml`
        "World",
        # added via general-config definitions
        "Austria",
        # added via general-config definitions renamed from pycountry name
        "Bolivia",
        # added via general-config definitions in addition to pycountry.countries
        "Kosovo",
    )
    for region in expected_regions:
        assert region in definition.region


def test_to_excel(simple_definition, tmpdir):
"""Check writing a DataStructureDefinition to file"""
file = tmpdir / "testing_export.xlsx"
Expand Down

0 comments on commit 7613986

Please sign in to comment.