Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a general-config feature to add all countries to RegionCodeList #262

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ API documentation

api/nomenclature
api/datastructuredefinition
api/datastructureconfig
api/codelist
api/regionprocessor
api/testing
7 changes: 7 additions & 0 deletions docs/api/datastructureconfig.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.. currentmodule:: nomenclature.config

**DataStructureConfig**
=======================

.. autoclass:: DataStructureConfig
:members: from_file
2 changes: 1 addition & 1 deletion docs/api/datastructuredefinition.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
===========================

.. autoclass:: DataStructureDefinition
:members:
:members: validate, check_aggregate, to_excel
4 changes: 4 additions & 0 deletions docs/user_guide/directory-structure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This is the directory structure for validation and region processing:

.
├── definitions
│ ├── config.yaml
│ ├── region
│ │ ├── regions.yaml
│ │ └── ...
Expand All @@ -32,5 +33,8 @@ The :class:`DataStructureDefinition` reads the codelists from the *definitions*
object is initialized, all files in a dimension folder are combined into a single
:class:`CodeList` object for that dimension.

* General configuration options for the :class:`DataStructureDefinition` can be
  specified in an optional *config.yaml* file in the *definitions* folder.

The :class:`RegionProcessor` reads model-specific region-mappings from the *mappings*
folder. If the project has no model specific mappings, this folder can also be omitted.
2 changes: 2 additions & 0 deletions nomenclature/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ class RegionCode(Code):
Name of the RegionCode
hierarchy : str
Hierarchy of the RegionCode
iso3_codes : str or list of str
ISO3 codes of countries in that region

"""

Expand Down
66 changes: 62 additions & 4 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,43 @@
from pyam.utils import write_sheet
from pydantic import BaseModel, validator

from pycountry import countries

from nomenclature.code import Code, MetaCode, RegionCode, VariableCode
from nomenclature.config import DataStructureConfig
from nomenclature.error.codelist import DuplicateCodeError
from nomenclature.error.variable import (
MissingWeightError,
VariableRenameArgError,
VariableRenameTargetError,
)


# The RegionCodeList uses pycountry to (optionally) add all countries and their
# ISO3 codes. For readability and in line with conventions of the IAMC community,
# several "standard" country names are shortened.
# This dictionary maps the pycountry name (key) to the shortened name (value);
# countries not listed here keep their pycountry name.
# Please keep this list in sync with `templates/model-registration-template.xlsx`.
PYCOUNTRY_NAME_OVERRIDE = {
"Bolivia, Plurinational State of": "Bolivia",
"Holy See (Vatican City State)": "Vatican",
"Micronesia, Federated States of": "Micronesia",
"Congo, The Democratic Republic of the": "Democratic Republic of the Congo",
"Iran, Islamic Republic of": "Iran",
"Korea, Republic of": "South Korea",
"Korea, Democratic People's Republic of": "North Korea",
"Lao People's Democratic Republic": "Laos",
"Syrian Arab Republic": "Syria",
"Moldova, Republic of": "Moldova",
"Tanzania, United Republic of": "Tanzania",
"Venezuela, Bolivarian Republic of": "Venezuela",
"Palestine, State of": "Palestine",
"Taiwan, Province of China": "Taiwan",
}
# Names added to the RegionCodeList as "Country" in addition to the entries of
# `pycountry.countries`; these are added without ISO3 codes.
PYCOUNTRY_NAME_ADD = [
"Kosovo",
]


here = Path(__file__).parent.absolute()


Expand Down Expand Up @@ -164,7 +193,13 @@ def _parse_and_replace_tags(
return codes_without_tags + codes_with_tags

@classmethod
def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
def from_directory(
cls,
name: str,
path: Path,
config: DataStructureConfig = None,
file_glob_pattern: str = "**/*",
):
"""Initialize a CodeList from a directory with codelist files

Parameters
Expand All @@ -173,12 +208,14 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
Name of the CodeList
path : :class:`pathlib.Path` or path-like
Directory with the codelist files
config: :class:`DataStructureConfig`, optional
Attributes for configuring the CodeList
file_glob_pattern : str, optional
Pattern to downselect codelist files by name

Returns
-------
instance of cls (CodeList if not inherited)
instance of cls (:class:`CodeList` if not inherited)

"""
code_list: List[Code] = []
Expand Down Expand Up @@ -511,7 +548,13 @@ class RegionCodeList(CodeList):
validation_schema: ClassVar[str] = "region"

@classmethod
def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
def from_directory(
cls,
name: str,
path: Path,
config: DataStructureConfig = None,
file_glob_pattern: str = "**/*",
):
"""Initialize a RegionCodeList from a directory with codelist files

Parameters
Expand All @@ -520,6 +563,8 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
Name of the CodeList
path : :class:`pathlib.Path` or path-like
Directory with the codelist files
config : :class:`DataStructureConfig`, optional
Attributes for configuring the CodeList
file_glob_pattern : str, optional
Pattern to downselect codelist files by name, default: "**/*" (i.e. all
files in all sub-folders)
Expand All @@ -529,9 +574,22 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
RegionCodeList

"""
mapping: Dict[str, RegionCode] = {}

code_list: List[RegionCode] = []

if config is not None and config.region is not None:
if config.region.country is True:
for i in countries:
code_list.append(
RegionCode(
name=PYCOUNTRY_NAME_OVERRIDE.get(i.name, i.name),
iso3_codes=i.alpha_3,
hierarchy="Country",
)
)
for c in PYCOUNTRY_NAME_ADD:
code_list.append(RegionCode(name=c, hierarchy="Country"))

for yaml_file in (
f
for f in path.glob(file_glob_pattern)
Expand Down
39 changes: 39 additions & 0 deletions nomenclature/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from pathlib import Path
from typing import Dict, Optional
from pydantic import BaseModel

import yaml


class RegionCodeListConfig(BaseModel):
    """A class for configuration of a RegionCodeList

    Attributes
    ----------
    country : bool, optional
        If True, all countries from `pycountry` are added to the RegionCodeList
        when it is initialized from a directory

    """

    # explicit default so that the attribute can be omitted irrespective of how
    # the installed pydantic version treats `Optional` fields without a default
    country: Optional[bool] = None


class DataStructureConfig(BaseModel):
    """A class for configuration of a DataStructureDefinition

    Attributes
    ----------
    region : RegionCodeListConfig, optional
        Attributes for configuring the RegionCodeList

    """

    # explicit default so that `DataStructureConfig()` (no config file) is valid
    # irrespective of how pydantic treats `Optional` fields without a default
    region: Optional[RegionCodeListConfig] = None

    @classmethod
    def from_file(cls, path: Path, file: str):
        """Read a DataStructureConfig from a file

        Parameters
        ----------
        path : :class:`pathlib.Path` or path-like
            `definitions` directory
        file : str
            File name

        Returns
        -------
        DataStructureConfig
            Configuration read from the file; `region` is None if the file is
            empty or has no (or an empty) `region` section

        """
        # cast to Path so that a str is accepted as documented ("path-like")
        with open(Path(path) / file, "r", encoding="utf-8") as stream:
            # `safe_load` returns None for an empty file
            config = yaml.safe_load(stream) or {}

        # the `region` section is optional, do not fail if it is missing or null
        region = config.get("region")
        return cls(
            region=RegionCodeListConfig(**region) if region is not None else None
        )
17 changes: 12 additions & 5 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
VariableCodeList,
MetaCodeList,
)
from nomenclature.config import DataStructureConfig
from nomenclature.validation import validate

logger = logging.getLogger(__name__)
Expand All @@ -37,19 +38,25 @@ def __init__(self, path, dimensions=None):
from a sub-folder of `path` of that name.
"""

if dimensions is None:
dimensions = ["region", "variable"]

if not isinstance(path, Path):
path = Path(path)

if not path.is_dir():
raise NotADirectoryError(f"Definitions directory not found: {path}")

self.dimensions = dimensions
if (path / "config.yaml").exists():
self.config = DataStructureConfig.from_file(
path=path,
file="config.yaml",
)
else:
self.config = DataStructureConfig()

self.dimensions = dimensions or ["region", "variable"]
for dim in self.dimensions:
codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
self.__setattr__(
dim, SPECIAL_CODELIST.get(dim, CodeList).from_directory(dim, path / dim)
dim, codelist_cls.from_directory(dim, path / dim, self.config)
)

empty = [d for d in self.dimensions if not self.__getattribute__(d)]
Expand Down
Binary file modified templates/model-registration-template.xlsx
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/data/general-config-definitions/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
region:
country: true
2 changes: 2 additions & 0 deletions tests/data/general-config-definitions/region/regions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- common:
- World
16 changes: 16 additions & 0 deletions tests/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ def test_empty_codelist_raises():
DataStructureDefinition(TEST_DATA_DIR / "simple_codelist")


def test_definition_from_general_config():
    """Check that countries are added to the region codelist via `config.yaml`"""
    definition = DataStructureDefinition(
        TEST_DATA_DIR / "general-config-definitions",
        dimensions=["region"],
    )

    expected_regions = (
        # explicitly defined in `general-config-definitions/region/regions.yaml`
        "World",
        # added via general-config definitions
        "Austria",
        # added via general-config definitions renamed from pycountry name
        "Bolivia",
        # added via general-config definitions in addition to pycountry.countries
        "Kosovo",
    )
    for region in expected_regions:
        assert region in definition.region


def test_to_excel(simple_definition, tmpdir):
"""Check writing a DataStructureDefinition to file"""
file = tmpdir / "testing_export.xlsx"
Expand Down
Loading