Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include definitions from GitHub repo #265

Merged
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 41 additions & 17 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from pyam.utils import write_sheet
from pydantic import BaseModel, validator

import nomenclature
from nomenclature.code import Code, MetaCode, RegionCode, VariableCode
from nomenclature.config import DataStructureConfig
from nomenclature.countries import countries
from nomenclature.error.codelist import DuplicateCodeError
from nomenclature.error.variable import (
MissingWeightError,
Expand Down Expand Up @@ -191,28 +191,47 @@ def from_directory(
instance of cls (:class:`CodeList` if not inherited)

"""
code_list: List[Code] = []
code_list = cls._parse_codelist_dir(path, file_glob_pattern)

if config is not None:
dimension = path.name
codelistconfig = getattr(config, dimension, None)
if codelistconfig is not None and codelistconfig.repository is not None:
repo_path = (
config.repository[codelistconfig.repository].path
/ codelistconfig.repository_definition_path
)
code_list.extend(
cls._parse_codelist_dir(
repo_path,
file_glob_pattern,
)
)

mapping: Dict[str, Code] = {}
for code in code_list:
if code.name in mapping:
raise DuplicateCodeError(name=name, code=code.name)
mapping[code.name] = code
return cls(name=name, mapping=mapping)

@classmethod
def _parse_codelist_dir(
    cls, path: Path, file_glob_pattern: str = "**/*"
) -> List[Code]:
    """Parse all codelist yaml files in a directory into a list of codes

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Directory with the codelist files
    file_glob_pattern : str, optional
        Pattern to downselect codelist files by name, default: "**/*"

    Returns
    -------
    list of codes
        One entry per item of every matching ``.yaml``/``.yml`` file, after
        the list was passed through `_parse_and_replace_tags`. Checking for
        duplicates and building the CodeList is left to the caller, because
        the CodeList name is not known here.

    """
    code_list: List[Code] = []
    # files starting with "tag_" are not parsed as codes here; they are left
    # to `_parse_and_replace_tags` (presumably tag definitions - defined
    # outside this block, confirm there)
    yaml_files = (
        f
        for f in path.glob(file_glob_pattern)
        if f.suffix in {".yaml", ".yml"} and not f.name.startswith("tag_")
    )
    for yaml_file in yaml_files:
        with open(yaml_file, "r", encoding="utf-8") as stream:
            _code_list = yaml.safe_load(stream)

        for code_dict in _code_list:
            code = cls.code_basis.from_dict(code_dict)
            # add `file` attribute, relative to the parent of `path` so that
            # the name of the codelist directory is part of the value
            code.file = yaml_file.relative_to(path.parent).as_posix()
            code_list.append(code)

    code_list = cls._parse_and_replace_tags(code_list, path, file_glob_pattern)
    return code_list

@classmethod
def read_excel(cls, name, source, sheet_name, col, attrs=None):
Expand Down Expand Up @@ -536,7 +555,7 @@ def from_directory(
Name of the CodeList
path : :class:`pathlib.Path` or path-like
Directory with the codelist files
config : :class:`DataStructureConfig`, optional
config : :class:`RegionCodeListConfig`, optional
Attributes for configuring the CodeList
file_glob_pattern : str, optional
Pattern to downselect codelist files by name, default: "**/*" (i.e. all
Expand All @@ -554,7 +573,7 @@ def from_directory(
if config is not None and config.region is not None:
# adding all countries
if config.region.country is True:
for c in countries:
for c in nomenclature.countries:
try:
code_list.append(
RegionCode(
Expand All @@ -566,12 +585,17 @@ def from_directory(
code_list.append(RegionCode(name=c.name, hierarchy="Country"))

# importing from an external repository
if repo := config.region.repository:
repo_path = path.parents[1] / repo
if not repo_path.exists():
raise FileNotFoundError(f"Repository not found: {repo}")
if config.region.repository:
repo_path = (
config.repository[config.region.repository].path
/ config.region.repository_definition_path
)

code_list = cls._parse_region_code_dir(
code_list, repo_path, file_glob_pattern, repository=repo
code_list,
repo_path,
file_glob_pattern,
repository=config.repository,
)
code_list = cls._parse_and_replace_tags(
code_list, repo_path, file_glob_pattern
Expand Down
89 changes: 85 additions & 4 deletions nomenclature/config.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,69 @@
from pathlib import Path
from typing import Dict, Optional
from pydantic import BaseModel
from typing import Optional, Dict
from pydantic import BaseModel, root_validator, validator

import yaml
from git import Repo


class CodeListConfig(BaseModel):
    """Configuration of the codelist of a single dimension"""

    # name of the dimension this configuration belongs to, e.g. "region"
    dimension: str
    # key of the repository to import definitions from, if any
    repository: Optional[str]
    # location of the definitions inside the repository; joined onto the
    # path of the fetched repository when the codelist is read
    repository_definition_path: Optional[str]

    # `skip_on_failure=True`: when `dimension` failed its own validation it is
    # missing from `v`, and indexing it below would escape as a bare KeyError
    # instead of the validation error that is reported anyway
    @root_validator(skip_on_failure=True)
    def set_repository_definition_path(cls, v):
        """Default the definition path to ``definitions/<dimension>``

        Only applies if a repository is given without an explicit
        `repository_definition_path`.
        """
        if (
            v.get("repository") is not None
            and v.get("repository_definition_path") is None
        ):
            v["repository_definition_path"] = f"definitions/{v['dimension']}"
        return v


class RegionCodeListConfig(CodeListConfig):
    """Configuration of the region codelist"""

    # if True, all countries are added to the region codelist as codes with
    # the hierarchy "Country"
    country: Optional[bool]


class Repository(BaseModel):
    """A git repository that definitions are imported from

    Either `hash` or `release` may be given to select a revision, not both.
    `path` must not be part of the config, it is assigned by `fetch_repo`.
    """

    url: str
    hash: Optional[str]
    release: Optional[str]
    path: Optional[Path]

    @root_validator()
    def check_hash_and_release(cls, v):
        """Raise if both a hash and a release are given"""
        both_given = bool(v.get("hash")) and bool(v.get("release"))
        if both_given:
            raise ValueError("Either 'hash' or 'release' can be provided, not both.")
        return v

    @validator("path")
    def check_path_empty(cls, v):
        """Raise if a path is given as part of the config"""
        if v is None:
            return v
        raise ValueError("path must not be set as part of the config")

    @property
    def revision(self):
        """Revision to check out: `hash`, otherwise `release`, otherwise "main" """
        for selected in (self.hash, self.release):
            if selected:
                return selected
        return "main"

    def fetch_repo(self, to_path):
        """Clone or update the repository at `to_path` and check out `revision`

        Parameters
        ----------
        to_path : :class:`pathlib.Path` or path-like
            Directory of the local clone; cloned from `url` if it is not an
            existing directory, otherwise the existing clone is fetched

        """
        if not isinstance(to_path, Path):
            to_path = Path(to_path)

        if to_path.is_dir():
            repo = Repo(to_path)
            repo.remotes.origin.fetch()
        else:
            repo = Repo.clone_from(self.url, to_path)

        self.path = to_path
        # discard local modifications before and after switching revision and
        # remove untracked and ignored files
        repo.git.reset("--hard")
        repo.git.checkout(self.revision)
        repo.git.reset("--hard")
        repo.git.clean("-xdf")
        # only the default revision "main" is pulled after the checkout
        if self.revision == "main":
            repo.remotes.origin.pull()


class DataStructureConfig(BaseModel):
"""A class for configuration of a DataStructureDefinition

Expand All @@ -23,7 +74,32 @@ class DataStructureConfig(BaseModel):

"""

repository: Dict[str, Repository] = {}
region: Optional[RegionCodeListConfig]
variable: Optional[CodeListConfig]

file: Path

@validator("region", "variable", pre=True)
def add_dimension(cls, v, field):
    """Add the name of the field as `dimension` to the raw codelist config

    A "dimension" key that is already part of `v` takes precedence over the
    field name.
    """
    # an explicit `None` or an already constructed config model cannot be
    # unpacked with `**`; hand it on unchanged to the regular field validation
    if v is None or isinstance(v, BaseModel):
        return v
    return {"dimension": field.name, **v}

@root_validator
def check_repository_consistency(cls, values):
    """Raise if a dimension refers to a repository that is not declared

    The check also applies if no repository is declared at all, so that an
    unknown name is reported here rather than as a KeyError when the path of
    the repository is looked up.

    Raises
    ------
    ValueError
        If `region.repository` or `variable.repository` is not a key of
        `repository`.

    """
    known_repositories = values.get("repository") or {}
    for dimension in ("region", "variable"):
        # the dimension is missing from `values` if it failed validation
        dimension_config = values.get(dimension)
        if dimension_config is None or not dimension_config.repository:
            continue
        if dimension_config.repository not in known_repositories:
            raise ValueError(
                f"Unknown repository {dimension_config.repository} in"
                f" {dimension}.repository."
            )
    return values

@classmethod
def from_file(cls, path: Path, file: str):
Expand All @@ -39,5 +115,10 @@ def from_file(cls, path: Path, file: str):
"""
with open(path / file, "r", encoding="utf-8") as stream:
config = yaml.safe_load(stream)
instance = cls(**config, file=path / file)
instance.fetch_repos()
return instance

return cls(region=RegionCodeListConfig(**config["region"]))
def fetch_repos(self):
    """Fetch all repositories into the directory of the config file

    Each repository is fetched into a sub-directory named after its key in
    `repository`.
    """
    target_dir = self.file.parent
    for key in self.repository:
        self.repository[key].fetch_repo(target_dir / key)
6 changes: 3 additions & 3 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ def __init__(self, path, dimensions=None):
file="config.yaml",
)
else:
self.config = DataStructureConfig()

self.config = None
self.dimensions = dimensions or ["region", "variable"]
for dim in self.dimensions:
codelist_cls = SPECIAL_CODELIST.get(dim, CodeList)
self.__setattr__(
dim, codelist_cls.from_directory(dim, path / dim, self.config)
dim,
codelist_cls.from_directory(dim, path / dim, self.config),
)

empty = [d for d in self.dimensions if not self.__getattribute__(d)]
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ install_requires =
pandas >= 1.5.2
numpy
pycountry
gitpython
setup_requires =
setuptools >= 41
setuptools_scm
Expand Down
8 changes: 7 additions & 1 deletion tests/data/general-config-definitions/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
repository:
common-definitions:
url: https://github.com/IAMconsortium/common-definitions.git/
region:
phackstock marked this conversation as resolved.
Show resolved Hide resolved
repository: validation_nc/region
repository: common-definitions
country: true
variable:
repository: common-definitions
path: definitions / variable
32 changes: 20 additions & 12 deletions tests/test_definition.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import shutil
import pytest
import pandas as pd
from nomenclature import DataStructureDefinition, create_yaml_from_xlsx
Expand Down Expand Up @@ -43,19 +44,26 @@ def test_empty_codelist_raises():
def test_definition_from_general_config():
    """Codes from local files, countries and an external repository are merged"""
    # NOTE(review): the config of this test data lists a GitHub repository, so
    # creating the definition presumably needs network access - confirm for CI
    obs = DataStructureDefinition(
        TEST_DATA_DIR / "general-config-definitions",
        dimensions=["region", "variable"],
    )
    try:
        # explicitly defined in `general-config-definitions/region/regions.yaml`
        assert "Region A" in obs.region
        # imported from https://github.com/IAMconsortium/common-definitions repo
        assert "World" in obs.region
        # added via general-config definitions
        assert "Austria" in obs.region
        # added via general-config definitions renamed from pycountry name
        assert "Bolivia" in obs.region
        # added via general-config definitions in addition to pycountry.countries
        assert "Kosovo" in obs.region

        assert "Primary Energy" in obs.variable
    finally:
        # clean up the external repo, also if one of the assertions failed
        for repository in obs.config.repository.values():
            if repository.path.exists():
                shutil.rmtree(repository.path, ignore_errors=True)


def test_to_excel(simple_definition, tmpdir):
Expand Down
Loading