Skip to content

Commit

Permalink
Extend the to_excel() method and add to CLI (#331)
Browse files Browse the repository at this point in the history
* Make black

* Streamline writing a codelist to xlsx

* Use ExcelFile context manager
  • Loading branch information
danielhuppmann committed Mar 12, 2024
1 parent 25dc55b commit 4fbfc61
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 43 deletions.
19 changes: 19 additions & 0 deletions nomenclature/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,22 @@ def check_region_aggregation(
results_df.to_excel(processed_data)
if differences:
differences_df.reset_index().to_excel(differences, index=False)


@cli.command("export-definition")
@click.argument("path", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(path_type=Path))
def cli_export_definition_to_excel(
    path: Path,
    target: Path,
):
    """Export the definitions of a project to an xlsx spreadsheet

    Parameters
    ----------
    path : Path
        Project directory to be exported
    target : Path
        Path and file name for the exported file
    """
    # the definitions are read from the "definitions" sub-folder of the project
    DataStructureDefinition(path / "definitions").to_excel(target)
25 changes: 6 additions & 19 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,6 @@ def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
)
if sort_by_code:
codelist.sort_values(by=self.name, inplace=True)
codelist.rename(
columns={c: str(c).capitalize() for c in codelist.columns}, inplace=True
)
return codelist

def to_csv(self, path=None, sort_by_code: bool = False, **kwargs):
Expand Down Expand Up @@ -378,22 +375,12 @@ def to_excel(
**kwargs
Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like).
"""

# default sheet_name to the name of the codelist
if sheet_name is None:
sheet_name = self.name

# open a new ExcelWriter instance (if necessary)
close = False
if not isinstance(excel_writer, pd.ExcelWriter):
close = True
excel_writer = pd.ExcelWriter(excel_writer, **kwargs)

write_sheet(excel_writer, sheet_name, self.to_pandas(sort_by_code))

# close the file if `excel_writer` arg was a file name
if close:
excel_writer.close()
sheet_name = sheet_name or self.name
if isinstance(excel_writer, pd.ExcelWriter):
write_sheet(excel_writer, sheet_name, self.to_pandas(sort_by_code))
else:
with pd.ExcelWriter(excel_writer, **kwargs) as writer:
write_sheet(writer, sheet_name, self.to_pandas(sort_by_code))

def codelist_repr(self, json_serialized=False) -> Dict:
"""Cast a CodeList into corresponding dictionary"""
Expand Down
87 changes: 72 additions & 15 deletions nomenclature/definition.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
from datetime import datetime
from pathlib import Path

import pandas as pd
import git
from pyam import IamDataFrame
from pyam.index import replace_index_labels
from pyam.logging import adjust_log_level
from pyam.utils import write_sheet

from nomenclature.codelist import (
CodeList,
Expand Down Expand Up @@ -41,11 +44,18 @@ def __init__(self, path, dimensions=None):
if not isinstance(path, Path):
path = Path(path)

if (file := path.parent / "nomenclature.yaml").exists():
self.project_folder = path.parent

if (file := self.project_folder / "nomenclature.yaml").exists():
self.config = NomenclatureConfig.from_file(file=file)
else:
self.config = NomenclatureConfig()

try:
self.repo = git.Repo(self.project_folder)
except git.InvalidGitRepositoryError:
self.repo = None

if not path.is_dir() and not (
self.config.repositories or self.config.definitions.region.country
):
Expand Down Expand Up @@ -136,22 +146,69 @@ def check_aggregate(self, df: IamDataFrame, **kwargs) -> None:
error = pd.concat(lst)
return error if not error.empty else None

def to_excel(self, excel_writer, **kwargs):
    """Write the codelists to an xlsx spreadsheet

    The first sheet ("project") lists attributes of this definition (project
    name, time of file creation and, where available, git information); the
    codelist of each dimension is then written to a sheet of its own.

    Parameters
    ----------
    excel_writer : str or :class:`pathlib.Path`
        File path as string or :class:`pathlib.Path`.
    **kwargs
        Passed to :class:`pandas.ExcelWriter`

    Raises
    ------
    ValueError
        If the project repository or any repository listed in the project
        configuration has uncommitted changes.
    """
    kwargs.setdefault("engine", "xlsxwriter")

    # Collect all attributes *before* opening the writer, so that an error
    # (e.g., a dirty repository) is raised before any file is created.
    project = self.project_folder.absolute().parts[-1]
    arg_dict = {
        "project": project,
        "file_created": time_format(datetime.now()),
        "": "",  # presumably a blank separator row in the sheet
    }
    if self.repo is not None:
        arg_dict.update(git_attributes(project, self.repo))

    # one block of attributes for the project, one per external repository
    frames = [make_dataframe(arg_dict)]
    frames.extend(
        make_dataframe(git_attributes(key, git.Repo(value.local_path)))
        for key, value in self.config.repositories.items()
    )
    project_sheet = pd.concat(frames)

    with pd.ExcelWriter(excel_writer, **kwargs) as writer:
        write_sheet(writer, "project", project_sheet)

        # write the codelist of each dimension to its own sheet
        for dim in self.dimensions:
            getattr(self, dim).to_excel(writer, dim, sort_by_code=True)


def time_format(x):
    """Return *x* formatted as a ``YYYY-MM-DD HH:MM:SS`` string"""
    pattern = "%Y-%m-%d %H:%M:%S"
    return x.strftime(pattern)


def git_attributes(name, repo):
    """Collect the remote url, commit hash and commit timestamp of a repository

    Parameters
    ----------
    name : str
        Prefix of the keys in the returned dictionary (also used in the error
        message).
    repo : :class:`git.Repo`
        Repository to be described.

    Returns
    -------
    dict
        Items `<name>.url`, `<name>.commit_hash` and `<name>.commit_timestamp`,
        all given as strings.

    Raises
    ------
    ValueError
        If the repository has uncommitted changes.
    """
    if repo.is_dirty():
        raise ValueError(f"Repository '{name}' is dirty")

    # resolve the commit only once, so hash and timestamp always belong together
    commit = repo.commit()
    return {
        f"{name}.url": repo.remote().url,
        # store the hex string instead of the Commit object
        f"{name}.commit_hash": commit.hexsha,
        f"{name}.commit_timestamp": time_format(commit.committed_datetime),
    }


def make_dataframe(data):
    """Cast a dictionary to a two-column dataframe ("attribute", "value")

    Each key of *data* becomes an entry of the "attribute" column,
    the corresponding item is placed in the "value" column.
    """
    frame = pd.DataFrame.from_dict(data, orient="index", columns=["value"])
    # move the dictionary keys from the index to a regular column
    frame = frame.reset_index()
    return frame.rename(columns={"index": "attribute"})
Binary file modified tests/data/excel_io/validation_nc.xlsx
Binary file not shown.
Binary file modified tests/data/excel_io/validation_nc_list_arg.xlsx
Binary file not shown.
21 changes: 21 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def test_cli_installed():
command in result.stdout
for command in (
"check-region-aggregation",
"export-definition",
"validate-project",
"validate-yaml",
)
Expand Down Expand Up @@ -312,3 +313,23 @@ def test_check_region_aggregation(tmp_path):
)
)
assert_iamframe_equal(IamDataFrame(tmp_path / "results.xlsx"), exp_result)


def test_cli_export_to_excel(tmpdir):
    """Check that the `export-definition` command writes the expected sheets"""
    target = tmpdir / "testing_export.xlsx"
    args = [
        "export-definition",
        str(TEST_DATA_DIR / "general-config"),
        str(target),
    ]

    # the command must finish without an error ...
    result = runner.invoke(cli, args)
    assert result.exit_code == 0

    # ... and the exported file must have one sheet per item, in this order
    with pd.ExcelFile(target) as workbook:
        assert workbook.sheet_names == ["project", "region", "variable"]
10 changes: 5 additions & 5 deletions tests/test_codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_to_excel(tmpdir):

(
VariableCodeList.from_directory(
"Variable", TEST_DATA_DIR / "validation_nc" / "variable"
"variable", TEST_DATA_DIR / "validation_nc" / "variable"
).to_excel(file)
)

Expand All @@ -148,11 +148,11 @@ def test_to_excel(tmpdir):
def test_to_csv():
"""Check writing to csv"""
obs = VariableCodeList.from_directory(
"Variable", TEST_DATA_DIR / "simple_codelist"
"variable", TEST_DATA_DIR / "simple_codelist"
).to_csv(lineterminator="\n")

exp = (
"Variable,Description,Unit,Skip-region-aggregation,Bool\n"
"variable,description,unit,skip-region-aggregation,bool\n"
"Some Variable,Some basic variable,,False,True\n"
)
assert obs == exp
Expand Down Expand Up @@ -207,8 +207,8 @@ def test_to_excel_read_excel_roundtrip(tmpdir):
"variable",
tmpdir / "output.xlsx",
"variable",
"Variable",
attrs=["Description", "Unit", "Region-aggregation"],
"variable",
attrs=["description", "unit", "region-aggregation"],
)

assert obs == exp
Expand Down
23 changes: 19 additions & 4 deletions tests/test_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,33 @@ def test_to_excel(simple_definition, tmpdir):

simple_definition.to_excel(file)

obs = pd.read_excel(file)
obs = pd.read_excel(file, sheet_name="variable")
exp = pd.read_excel(TEST_DATA_DIR / "excel_io" / "validation_nc.xlsx")
pd.testing.assert_frame_equal(obs, exp)


def test_to_excel_with_external_repo(tmpdir):
    """Check exporting a DataStructureDefinition with an external repo to xlsx"""
    target = tmpdir / "testing_export.xlsx"

    definition_dir = TEST_DATA_DIR / "general-config" / "definitions"
    DataStructureDefinition(definition_dir).to_excel(target)

    with pd.ExcelFile(target) as workbook:
        assert workbook.sheet_names == ["project", "region", "variable"]
        # read the sheet while the file is still open
        project_sheet = workbook.parse("project")

    # the first row of the "project" sheet holds the name of the project
    expected = pd.DataFrame(
        [["project", "general-config"]], columns=["attribute", "value"]
    )
    pd.testing.assert_frame_equal(expected, project_sheet[0:1])


@pytest.mark.parametrize(
"input_file, attrs, exp_file",
[
("validation_nc.xlsx", ["Description", "Unit"], "validation_nc_flat.yaml"),
("validation_nc.xlsx", ["description", "unit"], "validation_nc_flat.yaml"),
(
"validation_nc_list_arg.xlsx",
["Description", "Unit", "Region-aggregation"],
["description", "unit", "region-aggregation"],
"validation_nc_list_arg.yaml",
),
],
Expand All @@ -102,7 +117,7 @@ def test_create_yaml_from_xlsx(input_file, attrs, exp_file, tmpdir):
source=TEST_DATA_DIR / "excel_io" / input_file,
target=file,
sheet_name="variable_definitions",
col="Variable",
col="variable",
attrs=attrs,
)

Expand Down

0 comments on commit 4fbfc61

Please sign in to comment.