Skip to content

Commit

Permalink
Add functionality to automatically add modelcard on upload
Browse files Browse the repository at this point in the history
  • Loading branch information
juhoinkinen committed Jun 17, 2024
1 parent 3b5f7a1 commit 468411b
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 4 deletions.
15 changes: 13 additions & 2 deletions annif/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,8 +616,15 @@ def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric, results_fi
"--commit-message",
help="""The summary / title / first line of the generated commit.""",
)
@click.option(
"--modelcard/--no-modelcard",
default=True,
help="Update or create a ModelCard with upload.",
)
@cli_util.common_options
def run_upload(project_ids_pattern, repo_id, token, revision, commit_message):
def run_upload(
project_ids_pattern, repo_id, token, revision, commit_message, modelcard
):
"""
Upload selected projects and their vocabularies to a Hugging Face Hub repository.
\f
Expand Down Expand Up @@ -655,6 +662,8 @@ def run_upload(project_ids_pattern, repo_id, token, revision, commit_message):
finally:
for fobj in fobjs:
fobj.close()
if modelcard:
hfh_util.upsert_modelcard(repo_id, projects, token, revision)


@cli.command("download")
Expand Down Expand Up @@ -690,7 +699,9 @@ def run_download(project_ids_pattern, repo_id, token, revision, force):
`project_ids_pattern` from the specified Hugging Face Hub repository and
unzips the archives to `data/` directory and places the configuration files
to `projects.d/` directory. An authentication token and revision can
be given with options.
be given with options. If the README.md does not exist in the repository, it is
created with default contents and metadata of the uploaded projects; if it exists,
its metadata are updated as necessary.
"""

project_ids = hfh_util.get_matching_project_ids_from_hf_hub(
Expand Down
46 changes: 46 additions & 0 deletions annif/hfh_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,49 @@ def get_vocab_id_from_config(config_path: str) -> str:
config.read(config_path)
section = config.sections()[0]
return config[section]["vocab"]


def upsert_modelcard(repo_id, projects, token, revision):
    """Create or update the Model Card (README.md) of a Hugging Face Hub
    repository and record the vocabulary languages of the given projects in
    its metadata.

    If README.md is already listed among the repository files, the card is
    loaded and updated; otherwise a new card with default contents is created.
    The languages already present in the card metadata are kept and the
    `vocab_lang` of every project is added to them. The card is then pushed to
    the repository with a commit message telling whether it was created or
    updated.

    Args:
        repo_id: id of the Hugging Face Hub repository.
        projects: iterable of projects; the `vocab_lang` attribute of each is
            read.
        token: authentication token used for listing, loading and pushing.
        revision: revision used for listing the files and pushing the card.
    """
    from huggingface_hub import ModelCard

    card_exists = "README.md" in _list_files_in_hf_hub(repo_id, token, revision)
    if card_exists:
        # Pass the token so that the card of a private or gated repository can
        # be read as well.
        # NOTE(review): ModelCard.load appears to take no revision argument, so
        # the card is presumably read from the default branch - confirm.
        card = ModelCard.load(repo_id, token=token)
        commit_message = "Update README.md with Annif"
    else:
        card = _create_modelcard(repo_id)
        commit_message = "Create README.md with Annif"

    existing = card.data.language
    if isinstance(existing, str):
        # The metadata may hold a single language as a bare string; iterating
        # it directly would split the code into separate characters.
        existing = [existing]
    languages = set(existing) if existing else set()
    languages.update(proj.vocab_lang for proj in projects)
    # Sort to keep the metadata stable between uploads instead of depending on
    # set iteration order.
    card.data.language = sorted(languages)

    card.push_to_hub(
        repo_id=repo_id, token=token, revision=revision, commit_message=commit_message
    )


def _create_modelcard(repo_id):
    """Build a new Model Card with default contents for the given repository.

    The card text consists of an empty metadata block, a heading with the
    repository name and short usage instructions for `annif download`. The
    pipeline tag and tags of the card metadata are then set to their defaults.

    Args:
        repo_id: id of the Hugging Face Hub repository, with or without a
            namespace prefix (`user/repo` or `repo`).

    Returns:
        The new ModelCard object; nothing is pushed to the Hub here.
    """
    from huggingface_hub import ModelCard

    # Use the last path component so that a repository id without a namespace
    # does not raise IndexError.
    repo_name = repo_id.split("/")[-1]
    content = f"""
---
---
# {repo_name}
## Usage
Use the `annif download` command to download selected projects with Annif;
for example, to download all projects in this repository run
annif download "*" {repo_id}
"""
    card = ModelCard(content)
    card.data.pipeline_tag = "text-classification"
    card.data.tags = ["annif"]
    return card
26 changes: 24 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,10 +1069,13 @@ def test_run_help():
assert "Run Annif in server mode for development." in result.output


@mock.patch("annif.hfh_util.upsert_modelcard")
@mock.patch("huggingface_hub.HfApi.preupload_lfs_files")
@mock.patch("huggingface_hub.CommitOperationAdd")
@mock.patch("huggingface_hub.HfApi.create_commit")
def test_upload(create_commit, CommitOperationAdd, preupload_lfs_files):
def test_upload(
create_commit, CommitOperationAdd, preupload_lfs_files, upsert_modelcard
):
result = runner.invoke(annif.cli.cli, ["upload", "dummy-fi", "dummy-repo"])
assert not result.exception
assert create_commit.call_count == 1
Expand Down Expand Up @@ -1108,16 +1111,35 @@ def test_upload(create_commit, CommitOperationAdd, preupload_lfs_files):
)
in create_commit.call_args_list
)
assert upsert_modelcard.call_count == 1


@mock.patch("annif.hfh_util.upsert_modelcard")
@mock.patch("huggingface_hub.HfApi.preupload_lfs_files")
@mock.patch("huggingface_hub.CommitOperationAdd")
@mock.patch("huggingface_hub.HfApi.create_commit")
def test_upload_many(
    create_commit, CommitOperationAdd, preupload_lfs_files, upsert_modelcard
):
    """Uploading with a wildcard pattern makes a single commit with one add
    operation per file and upserts the model card once."""
    cli_args = ["upload", "dummy-*", "dummy-repo"]

    result = runner.invoke(annif.cli.cli, cli_args)

    assert not result.exception
    # Everything goes into one commit, followed by one model card upsert
    create_commit.assert_called_once()
    upsert_modelcard.assert_called_once()
    assert CommitOperationAdd.call_count == 11


@mock.patch("huggingface_hub.HfApi.preupload_lfs_files")
@mock.patch("huggingface_hub.CommitOperationAdd")
@mock.patch("huggingface_hub.HfApi.create_commit")
@mock.patch("annif.hfh_util.upsert_modelcard")
def test_upload_no_modelcard_upsert(
    upsert_modelcard, create_commit, CommitOperationAdd, preupload_lfs_files
):
    """With --no-modelcard the upload succeeds without touching the model card."""
    cli_args = ["upload", "dummy-fi", "dummy-repo", "--no-modelcard"]

    result = runner.invoke(annif.cli.cli, cli_args)

    assert not result.exception
    upsert_modelcard.assert_not_called()


def test_upload_nonexistent_repo():
Expand Down
61 changes: 61 additions & 0 deletions tests/test_hfh_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,64 @@ def test_copy_project_config_overwrite(copy, exists):
assert copy.call_args == mock.call(
"tests/huggingface-cache/dummy-fi.cfg", "projects.d/dummy-fi.cfg"
)


@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=["README.md"])
@mock.patch(
    "huggingface_hub.ModelCard.load",
)
@mock.patch(
    "huggingface_hub.ModelCard",
)
def test_upsert_modelcard_existing_card(
    modelcard, load, _list_files_in_hf_hub, project
):
    """When README.md is listed in the repository, the card is loaded instead
    of being created, and pushed with the "Update" commit message."""
    repo_id = "user/repo"
    projects = [project]
    token = "mytoken"
    revision = "main"

    annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision)

    assert not modelcard.called  # Do not create new card
    # Use the real assert_* methods of the mocks: `called_once_with` is not an
    # assertion method, and wrapping it in `assert` can never fail.
    load.assert_called_once()
    # Check only the repository argument so that optional keyword arguments of
    # the load call do not make this test brittle.
    assert load.call_args[0][0] == repo_id
    load.return_value.push_to_hub.assert_called_once_with(
        repo_id=repo_id,
        token=token,
        revision=revision,
        commit_message="Update README.md with Annif",
    )


@mock.patch("annif.hfh_util._list_files_in_hf_hub", return_value=[])
@mock.patch(
    "huggingface_hub.ModelCard",
)
def test_upsert_modelcard_new_card(modelcard, _list_files_in_hf_hub, project):
    """When the repository has no README.md, a new card is created with the
    repository name as heading, and pushed with the "Create" commit message."""
    repo_id = "annif-user/annif-hfh-repo"
    projects = [project]
    token = "mytoken"
    revision = "main"

    annif.hfh_util.upsert_modelcard(repo_id, projects, token, revision)

    # Use the real assert_* methods of the mocks: `called_once` and
    # `called_once_with` are not assertion methods, and wrapping them in
    # `assert` can never fail.
    modelcard.assert_called_once()
    assert "# annif-hfh-repo" in modelcard.call_args[0][0]  # README heading
    modelcard.return_value.push_to_hub.assert_called_once_with(
        repo_id=repo_id,
        token=token,
        revision=revision,
        commit_message="Create README.md with Annif",
    )


@mock.patch("huggingface_hub.ModelCard")
def test_create_modelcard(modelcard):
    """A newly created card carries the default pipeline tag and tags."""
    card = annif.hfh_util._create_modelcard("user/repo")

    metadata = card.data
    assert metadata.pipeline_tag == "text-classification"
    assert metadata.tags == ["annif"]

0 comments on commit 468411b

Please sign in to comment.