Skip to content

Commit

Permalink
add intl to korean and arabic (#698)
Browse files Browse the repository at this point in the history
* add intl

* fix

* add tests to ar and ko

* ko tokenizer

* ko-mecab addition

* add more examples + error

* Moved sacrebleu dependencies to tests.rqr

* Added general mechanism for detailed installation messages
for required packages

Signed-off-by: Yoav Katz <katz@il.ibm.com>

* Simplified error mechanism for missing requirements

Signed-off-by: Yoav Katz <katz@il.ibm.com>

* Add documentation for package requirements mixin

Signed-off-by: Yoav Katz <katz@il.ibm.com>

* Added unittest for sacrebleu

Signed-off-by: Yoav Katz <katz@il.ibm.com>

* Fixed typo in doc.

Signed-off-by: Yoav Katz <katz@il.ibm.com>

---------

Signed-off-by: Yoav Katz <katz@il.ibm.com>
Co-authored-by: Przemysław Klocek <przemyslaw.klocek@ibm.com>
Co-authored-by: Yoav Katz <katz@il.ibm.com>
  • Loading branch information
3 people committed Mar 27, 2024
1 parent c1fd5e4 commit 44a410c
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 52 deletions.
184 changes: 157 additions & 27 deletions prepare/metrics/normalized_sacrebleu.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from src.unitxt import add_to_catalog
from src.unitxt.metrics import HuggingfaceMetric, MetricPipeline
from src.unitxt.metrics import MetricPipeline, NormalizedSacrebleu
from src.unitxt.operators import CopyFields, MapInstanceValues
from src.unitxt.test_utils.metrics import test_metric

Expand All @@ -12,15 +12,15 @@
"French": None,
"Spanish": None,
"Portuguese": None,
"Arabic": None,
"Korean": None,
"Arabic": "intl",
"Korean": "ko-mecab",
"fr": None,
"de": None,
"es": None,
"pt": None,
"en": None,
"ar": None,
"ko": None,
"ar": "intl",
"ko": "ko-mecab",
"japanese": "ja-mecab",
"Japanese": "ja-mecab",
"ja": "ja-mecab",
Expand All @@ -41,17 +41,11 @@
use_query=True,
),
],
metric=HuggingfaceMetric(
hf_metric_name="sacrebleu",
hf_main_score="score",
prediction_type="str",
main_score="sacrebleu",
scale=100.0,
scaled_fields=["sacrebleu", "precisions"],
hf_additional_input_fields_pass_one_value=["tokenize"],
),
metric=NormalizedSacrebleu(),
)

### ENGLISH

predictions = ["hello there general kenobi", "on our way to ankh morpork"]
references = [
["hello there general kenobi", "hello there !"],
Expand Down Expand Up @@ -106,39 +100,117 @@
global_target=global_target,
)


### JAPANESE

predictions = [
"他の専門家たちと同様に、彼は糖尿病を完治できるかどうかについては懐疑的であり、これらの調査結果はすでにI型糖尿病を患っている人々には何の関連性もないことを指摘しています。"
"他の専門家たちと同様に、彼は糖尿病を完治できるかどうかについては懐疑的であり、これらの調査結果はすでにI型糖尿病を患っている人々には何の関連性もないことを指摘しています。",
"他方、成績評価の甘い授業がく評価されたり、人気取に走教師が出たりし、成績のりや大学教師のレベルダウという弊害をもたら恐れがある、などの反省見もある.",
]
references = [
[
"他の専門家たちと同様に、彼は糖尿病を完治できるかどうかについては懐疑的であり、これらの調査結果はすでにI型糖尿病を患っている人々には何の関連性もないことを指摘しています。"
]
],
[
"他方、成績評価の甘い授業が高く評価されたり、人気取りに走る教師が出たりし、成績の安売りや大学教師のレベルダウンという弊害をもたらす恐れがある、などの反省意見もある."
],
]
task_data = [{"target_language": "ja"}]
task_data = len(predictions) * [{"target_language": "ja", "tokenize": "ja-mecab"}]

instance_targets = [
{
"bp": 1.0,
"counts": [57, 56, 55, 54],
"totals": [57, 56, 55, 54],
"precisions": [1.0, 1.0, 1.0, 1.0],
"bp": 1.0,
"sys_len": 57,
"ref_len": 57,
"sacrebleu": 1.0,
"score": 1.0,
"score_name": "sacrebleu",
"sys_len": 57,
"totals": [57, 56, 55, 54],
},
{
"counts": [39, 31, 24, 17],
"totals": [47, 46, 45, 44],
"precisions": [0.83, 0.67, 0.53, 0.39],
"bp": 0.98,
"sys_len": 47,
"ref_len": 48,
"sacrebleu": 0.57,
"score": 0.57,
"score_name": "sacrebleu",
},
]


global_target = {
"counts": [96, 87, 79, 71],
"totals": [104, 102, 100, 98],
"precisions": [0.92, 0.85, 0.79, 0.72],
"bp": 0.99,
"sys_len": 104,
"ref_len": 105,
"sacrebleu": 0.81,
"score": 0.81,
"score_name": "sacrebleu",
"score_ci_low": 0.57,
"score_ci_high": 1.0,
"sacrebleu_ci_low": 0.57,
"sacrebleu_ci_high": 1.0,
}
outputs = test_metric(
metric=metric,
predictions=predictions,
references=references,
instance_targets=instance_targets,
global_target=global_target,
task_data=task_data,
)

### ARABIC

predictions = ["لى يسارك ، بر ماركت.", "ﻣَﺮَّﺕ ﻋِﺪَّﺓُ ﺳَﻨَﻮَﺍﺕٍ ﻗَﺒﻞ ﺃَﻥ ﺃَﺭَﺍﻫَﺎ ﻣِﻦ ﺟَﺪِﻳﺪٍ"]
references = [["على ، ستمر سوبر ماركت."], ["ﻣَﺮَّﺕ ﻋِﺪَّﺓُ ﺳَﻨَﻮَﺍﺕٍ ﻗَﺒﻞ ﺃَﻥ ﺃَﺭَﺍﻫَﺎ ﻣِﻦ ﺟَﺪِﻳﺪٍ"]]
task_data = len(predictions) * [{"target_language": "ar", "tokenize": "intl"}]
instance_targets = [
{
"counts": [3, 1, 0, 0],
"totals": [6, 5, 4, 3],
"precisions": [0.5, 0.2, 0.12, 0.08],
"bp": 1.0,
"sys_len": 6,
"ref_len": 6,
"sacrebleu": 0.18,
"score": 0.18,
"score_name": "sacrebleu",
},
{
"counts": [8, 7, 6, 5],
"totals": [8, 7, 6, 5],
"precisions": [1.0, 1.0, 1.0, 1.0],
"bp": 1.0,
"sys_len": 8,
"ref_len": 8,
"sacrebleu": 1.0,
"score": 1.0,
"score_name": "sacrebleu",
},
]

global_target = {
"counts": [11, 8, 6, 5],
"totals": [14, 12, 10, 8],
"precisions": [0.79, 0.67, 0.6, 0.62],
"bp": 1.0,
"counts": [57, 56, 55, 54],
"precisions": [1.0, 1.0, 1.0, 1.0],
"ref_len": 57,
"sacrebleu": 1.0,
"score": 1.0,
"sys_len": 14,
"ref_len": 14,
"sacrebleu": 0.67,
"score": 0.67,
"score_name": "sacrebleu",
"sys_len": 57,
"totals": [57, 56, 55, 54],
"score_ci_low": 0.13,
"score_ci_high": 1.0,
"sacrebleu_ci_low": 0.13,
"sacrebleu_ci_high": 1.0,
}

outputs = test_metric(
Expand All @@ -150,5 +222,63 @@
task_data=task_data,
)

### KOREAN

predictions = ["이게에 신을 살 거예요", "저는 한국 친구를 사귀고 싶습니다"]
references = [
["이 가게에서 신발을 살 거예요", "이 가에서 신발살 거예요"],
["저는 한국 친구를 사귀고 싶습니다", "저는 한구를 사귀 싶습니다"],
]
task_data = len(predictions) * [{"target_language": "ko", "tokenize": "ko-mecab"}]

instance_targets = [
{
"counts": [4, 3, 2, 1],
"totals": [7, 6, 5, 4],
"precisions": [0.57, 0.5, 0.4, 0.25],
"bp": 1.0,
"sys_len": 7,
"ref_len": 7,
"sacrebleu": 0.41,
"score": 0.41,
"score_name": "sacrebleu",
},
{
"counts": [9, 8, 7, 6],
"totals": [9, 8, 7, 6],
"precisions": [1.0, 1.0, 1.0, 1.0],
"bp": 1.0,
"sys_len": 9,
"ref_len": 9,
"sacrebleu": 1.0,
"score": 1.0,
"score_name": "sacrebleu",
},
]

global_target = {
"counts": [13, 11, 9, 7],
"totals": [16, 14, 12, 10],
"precisions": [0.81, 0.79, 0.75, 0.7],
"bp": 1.0,
"sys_len": 16,
"ref_len": 16,
"sacrebleu": 0.76,
"score": 0.76,
"score_name": "sacrebleu",
"score_ci_low": 0.41,
"score_ci_high": 1.0,
"sacrebleu_ci_low": 0.41,
"sacrebleu_ci_high": 1.0,
}

outputs = test_metric(
metric=metric,
predictions=predictions,
references=references,
instance_targets=instance_targets,
global_target=global_target,
task_data=task_data,
)

add_to_catalog(metric, "metrics.normalized_sacrebleu", overwrite=True)
1 change: 0 additions & 1 deletion requirements/base.rqr
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
datasets>=2.16.0
evaluate
mecab-python3
absl-py
ipadic
scipy
3 changes: 2 additions & 1 deletion requirements/tests.rqr
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ httpretty~=1.1.4
editdistance
rouge-score
nltk
sacrebleu
mecab-python3
sacrebleu[ko]
scikit-learn
jiwer
conllu
Expand Down
22 changes: 5 additions & 17 deletions src/unitxt/catalog/metrics/normalized_sacrebleu.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@
"French": null,
"Spanish": null,
"Portuguese": null,
"Arabic": null,
"Korean": null,
"Arabic": "intl",
"Korean": "ko-mecab",
"fr": null,
"de": null,
"es": null,
"pt": null,
"en": null,
"ar": null,
"ko": null,
"ar": "intl",
"ko": "ko-mecab",
"japanese": "ja-mecab",
"Japanese": "ja-mecab",
"ja": "ja-mecab"
Expand All @@ -45,18 +45,6 @@
}
],
"metric": {
"type": "huggingface_metric",
"hf_metric_name": "sacrebleu",
"hf_main_score": "score",
"prediction_type": "str",
"main_score": "sacrebleu",
"scale": 100.0,
"scaled_fields": [
"sacrebleu",
"precisions"
],
"hf_additional_input_fields_pass_one_value": [
"tokenize"
]
"type": "normalized_sacrebleu"
}
}
27 changes: 27 additions & 0 deletions src/unitxt/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3302,3 +3302,30 @@ def compute(
best_thr = thr

return {self.main_score: best_acc, "best_thr_max_acc": best_thr}


# Installation instructions shown to the user when the Korean MeCab packages
# needed by sacrebleu's "ko-mecab" tokenizer are not importable. Used as the
# per-package message in NormalizedSacrebleu._requirements_list.
KO_ERROR_MESSAGE = """
Additional dependencies required. To install them, run:
`pip install "sacrebleu[ko]"`.
For MacOS: If an error about 'mecab-config' shows up during installation, one should run:
`brew install mecab`
`pip install "sacrebleu[ko]"`
"""


# SacreBLEU metric defined as a HuggingfaceMetric subclass; it only overrides
# configuration fields and adds a package-requirements table.
class NormalizedSacrebleu(HuggingfaceMetric):
# HuggingFace metric to use and the key of its primary result.
hf_metric_name = "sacrebleu"
hf_main_score = "score"
prediction_type = "str"
# Name given to the main score (the tests above expect "score_name": "sacrebleu").
main_score = "sacrebleu"
# NOTE(review): presumably the fields in `scaled_fields` are divided by `scale`
# so results land in [0, 1] instead of [0, 100] — confirm in HuggingfaceMetric.
scale = 100.0
scaled_fields = ["sacrebleu", "precisions"]
# NOTE(review): presumably passes the per-instance "tokenize" value to the
# HF metric as a single shared value — confirm in HuggingfaceMetric.
hf_additional_input_fields_pass_one_value = ["tokenize"]
# Maps required module import names to the installation message to show
# when the module is missing.
# NOTE(review): these Korean MeCab modules appear to be required regardless
# of the target language being scored — confirm this is intended.
_requirements_list = {
"mecab_ko": KO_ERROR_MESSAGE,
"mecab_ko_dic": KO_ERROR_MESSAGE,
}
34 changes: 28 additions & 6 deletions src/unitxt/operator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from abc import abstractmethod
from dataclasses import field
from typing import Any, Dict, Generator, List, Optional
from typing import Any, Dict, Generator, List, Optional, Union

from .artifact import Artifact
from .dataclass import InternalField, NonPositionalField
Expand All @@ -14,7 +14,18 @@ class Operator(Artifact):


class PackageRequirementsMixin(Artifact):
_requirements_list: List[str] = InternalField(default_factory=list)
"""Base class used to automatically check for the existence of required python dependencies for an artifact (e.g. Operator or Metric).
The _requirements_list is either a list of required packages
(e.g. ["torch","sentence_transformers"]) or a dictionary between required packages
and detailed installation instructions on how to install each package.
(e.g. {"torch" : "Install Torch using `pip install torch`", "sentence_transformers" : "Install Sentence Transformers using `pip install sentence-transformers`"})
Note that the package names should be specified as they are used in the python import statement for the package.
"""

_requirements_list: Union[List[str], Dict[str, str]] = InternalField(
default_factory=list
)

def verify(self):
super().verify()
Expand All @@ -23,19 +34,30 @@ def verify(self):
def check_missing_requirements(self, requirements=None):
if requirements is None:
requirements = self._requirements_list
if isinstance(requirements, List):
requirements = {package: "" for package in requirements}

missing_packages = []
for package in requirements:
installation_instructions = []
for package, installation_instruction in requirements.items():
if not is_module_available(package):
missing_packages.append(package)
installation_instructions.append(installation_instruction)
if missing_packages:
raise MissingRequirementsError(self.__class__.__name__, missing_packages)
raise MissingRequirementsError(
self.__class__.__name__, missing_packages, installation_instructions
)


class MissingRequirementsError(Exception):
def __init__(self, class_name, missing_packages):
def __init__(self, class_name, missing_packages, installation_instructions):
self.class_name = class_name
self.missing_packages = missing_packages
self.message = f"{self.class_name} requires the following missing package(s): {', '.join(self.missing_packages)}"
self.installation_instruction = installation_instructions
self.message = (
f"{self.class_name} requires the following missing package(s): {', '.join(self.missing_packages)}. "
+ "\n".join(self.installation_instruction)
)
super().__init__(self.message)


Expand Down
Loading

0 comments on commit 44a410c

Please sign in to comment.