use aggregators as a uniform incarnation for the across-instances computation for both instance and global metrics #890

dafnapension · 2024-06-06T16:44:59Z

This simplifies grouping and filtering, and thereby extends them to global metrics and to bulk-instance metrics.

elronbandel · 2024-06-24T11:22:19Z

src/unitxt/metrics.py

+class Aggregator(Artifact):
+    @abstractmethod
+    def aggregate_one_group_score_named(
+        self, instances: List[Dict[str, Any]], score_names: List[str]
+    ) -> dict:
+        pass


Suggested change

class Aggregator(Artifact):

@abstractmethod

def aggregate_one_group_score_named(

self, instances: List[Dict[str, Any]], score_names: List[str]

) -> dict:

pass

class Aggregator(Artifact):

"""

Aggregate a list of instances into a dictionary of scores.

"""

score_names: List[str]

@abstractmethod

def aggregate(

self, instances: List[Dict[str, Any]]

) -> Dict[str, Any]:

pass

def __call__(self, instances):

return self.aggregate(instances)

elronbandel · 2024-06-24T11:25:40Z

src/unitxt/metrics.py

+class SimpleAggregator(Aggregator):
+    aggregating_func: Callable[[List[Dict[str, Any]], str], float]
+
+    def aggregate_one_group_score_named(
+        self, instances: List[Dict[str, Any]], score_names: List[str]
+    ) -> dict:
+        result = {}
+        for score_name in score_names:
+            result[score_name] = self.aggregating_func(instances, score_name)
+        return result
+
+
+def average_item_scores(instances: List[dict], score_name: str) -> float:
+    """Calculate mean of a set of instance scores (given by score_name), omitting NaN values.
+
+    Args:
+        instances: list of dicts of each instance's instance scores.
+        score_name: score field names to compute the mean for.
+    """
+    return nan_mean(
+        [instance["score"]["instance"][score_name] for instance in instances]
+    )
+
+
+def max_item_scores(instances: List[dict], score_name: str) -> float:
+    """Calculate max of a set of instance scores (given by score_name), omitting NaN values.
+
+    Args:
+        instances: list of dicts of each instance's instance scores.
+        score_name: score field names to compute the max for.
+    """
+    return nan_max(
+        [instance["score"]["instance"][score_name] for instance in instances]
+    )
+
+
+class AverageItemsAggregator(SimpleAggregator):
+    aggregating_func = Field(default_factory=lambda: average_item_scores)


Suggested change

class SimpleAggregator(Aggregator):

aggregating_func: Callable[[List[Dict[str, Any]], str], float]

def aggregate_one_group_score_named(

self, instances: List[Dict[str, Any]], score_names: List[str]

) -> dict:

result = {}

for score_name in score_names:

result[score_name] = self.aggregating_func(instances, score_name)

return result

def average_item_scores(instances: List[dict], score_name: str) -> float:

"""Calculate mean of a set of instance scores (given by score_name), omitting NaN values.

Args:

instances: list of dicts of each instance's instance scores.

score_name: score field names to compute the mean for.

"""

return nan_mean(

[instance["score"]["instance"][score_name] for instance in instances]

)

def max_item_scores(instances: List[dict], score_name: str) -> float:

"""Calculate max of a set of instance scores (given by score_name), omitting NaN values.

Args:

instances: list of dicts of each instance's instance scores.

score_name: score field names to compute the max for.

"""

return nan_max(

[instance["score"]["instance"][score_name] for instance in instances]

)

class AverageItemsAggregator(SimpleAggregator):

aggregating_func = Field(default_factory=lambda: average_item_scores)

class Mean(Aggregator):

def aggregate(

self, instances: List[Dict[str, Any]]

) -> dict:

result = {}

for score_name in self.score_names:

result[score_name] = nan_mean(

[instance["score"]["instance"][score_name] for instance in instances]

)

class Max(Aggregator):

def aggregate(

self, instances: List[Dict[str, Any]]

) -> dict:

result = {}

for score_name in self.score_names:

result[score_name] = nan_max(

[instance["score"]["instance"][score_name] for instance in instances]

)

class ConfidenceInterval(Aggregator):

aggregator: Aggregator = Mean()

def aggregate(

self, instances: List[Dict[str, Any]]

) -> dict:

results = []

for sample in sample(instances):

result = self.aggregate(sample)

results.append(result)

result["ci_low"], results["ci_high"] = ci(results)

class Filter(Aggregator):

aggregator: Aggregator = Mean()

filter: Func

def aggregate(

self, instances: List[Dict[str, Any]]

) -> dict:

instances = [instance for instance in instances if self.filter(instance)]

return self.aggregate(instances)

class Group(Aggregator):

group_aggregator: Aggregator = Mean()

all_groups_aggregator: Aggregator = Mean()

results = {}

group_results =[]

for group, group_name in split_to_groups(instances):

result = self.group_aggregator(group)

group_results.append(result)

update_result(result, group_name)

results.update(self. all_groups_aggregator(group_results))

return results

class Mapper:
    def map(self, instance: Dict[str, Any]) -> Dict[str, Any]:
        pass

    def __call__(self, instances):
        return [self.map(instance) for instance in instances]


class Metric:
    aggregate: Aggregator = Group(ConfidenceInterval(Mean(fields=["f1"])), group_by="group_id")
    # aggregator: Aggregator = Group(ConfidenceInterval(RougeAggregator()), group_by="group_id")
    # map: Mapper = RougeInstanceScore()

    def compute(self, instances):
        instances = self.map(instances)
        return self.aggregate(instances)

So every metric has one and only one aggregator.

elronbandel · 2024-06-24T11:27:38Z

src/unitxt/metrics.py

+    ci_samples_from_groups_scores: bool = False
+
+    # the basic aggregation along the instances: no split to groups, no filtering
+    aggregator: Aggregator = Field(default_factory=lambda: AverageItemsAggregator())


Suggested change

aggregator: Aggregator = Field(default_factory=lambda: AverageItemsAggregator())

aggregator: Aggregator = Mean()

elronbandel · 2024-06-24T11:29:57Z

src/unitxt/metrics.py

@@ -317,7 +380,7 @@ def score_based_confidence_interval(
            # if aggregation_func is None, we simply take the mean of the resampled instance scores
            # otherwise, the aggregation_func needs to be applied AFTER resampling the instances;
            #   that is, re-form the groups, calculate the function, and take the mean of the group scores
-            aggregation_func = self.average_item_scores
+            aggregation_func = AverageItemsAggregator().aggregate_one_group_score_named


Suggested change

aggregation_func = AverageItemsAggregator().aggregate_one_group_score_named

aggregation_func = self.aggregator

elronbandel

See comments

…yping of prediction (type rather than string) Signed-off-by: dafnapension <dafnashein@yahoo.com>

Signed-off-by: dafnapension <dafnashein@yahoo.com>

dafnapension · 2024-08-08T20:25:54Z

Maybe return to this after the war ends

dafnapension · 2024-08-09T09:53:17Z

Too complicated to keep rebasable, so closing for now.

dafnapension force-pushed the metrics_with_aggregators_2 branch from 355a298 to 73d8ce8 Compare June 6, 2024 16:45

dafnapension mentioned this pull request Jun 6, 2024

Simplified and thereby extended grouping, filtering, and splitting to control-comparison, over to global metrics and to bulk-instance metrics #845

Closed

dafnapension force-pushed the metrics_with_aggregators_2 branch 4 times, most recently from 7c48176 to ee1dde6 Compare June 11, 2024 14:58

dafnapension force-pushed the metrics_with_aggregators_2 branch 9 times, most recently from 9541e46 to 41d3f97 Compare June 21, 2024 10:50

dafnapension force-pushed the metrics_with_aggregators_2 branch 2 times, most recently from e36feee to dd2a491 Compare June 24, 2024 07:56

elronbandel reviewed Jun 24, 2024

View reviewed changes

elronbandel requested changes Jun 24, 2024

View reviewed changes

dafnapension force-pushed the metrics_with_aggregators_2 branch 8 times, most recently from 56c3af3 to 76e76c1 Compare July 1, 2024 07:51

dafnapension force-pushed the metrics_with_aggregators_2 branch 16 times, most recently from af4cb71 to 4f279f1 Compare July 29, 2024 15:33

dafnapension force-pushed the metrics_with_aggregators_2 branch 6 times, most recently from edc5749 to 89b583f Compare August 8, 2024 07:45

dafnapension added 2 commits August 8, 2024 15:32

make all one commit to ease rebasing. Last thing: update by the new t…

8a3c8b2

…yping of prediction (type rather than string) Signed-off-by: dafnapension <dafnashein@yahoo.com>

secrets

6bf93e6

Signed-off-by: dafnapension <dafnashein@yahoo.com>

dafnapension force-pushed the metrics_with_aggregators_2 branch from 89b583f to 6bf93e6 Compare August 8, 2024 12:32

dafnapension closed this Aug 8, 2024

dafnapension reopened this Aug 9, 2024

dafnapension closed this Aug 9, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

use aggregators as a uniform incarnation for the across-instances computation for both instance and global metrics #890

use aggregators as a uniform incarnation for the across-instances computation for both instance and global metrics #890

dafnapension commented Jun 6, 2024

elronbandel Jun 24, 2024 •

edited

Loading

elronbandel Jun 24, 2024 •

edited

Loading

elronbandel Jun 24, 2024 •

edited

Loading

elronbandel Jun 24, 2024

elronbandel Jun 24, 2024

elronbandel left a comment

dafnapension commented Aug 8, 2024

dafnapension commented Aug 9, 2024

	aggregator: Aggregator = Field(default_factory=lambda: AverageItemsAggregator())
	aggregator: Aggregator = Mean()

	aggregation_func = AverageItemsAggregator().aggregate_one_group_score_named
	aggregation_func = self.aggregator

use aggregators as a uniform incarnation for the across-instances computation for both instance and global metrics #890

use aggregators as a uniform incarnation for the across-instances computation for both instance and global metrics #890

Conversation

dafnapension commented Jun 6, 2024

elronbandel Jun 24, 2024 • edited Loading

Choose a reason for hiding this comment

elronbandel Jun 24, 2024 • edited Loading

Choose a reason for hiding this comment

elronbandel Jun 24, 2024 • edited Loading

Choose a reason for hiding this comment

elronbandel Jun 24, 2024

Choose a reason for hiding this comment

elronbandel Jun 24, 2024

Choose a reason for hiding this comment

elronbandel left a comment

Choose a reason for hiding this comment

dafnapension commented Aug 8, 2024

dafnapension commented Aug 9, 2024

elronbandel Jun 24, 2024 •

edited

Loading

elronbandel Jun 24, 2024 •

edited

Loading

elronbandel Jun 24, 2024 •

edited

Loading