/
aggregation.py
99 lines (75 loc) · 3.66 KB
/
aggregation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Module for aggregating results
This module defines the request parameters and the result type used to aggregate answers and to estimate confidence in aggregated labels.
Use it when each task is assigned to several Tolokers. Note that aggregation runs on the Toloka server.
If you need advanced aggregation methods or want to run aggregation algorithms locally on your computer,
try the [crowd-kit library](https://toloka.ai/en/docs/crowd-kit).
"""
# Public API of this module: the names exported by `from ... import *`.
__all__ = [
    'AggregatedSolutionType',
    'PoolAggregatedSolutionRequest',
    'TaskAggregatedSolutionRequest',
    'WeightedDynamicOverlapTaskAggregatedSolutionRequest',
    'AggregatedSolution',
]
from enum import unique
from typing import Any, Dict, List
from .primitives.base import BaseTolokaObject
from ..util._codegen import attribute
from ..util._docstrings import inherit_docstrings
from ..util._extendable_enum import ExtendableStrEnum
@unique
class AggregatedSolutionType(ExtendableStrEnum):
    """Names of the aggregation models that a request can select."""

    # Skill-weighted aggregation, used in pools with a dynamic overlap.
    WEIGHTED_DYNAMIC_OVERLAP = 'WEIGHTED_DYNAMIC_OVERLAP'

    # Dawid-Skene aggregation model, used in pools without a dynamic overlap.
    DAWID_SKENE = 'DAWID_SKENE'
class PoolAggregatedSolutionRequest(BaseTolokaObject):
    """Request parameters for aggregating the results of a pool with the [aggregate_solutions_by_pool](toloka.client.TolokaClient.aggregate_solutions_by_pool.md) method.

    Attributes:
        type: Aggregation model:
            * `WEIGHTED_DYNAMIC_OVERLAP` — [Aggregation](https://toloka.ai/en/docs/guide/concepts/result-aggregation#aggr-by-skill) based on Tolokers' skill in a pool with a dynamic overlap.
            * `DAWID_SKENE` — [Dawid-Skene aggregation model](https://toloka.ai/en/docs/guide/concepts/result-aggregation#dawid-skene). It is used in pools without a dynamic overlap.
        pool_id: The ID of the pool.
        answer_weight_skill_id: The ID of the skill that determines the weight of the Toloker's responses.
        fields: Output data fields to aggregate. For the best results, each of these fields should have a limited number of response options.
            If the `DAWID_SKENE` aggregation type is selected, you can only specify one value.
    """

    class Field(BaseTolokaObject):
        """One output data field to aggregate.

        Attributes:
            name: The name of the output data field.
        """

        name: str

    # NOTE(review): `autocast=True` presumably lets a plain string be converted
    # to `AggregatedSolutionType` — confirm against `util._codegen.attribute`.
    type: AggregatedSolutionType = attribute(autocast=True)
    pool_id: str
    answer_weight_skill_id: str
    fields: List[Field]
class TaskAggregatedSolutionRequest(
    BaseTolokaObject,
    spec_field='type',
    spec_enum=AggregatedSolutionType
):
    """Common parameters for running aggregation on a single task.

    This is a base class: a concrete request type subclasses it and picks its
    aggregation model by passing an `AggregatedSolutionType` member as `spec_value`.

    Attributes:
        task_id: The ID of the task.
        pool_id: The ID of the pool containing the task.
    """

    task_id: str
    pool_id: str
@inherit_docstrings
class WeightedDynamicOverlapTaskAggregatedSolutionRequest(
    TaskAggregatedSolutionRequest,
    spec_value=AggregatedSolutionType.WEIGHTED_DYNAMIC_OVERLAP
):
    """Parameters to run weighted aggregation for a single task with a dynamic overlap.

    The aggregation model of this request is fixed to `WEIGHTED_DYNAMIC_OVERLAP`
    by the `spec_value` given in the class definition.

    Attributes:
        answer_weight_skill_id: The ID of the skill that determines the weight of the Toloker's responses.
        fields: Output data fields to aggregate. For the best results, each of these fields should have a limited number of response options.
    """

    class Field(BaseTolokaObject):
        """One output data field to aggregate.

        Attributes:
            name: The name of the output data field.
        """

        name: str

    answer_weight_skill_id: str
    fields: List[Field]
class AggregatedSolution(BaseTolokaObject):
    """The aggregated response for one task.

    Attributes:
        pool_id: The ID of the pool containing the task.
        task_id: The ID of the task.
        confidence: The confidence level for the aggregated response.
        output_values: Output data fields with aggregated responses.
    """

    pool_id: str
    task_id: str
    confidence: float
    # Keyed by string; the value types are left open (`Any`).
    output_values: Dict[str, Any]