### Подготовим исходный датасет - из "сырого" текста Википедии уберем все лишнее (ссылки, знаки препинания) и разобьем на кусочки по 3 слова

In [None]:
# Collect the raw text line by line until the user types the sentinel "stop".
# The sentinel itself is not stored, and lines are joined with a space so that
# the last word of one line does not merge with the first word of the next.
lines = []
while True:
    inp = input()
    if inp == 'stop':
        break
    lines.append(inp)
text = ' '.join(lines)

In [None]:
# Split on any run of whitespace: unlike split(' '), this never produces empty
# "words" for double spaces, tabs or line breaks.
words = text.split()
words

In [None]:
# Start with an empty list of phrases and an empty phrase buffer.
data, cur = [], ''

In [None]:
# Group the words into phrases of `chunk_size` words.
# Slicing keeps the last (possibly shorter) phrase and never emits an empty one;
# the list is rebuilt from scratch, so re-running the cell does not duplicate phrases.
chunk_size = 3
data = [
    ' '.join(words[start:start + chunk_size])
    for start in range(0, len(words), chunk_size)
]
data

In [None]:
# Write the phrases as a one-column TSV file.
# The header must be 'INPUT:text' because the task-creation cell reads dataset['INPUT:text'].
# csv.writer quotes phrases that contain tabs or quotes, so pandas.read_csv can parse them back;
# UTF-8 is set explicitly because the phrases may be non-ASCII.
import csv

with open('data.tsv', 'w', encoding='utf-8', newline='') as filehandle:
    writer = csv.writer(filehandle, delimiter='\t', lineterminator='\n')
    writer.writerow(['INPUT:text'])
    # Skip blank phrases: they would become empty tasks.
    writer.writerows([listitem] for listitem in data if listitem.strip())

Файл "data.tsv" готов к загрузке в проект

In [None]:
# !pip install ipyplot
# !pip install toloka-kit

import os
import datetime
import time

import ipyplot
import pandas

import toloka.client as toloka
import toloka.client.project.template_builder as tb

In [None]:
# Ask for the Toloka OAuth token; only an empty answer is reported as suspicious.
token = input("Enter your token:")
print('The token you entered may be invalid. Please try again.' if token == '' else 'OK')

In [None]:
# Every API call in this notebook goes through this client.
# The second argument selects the environment: 'PRODUCTION' here, or switch to SANDBOX.
toloka_client = toloka.TolokaClient(token, 'PRODUCTION')

# Fetching the requester both validates the OAuth token and gives us the account balance.
requester = toloka_client.get_requester()
balance_note = (
    'У вас достаточно денег на счету - '
    if requester.balance > 3.0
    else 'У вас маловато денег на счету - '
)
print(balance_note, requester.balance)

### Шаг (проект) 1. Получим от толокеров написанные на бумаге фразы

In [None]:
# Dataset for the first project: the list of phrases or words that performers will write by hand.
# The file is read as tab-separated values.
dataset = pandas.read_csv('data.tsv', sep='\t')
dataset.head()

In [None]:
# Language of the phrases: the value is interpolated into the project and skill names
# and passed to the Languages pool filter.
# NOTE(review): presumably a Toloka language code such as 'EN' or 'RU' is expected — confirm.
language = input()

In [None]:
# How performers will see the task: a single media-file field labelled with the phrase to write
project_interface = toloka.project.view_spec.TemplateBuilderViewSpec(
    config=tb.TemplateBuilder(
        view=tb.fields.MediaFileFieldV1(  # field for attaching a media file
            label=tb.data.InputData(path='text'),  # the label is taken from the 'text' input field
            data=tb.data.OutputData(path='result'),  # path for writing output data
            validation=tb.conditions.RequiredConditionV1(),  # the field is required
            # NOTE(review): presumably the allowed upload sources — confirm the expected
            # type and key names against the MediaFileFieldV1 documentation
            accept={'photo' : True, 'gallery' : True, 'fileSystem' : True}
        )
    )
)

# You can write instructions and upload them from a file or enter them later in the web interface
# prepared_instruction = open('instruction.html').read().strip()
prepared_instruction = '<b>Write the given phrases on paper and download photos of them</b>'

# Set up the project
handwriting_project = toloka.project.Project(
    assignments_issuing_type=toloka.project.Project.AssignmentsIssuingType.AUTOMATED,
    public_name=f'Write phrases and download photos ({language})',
    public_description='Write the given phrases on paper and download photos of them',
    public_instructions=prepared_instruction,
    # Set up the task: view, input, and output parameters
    task_spec=toloka.project.task_spec.TaskSpec(
        input_spec={'text': toloka.project.field_spec.StringSpec()},  # the phrase to write
        output_spec={'result': toloka.project.field_spec.FileSpec()},  # the attached file
        view_spec=project_interface,
    ),
)

# Call the API to create a new project; the returned object carries the assigned id
handwriting_project = toloka_client.create_project(handwriting_project)
print(f'Created handwriting project with id {handwriting_project.id}')
print(f'To view the project, go to: https://toloka.yandex.ru/requester/project/{handwriting_project.id}')

In [None]:
# Skill that marks performers who took part in the handwriting project.
# Reuse it when a skill with this name already exists, otherwise create it.
handwriting_skill_name = f'Handwriting in {language} language'
handwriting_skill = next(toloka_client.get_skills(name=handwriting_skill_name), None)
if not handwriting_skill:
    print('Create new handwriting skill')
    handwriting_skill = toloka_client.create_skill(
        name=handwriting_skill_name,
        hidden=True,
        public_requester_description={'EN': handwriting_skill_name},
    )
else:
    print('handwriting skill already exists')

# Skill for performers of the verification project; same reuse-or-create logic.
verification_skill_name = f'Handwriting in {language} language verification'
verification_skill = next(toloka_client.get_skills(name=verification_skill_name), None)
if not verification_skill:
    print('Create new verification skill')
    verification_skill = toloka_client.create_skill(
        name=verification_skill_name,
        hidden=True,
        public_requester_description={'EN': f'Handwriting in {language} language verification skill'},
    )
else:
    print('Verification skill already exists')

In [None]:
# Pool of the handwriting project; it is only described here and created through the API later
handwriting_pool = toloka.pool.Pool(
    project_id=handwriting_project.id,
    private_name='Pool 1',  # Only you can see this information.
    may_contain_adult_content=False,
    will_expire=datetime.datetime.utcnow() + datetime.timedelta(days=365),  # Expiry date: one year from now
    reward_per_assignment=0.03,     # Payment for one task page
    auto_accept_solutions=False,    # We will only pay the performer for completing the task,
                                    #    based on the verification results of the second project
    auto_accept_period_day=1,       # Number of days to determine if we'll pay
    assignment_max_duration_seconds=60*20,  # Give performers 20 minutes to complete one task page
    defaults=toloka.pool.Pool.Defaults(
        # We don't need overlapping for handwriting tasks
        default_overlap_for_new_task_suites=1,
        default_overlap_for_new_tasks=1,
    ),
)

# Set the number of tasks per page: 5 real tasks, no golden or training tasks
handwriting_pool.set_mixer_config(real_tasks_count=5, golden_tasks_count=0, training_tasks_count=0)
# Please note that the payment amount specified when creating the pool is the amount the performer receives for completing one page of tasks.
# If you specify 10 tasks per page above, then reward_per_assignment will be paid for completing 10 tasks.

# Show the tasks only to performers that match the Languages filter for the entered `language`
# (not necessarily English: the value comes from user input).
handwriting_pool.filter = toloka.filter.Languages.in_(language)

print(handwriting_pool.private_name)

In [None]:
# The first rule in this project restricts access for performers who often make mistakes.
# It is based on the AcceptanceRate collector.
handwriting_pool.quality_control.add_action(
    collector=toloka.collectors.AcceptanceRate(),
    conditions=[
        # Performer has more than 2 assignments in total
        toloka.conditions.TotalAssignmentsCount > 2,
        # and more than 35% of their assignments were rejected
        toloka.conditions.RejectedAssignmentsRate > 35,
    ],
    # This action tells Toloka what to do if the conditions above are True
    # In our case, we'll restrict access to all projects for 15 days
    # Always leave a comment: it may be useful later on
    action=toloka.actions.RestrictionV2(
        scope=toloka.user_restriction.UserRestriction.ALL_PROJECTS,
        duration=15,
        duration_unit='DAYS',
        private_comment='Performer often makes mistakes',  # Only you will see this comment
    )
)

In [None]:
# The second useful rule is "Fast responses". It allows us to filter out performers who respond too quickly.
handwriting_pool.quality_control.add_action(
    # Let's monitor fast submissions for the last 5 completed task pages
    # and define a quick response as one that takes less than 20 seconds
    collector=toloka.collectors.AssignmentSubmitTime(history_size=5, fast_submit_threshold_seconds=20),
    # If we see more than one fast response,
    conditions=[toloka.conditions.FastSubmittedCount > 1],
    # we ban the performer from all our projects for 10 days
    action=toloka.actions.RestrictionV2(
        scope=toloka.user_restriction.UserRestriction.ALL_PROJECTS,
        duration=10,
        duration_unit='DAYS',
        private_comment='Fast responses',  # Only you will see this comment
    )
)

In [None]:
# Another rule we use is for automatically updating skills
# This isn't really about quality, but rules can do a lot of useful things
# We set the handwriting skill for performers who have at least one accepted assignment
handwriting_pool.quality_control.add_action(
    collector=toloka.collectors.AnswerCount(),
    # If the performer has at least one accepted assignment,
    conditions=[toloka.conditions.AssignmentsAcceptedCount > 0],
    # it doesn't add to the skill, it sets the skill value to 1
    action=toloka.actions.SetSkill(skill_id=handwriting_skill.id, skill_value=1),
)

# Recompletion of rejected assignments sends the tasks you rejected to other performers according to the specified rule.
handwriting_pool.quality_control.add_action(
    collector=toloka.collectors.AssignmentsAssessment(),
    # Check if an assignment was rejected
    conditions=[toloka.conditions.AssessmentEvent == toloka.conditions.AssessmentEvent.REJECT],
    # If the condition is True, add 1 to overlap and open the pool
    action=toloka.actions.ChangeOverlap(delta=1, open_pool=True),
)

# Number of quality control rules configured on the pool so far
print('Quality rules count:', len(handwriting_pool.quality_control.configs))

In [None]:
# Create the pool through the API; the returned object replaces the local description and carries the pool id
handwriting_pool = toloka_client.create_pool(handwriting_pool)
print(f'To view this pool, visit: https://toloka.yandex.ru/requester/project/{handwriting_project.id}/pool/{handwriting_pool.id}')

### Шаг (проект) 2. Проверка проекта 1 другими толокерами

In [None]:
# How performers will see the verification task: the phrase, the photo of the handwriting
# and a Yes/No question. Field names must agree with each other:
#   * inputs  'text' / 'image' / 'assignment_id' are what the verification tasks are created with;
#   * output  'result' is what the hotkeys write, what aggregation reads and what the output spec declares.
verification_interface = toloka.project.view_spec.TemplateBuilderViewSpec(
    config=tb.TemplateBuilder(
        view=tb.view.ListViewV1(  # list of components that should be positioned from top to bottom in the ui
            items=[
                tb.view.TextViewV1(  # the phrase the performer was asked to write
                    content=tb.data.InputData(path='text'),
                    label='Phrase',
                ),
                tb.view.ImageViewV1(  # photo of the handwritten phrase (public link)
                    url=tb.data.InputData(path='image'),
                ),
                tb.fields.RadioGroupFieldV1(  # a component for selecting one value out of several options
                    label='Is the phrase written by hand correctly and legibly?',  # label above the options
                    # Must be the same path as in the hotkeys and the output spec; writing to
                    # another path would leave the declared 'result' output empty.
                    data=tb.data.OutputData(path='result'),
                    options=[
                        tb.fields.GroupFieldOption(label='Yes', value='OK'),
                        tb.fields.GroupFieldOption(label='No', value='BAD'),
                    ],
                    validation=tb.conditions.RequiredConditionV1()  # requirement to select one of the options
                )
            ]
        ),
        plugins=[
            tb.plugins.HotkeysPluginV1( # shortcuts for selecting options using the keyboard
                key_1=tb.actions.SetActionV1(data=tb.data.OutputData(path='result'), payload='OK'),
                key_2=tb.actions.SetActionV1(data=tb.data.OutputData(path='result'), payload='BAD')
            )
        ]
    )
)

# You can write instructions and upload them from a file or enter them later in the web interface
# prepared_instruction = open('instruction.html').read().strip()
verification_instruction = '''<b>Look at the photo and answer the question:</b><br/>
Is the given phrase written by hand on paper correctly and legibly?<br/>
If it is, click Yes.<br/>
If it isn't, click No.'''

# Set up the project
verification_project = toloka.project.Project(
    assignments_issuing_type=toloka.project.Project.AssignmentsIssuingType.AUTOMATED,
    public_name='Is the handwritten phrase correct?',
    public_description='Look at the photo and decide whether or not the phrase is written by hand correctly',
    public_instructions=verification_instruction,
    # Set up the task: view, input, and output parameters
    task_spec=toloka.project.task_spec.TaskSpec(
        input_spec={
            'text': toloka.project.field_spec.StringSpec(),
            # Public link to the photo: the files from the first project are re-uploaded
            # and passed here as URLs
            'image': toloka.project.field_spec.UrlSpec(),
            'assignment_id': toloka.project.field_spec.StringSpec(),
        },
        # We have to set allowed_values because we'll be using smart mixing to get the results of this project
        output_spec={'result': toloka.project.field_spec.StringSpec(allowed_values=['OK', 'BAD'])},
        view_spec=verification_interface,
    ),
)

# Call the API to create a new project
verification_project = toloka_client.create_project(verification_project)
print(f'Created verification project with id {verification_project.id}')
print(f'To view the project, go to: https://toloka.yandex.com/requester/project/{verification_project.id}')

In [None]:
# Pool of the verification project; it is only described here and created through the API later
verification_pool = toloka.pool.Pool(    
    project_id=verification_project.id,
    private_name='Pool 1. Road sign verification',  # Only you can see this information.
    may_contain_adult_content=False,
    will_expire=datetime.datetime.utcnow() + datetime.timedelta(days=365),  # Expiry date: one year from now
    reward_per_assignment=0.01,  # Payment for one task page
                                 # auto_accept_solutions is not set here
                                 # NOTE(review): the original comment says it is on by default,
                                 # so all tasks are paid — confirm
    assignment_max_duration_seconds=60*10,  # Give performers 10 minutes to complete one task page
    defaults=toloka.pool.Pool.Defaults(
        # We need an overlap to check the performers among themselves,
        # and we need to set an incremental relabeling (dynamic overlap) value less than max_overlap
        default_overlap_for_new_task_suites=2,
    ),
)

# Show the tasks only to performers that match the Languages filter for the entered `language`.
# We also won't allow our verification tasks to be performed by users who performed handwriting tasks:
# the `== None` comparison is part of the filter expression that is built here, so it is kept
# as written and not replaced with `is None`.
verification_pool.filter = (
    (toloka.filter.Languages.in_(f'{language}')) &
    (toloka.filter.Skill(handwriting_skill.id) == None)
)


In [None]:
# Set up quality control
# Quality is based on the majority of matching responses from performers who completed the same task.
verification_pool.quality_control.add_action(
    collector=toloka.collectors.MajorityVote(answer_threshold=2),
    # If a performer has 10 or more responses
    # and the responses are correct in less than 50% of cases,
    conditions=[
        toloka.conditions.TotalAnswersCount > 9,
        toloka.conditions.CorrectAnswersRate < 50,
    ],
    # we ban the performer from all our projects for 10 days
    action=toloka.actions.RestrictionV2(
        scope=toloka.user_restriction.UserRestriction.ALL_PROJECTS,
        duration=10,
        duration_unit='DAYS',
        private_comment=' Doesn\'t match the majority',  # Only you will see this comment
    )
)

# Set up checking skills using MajorityVote
# The verification skill is set from the 'correct_answers_rate' field
# once the performer has more than 2 responses
verification_pool.quality_control.add_action(
    collector=toloka.collectors.MajorityVote(answer_threshold=2, history_size=10),
    conditions=[
        toloka.conditions.TotalAnswersCount > 2,
    ],
    action=toloka.actions.SetSkillFromOutputField(
        skill_id=verification_skill.id,
        from_field='correct_answers_rate',
    ),
)
# Number of quality control rules configured on the pool so far
print('Quality rule count:', len(verification_pool.quality_control.configs))

#### Задания, которые были размечены неверно, отправляем обратно в проект 1

In [None]:
# Set the task count for one page and configure mixing for incremental relabeling
verification_pool.set_mixer_config(
    real_tasks_count=10,
    golden_tasks_count=0,
    training_tasks_count=0,
    # NOTE(review): the original comment calls this "shuffle mode" — confirm that True is
    # the value incremental relabeling needs
    mix_tasks_in_creation_order=True,
    force_last_assignment=True,
)
# Create incremental relabeling
verification_pool.set_dynamic_overlap_config(
    type='BASIC',
    max_overlap=5,       # Each task can be completed a maximum of 5 times
    min_confidence=0.8,  # Percentage, where 100% = 1.0
    answer_weight_skill_id=verification_skill.id,  # Incremental relabeling by verification skill
    # The field must be an output field of the verification project: its output spec declares
    # only 'result' (the same field that aggregation uses), so 'answer' would not match anything.
    fields=[toloka.pool.DynamicOverlapConfig.Field(name='result')],
)

# Create the pool through the API; the returned object carries the pool id
verification_pool = toloka_client.create_pool(verification_pool)
print(f'To view this pool, visit: https://toloka.yandex.com/requester/project/{verification_project.id}/pool/{verification_pool.id}')

#### Добавим задания и запустим проекты

In [None]:
# One handwriting task per phrase; only the first 20 values of the 'INPUT:text' column are used
phrases = dataset['INPUT:text'].values[:20]
tasks = [
    toloka.task.Task(pool_id=handwriting_pool.id, input_values={'text': phrase})
    for phrase in phrases
]

# Upload the tasks to the pool
upload_parameters = toloka.task.CreateTasksParameters(allow_defaults=True)
toloka_client.create_tasks(tasks, upload_parameters)
print(f'Populated handwriting pool with {len(tasks)} tasks')
print(f'To view this pool, visit: https://toloka.yandex.com/requester/project/{handwriting_project.id}/pool/{handwriting_pool.id}')

# Start the handwriting pool and keep the refreshed pool object
handwriting_pool = toloka_client.open_pool(handwriting_pool.id)

In [None]:
def wait_pool_for_close(pool, sleep_time=60):
    """Block until the given pool is closed, printing its status while waiting.

    The pool is re-read through `toloka_client` before every check, so the
    status that is tested and printed is the one returned by the API rather
    than the one stored in the object passed in.

    Args:
        pool: pool object; only its `id` is used.
        sleep_time: pause between two status checks, in seconds. Defaults to
            60, which was the previously hard-coded interval.
    """
    pool = toloka_client.get_pool(pool.id)
    while not pool.is_closed():
        print(
            f'   {datetime.datetime.now().strftime("%H:%M:%S")}\t'
            f'Pool {pool.id} has status {pool.status}.'
        )
        time.sleep(sleep_time)
        pool = toloka_client.get_pool(pool.id)

# Wait for the handwriting pool: the call returns only once the pool is reported as closed
print('\nWaiting for the handwriting pool to close')
wait_pool_for_close(handwriting_pool)
print(f'handwriting pool {handwriting_pool.id} is finally closed!')

Из-за специфики работы с Толокой мы не можем напрямую передать файлы, полученные от толокеров в первом проекте, во второй проект. Сначала их нужно загрузить на Яндекс.Облако или Яндекс.Диск и получить публичные ссылки, которые затем подставляются в задания.

In [None]:
import requests

In [None]:
# Authorization header for direct calls to the Toloka REST API
headers = {"Authorization": f"OAuth {token}"}

In [None]:
# List the attachments that performers uploaded in the handwriting pool.
# `pool` does not exist at notebook level (it is only a local name inside helper functions),
# so the pool id is taken from `handwriting_pool`.
req = requests.get(
    "https://toloka.yandex.ru/api/v1/attachments",
    params={"pool_id": handwriting_pool.id},
    headers=headers,
)
# Fail here with a clear HTTP error instead of a confusing KeyError when the body is parsed later
req.raise_for_status()

In [None]:
import json

In [None]:
# Display the 'items' list from the JSON body of the attachments response
req.json()['items']

In [None]:
import yadisk

In [None]:
# Ask for the disk OAuth token; it is used both for raw REST calls and for the yadisk client
disk_token = input()
y = yadisk.YaDisk(token=disk_token)
disk_headers = {"Authorization": f"OAuth {disk_token}"}

In [None]:
# Download every attachment from Toloka, upload it to the disk, publish it and remember
# the public link. The result is a list of (public_url, attachment_id) pairs.
urls = []
# The attachments come from the listing response; `data` is the list of phrases and has no 'items'.
# The list is taken once before the loop, so the per-file responses below cannot interfere with it.
attachments = req.json()['items']
for file in attachments:
    fileid = file['id']
    filename = f'{fileid}.jpg'

    # Download the attachment body from Toloka
    download = requests.get(
        f"https://toloka.yandex.ru/api/v1/attachments/{fileid}/download",
        headers=headers,
    )
    download.raise_for_status()
    # `with` guarantees that the file is flushed and closed before it is uploaded
    with open(filename, 'wb') as image_file:
        image_file.write(download.content)

    # Upload to the disk under the same name
    y.upload(filename, filename)

    # Publish exactly the file that was just uploaded (not a fixed sample.jpg) ...
    req_put = requests.put(
        "https://cloud-api.yandex.net/v1/disk/resources/publish",
        params={"path": f"/{filename}"},
        headers=disk_headers,
    )
    req_put.raise_for_status()
    # ... and read its metadata to get the public link
    req_get = requests.get(
        "https://cloud-api.yandex.net/v1/disk/resources",
        params={"path": f"disk:/{filename}"},
        headers=disk_headers,
    )
    req_get.raise_for_status()
    urls.append((req_get.json()['public_url'], fileid))

#### Дальше запускаем только после закрытия пула!

In [None]:
def prepare_verification_tasks():
    """Build verification tasks for the submitted handwriting assignments.

    `urls` holds (public_url, attachment_id) pairs, not assignment objects, so
    the assignments are requested from the handwriting pool and every solution
    is matched to the public link of its photo.

    Returns:
        list of toloka.task.Task for the verification pool, one per
        (task, solution) pair whose photo has a public link in `urls`.
    """
    # attachment id -> public link of the uploaded photo
    public_url_by_attachment = {attachment_id: public_url for public_url, attachment_id in urls}

    verification_tasks = []  # Tasks that we will send for verification
    for assignment in toloka_client.get_assignments(pool_id=handwriting_pool.id, status='SUBMITTED'):
        # A page may contain several tasks, so every task/solution pair is handled
        for task, solution in zip(assignment.tasks, assignment.solutions):
            # NOTE(review): assumes the 'result' file output holds the attachment id — confirm
            image_url = public_url_by_attachment.get(solution.output_values['result'])
            if image_url is None:
                # No public link yet: the photo has to be downloaded and published first
                # (re-run the upload cell), otherwise it cannot be shown to verifiers
                continue
            verification_tasks.append(
                toloka.task.Task(
                    input_values={
                        'text': task.input_values['text'],
                        'image': image_url,
                        'assignment_id': assignment.id,
                    },
                    pool_id=verification_pool.id,
                )
            )
    print(f'Generated {len(verification_tasks)} new verification tasks')
    return verification_tasks


def run_verification_pool(verification_tasks):
    """Upload the verification tasks, open the verification pool and return the task mapping.

    Args:
        verification_tasks: tasks to add to the verification pool.

    Returns:
        dict mapping the id of every created verification task to the
        'assignment_id' stored in its input values.
    """
    creation_parameters = toloka.task.CreateTasksParameters(allow_defaults=True)
    created = toloka_client.create_tasks(verification_tasks, creation_parameters)

    # Remember which handwriting assignment every verification task refers to; needed later
    task_to_assignment = {
        created_task.id: created_task.input_values['assignment_id']
        for created_task in created.items.values()
    }

    # Start the verification pool and report its status
    opened_pool = toloka_client.open_pool(verification_pool.id)
    print(f'Verification pool status - {opened_pool.status}')
    return task_to_assignment

# Prepare the verification tasks
verification_tasks = prepare_verification_tasks()
# Add them to the verification pool, open the pool and keep the mapping
# from verification task id to handwriting assignment id
task_to_assignment = run_verification_pool(verification_tasks)

In [None]:
# Block until the verification pool is reported as closed
print('\nWaiting for verification pool to close')
wait_pool_for_close(verification_pool)
print(f'Verification pool {verification_pool.id} is finally closed!')

#### Агрегация по необходимости

In [None]:
def get_aggregation_results():
    """Aggregate the answers of the verification pool and return all aggregated solutions.

    The aggregation is weighted by the verification skill and runs on the
    'result' field. The results are read page by page.

    Returns:
        list with the aggregated solutions of all pages.
    """
    print('Start aggregation in the verification pool')
    # Start the aggregation and wait for it: this may take some time
    operation = toloka_client.wait_operation(
        toloka_client.aggregate_solutions_by_pool(
            type=toloka.aggregation.AggregatedSolutionType.WEIGHTED_DYNAMIC_OVERLAP,
            pool_id=verification_pool.id,   # Aggregate in this pool
            answer_weight_skill_id=verification_skill.id,   # Aggregate by this skill
            fields=[toloka.aggregation.PoolAggregatedSolutionRequest.Field(name='result')]  # Aggregate this field
        )
    )
    print('Results aggregated')

    # The first page is deliberately small to show how to iterate over the results
    page = toloka_client.find_aggregated_solutions(operation.id, limit=5)
    collected = page.items
    while page.has_more:
        # Continue after the last task id of the previous page, otherwise we would loop back
        page = toloka_client.find_aggregated_solutions(
            operation.id,
            task_id_gt=page.items[-1].task_id,
        )
        collected = collected + page.items
    return collected

def set_handwriting_status(verification_results):
    """Accept or reject handwriting assignments according to the verification verdicts.

    A handwriting page can contain several tasks, so several verification
    results may point to the same assignment. The verdicts are first combined
    per assignment and every assignment is then accepted or rejected exactly
    once: accepted only if all of its verified tasks are 'OK'.

    Args:
        verification_results: aggregated solutions of the verification pool;
            `task_id` and `output_values['result']` are read from each item.
    """
    print('Started adding results to handwriting tasks')

    # assignment id -> True while every verified task of the assignment is 'OK'
    verdict_by_assignment = {}
    for r in verification_results:
        # Only assignments stored by the last verification run are processed
        if r.task_id not in task_to_assignment:
            continue
        assignment_id = task_to_assignment[r.task_id]
        is_ok = r.output_values['result'] == 'OK'
        verdict_by_assignment[assignment_id] = verdict_by_assignment.get(assignment_id, True) and is_ok

    # One call per assignment: a repeated accept/reject of the same assignment raises an exception
    for assignment_id, accepted in verdict_by_assignment.items():
        if accepted:
            toloka_client.accept_assignment(assignment_id, "Well done!")
        else:
            toloka_client.reject_assignment(
                assignment_id,
                'The phrase is written incorrectly or the photo is unreadable.',
            )
    print('Finished adding results to handwriting tasks')


# Run the aggregation in the verification pool and collect the aggregated solutions
verification_results = get_aggregation_results()
# Reject or accept the handwriting assignments according to those solutions
set_handwriting_status(verification_results)

In [None]:
# Repeat "wait for handwriting -> verify -> accept/reject" until there is nothing left to verify
pending = True
while pending:
    print('\nWaiting for handwriting pool to close')
    wait_pool_for_close(handwriting_pool)
    print(f'handwriting pool {handwriting_pool.id} is finally closed!')

    # Build the next batch of verification tasks; an empty batch ends the loop
    verification_tasks = prepare_verification_tasks()
    pending = bool(verification_tasks)
    if not pending:
        print('All the tasks in our project are done')
        continue

    # Upload the batch and start the verification pool
    task_to_assignment = run_verification_pool(verification_tasks)

    print('\nWaiting for verification pool to close')
    wait_pool_for_close(verification_pool)
    print(f'Verification pool {verification_pool.id} is finally closed!')

    # Aggregate the verdicts, then accept or reject the handwriting assignments
    verification_results = get_aggregation_results()
    set_handwriting_status(verification_results)


print(f'Results received at {datetime.datetime.now()}')

#### Далее — дополнительная часть (по необходимости). В зависимости от содержания первого датасета (количества слов в каждой фразе) и цели сбора разметки выделение отдельных слов рамками может не потребоваться. Здесь снова два проекта: сама разметка и проверка её корректности

### Шаг (проект) 3. Получим от толокеров разметку слов на изображении рамками

In [None]:
# Dataset for the third project: a list of links to images (tab-separated file)
dataset = pandas.read_csv('dataset.tsv', sep='\t')
dataset.head()

#### Интерфейс проекта и все такое

In [None]:
# How performers will see the task
# NOTE(review): all performer-facing texts in this cell talk about traffic signs, while the
# section heading describes outlining words on the image — confirm the intended wording.
project_interface = toloka.project.view_spec.TemplateBuilderViewSpec(
    config=tb.TemplateBuilder(
        view=tb.fields.ImageAnnotationFieldV1(  # component for selecting areas in images
            image=tb.data.InputData(path='image'),  # getter for the input image
            data=tb.data.OutputData(path='result'),  # path for writing output data
            shapes={tb.fields.ImageAnnotationFieldV1.Shape.RECTANGLE: True},  # allow to select only rectangular areas
            validation=tb.conditions.RequiredConditionV1()  # at least one area should be selected
        )
    )
)

# You can write instructions and upload them from a file or enter them later in the web interface
# prepared_instruction = open('instruction.html').read().strip()
prepared_instruction = '<b>Draw a rectangle around all the traffic signs in the image.</b>'

# Set up the project
segmentation_project = toloka.project.Project(
    assignments_issuing_type=toloka.project.Project.AssignmentsIssuingType.AUTOMATED,
    public_name='Outline the traffic signs in the image',
    public_description='Outline all traffic signs in the image with a rectangle',
    public_instructions=prepared_instruction,
    # Set up the task: view, input, and output parameters
    task_spec=toloka.project.task_spec.TaskSpec(
        input_spec={'image': toloka.project.field_spec.UrlSpec()},  # link to the image
        output_spec={'result': toloka.project.field_spec.JsonSpec()},  # selected areas
        view_spec=project_interface,
    ),
)

# Call the API to create a new project; the returned object carries the assigned id
segmentation_project = toloka_client.create_project(segmentation_project)
print(f'Created segmentation project with id {segmentation_project.id}')
print(f'To view the project, go to: https://toloka.yandex.com/requester/project/{segmentation_project.id}')

#### Снова два навыка - для разделения толокеров, чтобы исполнители в 3 и 4 проектах не пересекались и никто не проверял сам себя, и навык качества

In [None]:
# Skill that marks performers of the segmentation project.
# Reuse it when a skill with this name already exists, otherwise create it.
segmentation_skill_name = 'Area selection of road signs'
segmentation_skill = next(toloka_client.get_skills(name=segmentation_skill_name), None)
if not segmentation_skill:
    print('Create new segmentation skill')
    segmentation_skill = toloka_client.create_skill(
        name=segmentation_skill_name,
        hidden=True,
        public_requester_description={'EN': 'Performer is annotating road signs'},
    )
else:
    print('Segmentation skill already exists')

# Quality skill for the segmentation verification project; same reuse-or-create logic.
# The name `verification_skill` is rebound here.
segmentation_verification_skill_name = 'Segmentation verification'
verification_skill = next(toloka_client.get_skills(name=segmentation_verification_skill_name), None)
if not verification_skill:
    print('Create new verification skill')
    verification_skill = toloka_client.create_skill(
        name=segmentation_verification_skill_name,
        hidden=True,
        public_requester_description={'EN': 'How good a performer is at verifying segmentation tasks'},
    )
else:
    print('Verification skill already exists')

#### Подготовка пула

In [None]:
# Pool of the segmentation project; it is only described here and created through the API later
segmentation_pool = toloka.pool.Pool(
    project_id=segmentation_project.id,
    private_name='Pool 1',  # Only you can see this information.
    may_contain_adult_content=False,
    will_expire=datetime.datetime.utcnow() + datetime.timedelta(days=365),  # Expiry date: one year from now
    reward_per_assignment=0.01,     # Payment for one task page
    auto_accept_solutions=False,    # We will only pay the performer for completing the task,
                                    #    based on the verification results of the second project
    auto_accept_period_day=1,       # Number of days to determine if we'll pay
    assignment_max_duration_seconds=60*20,  # Give performers 20 minutes to complete one task page
    defaults=toloka.pool.Pool.Defaults(
        # We don't need overlapping for segmentation tasks
        default_overlap_for_new_task_suites=1,
        default_overlap_for_new_tasks=1,
    ),
)

# Set the number of tasks per page: 1 real task, no golden or training tasks
segmentation_pool.set_mixer_config(real_tasks_count=1, golden_tasks_count=0, training_tasks_count=0)
# Please note that the payment amount specified when creating the pool is the amount the performer receives for completing one page of tasks.
# If you specify 10 tasks per page above, then reward_per_assignment will be paid for completing 10 tasks.

# We'll only show our tasks to English-speaking users because the description of the task is in English.
# This means that only people who speak English will be able to accept this task.
segmentation_pool.filter = toloka.filter.Languages.in_('EN')

print(segmentation_pool.private_name)

#### Настройка контроля качества

In [None]:
# The first rule in this project restricts access for performers who often make mistakes.
# It is based on the AcceptanceRate collector.
segmentation_pool.quality_control.add_action(
    collector=toloka.collectors.AcceptanceRate(),
    conditions=[
        # Performer has more than 2 assignments in total
        toloka.conditions.TotalAssignmentsCount > 2,
        # and more than 35% of their assignments were rejected
        toloka.conditions.RejectedAssignmentsRate > 35,
    ],
    # This action tells Toloka what to do if the conditions above are True
    # In our case, we'll restrict access to all projects for 15 days
    # Always leave a comment: it may be useful later on
    action=toloka.actions.RestrictionV2(
        scope=toloka.user_restriction.UserRestriction.ALL_PROJECTS,
        duration=15,
        duration_unit='DAYS',
        private_comment='Performer often make mistakes',  # Only you will see this comment
    )
)

In [None]:
# The second useful rule is "Fast responses". It allows us to filter out performers who respond too quickly.
segmentation_pool.quality_control.add_action(
    # Let's monitor fast submissions for the last 5 completed task pages
    # and define a quick response as one that takes less than 20 seconds
    collector=toloka.collectors.AssignmentSubmitTime(history_size=5, fast_submit_threshold_seconds=20),
    # If we see more than one fast response,
    conditions=[toloka.conditions.FastSubmittedCount > 1],
    # we ban the performer from all our projects for 10 days
    action=toloka.actions.RestrictionV2(
        scope=toloka.user_restriction.UserRestriction.ALL_PROJECTS,
        duration=10,
        duration_unit='DAYS',
        private_comment='Fast responses',  # Only you will see this comment
    )
)

In [None]:
# Another rule we use is for automatically updating skills
# This isn't really about quality, but rules can do a lot of useful things
# We set the segmentation skill for performers who have at least one accepted assignment
segmentation_pool.quality_control.add_action(
    collector=toloka.collectors.AnswerCount(),
    # If the performer has at least one accepted assignment,
    conditions=[toloka.conditions.AssignmentsAcceptedCount > 0],
    # it doesn't add to the skill, it sets the skill value to 1
    action=toloka.actions.SetSkill(skill_id=segmentation_skill.id, skill_value=1),
)

# Recompletion of rejected assignments sends the tasks you rejected to other performers according to the specified rule.
segmentation_pool.quality_control.add_action(
    collector=toloka.collectors.AssignmentsAssessment(),
    # Check if an assignment was rejected
    conditions=[toloka.conditions.AssessmentEvent == toloka.conditions.AssessmentEvent.REJECT],
    # If the condition is True, add 1 to overlap and open the pool
    action=toloka.actions.ChangeOverlap(delta=1, open_pool=True),
)

# Number of quality control rules configured on the pool so far
print('Quality rules count:', len(segmentation_pool.quality_control.configs))

In [None]:
# Create the pool through the API; the returned object replaces the local description and carries the pool id
segmentation_pool = toloka_client.create_pool(segmentation_pool)
print(f'To view this pool, visit: https://toloka.yandex.com/requester/project/{segmentation_project.id}/pool/{segmentation_pool.id}')

### Шаг (проект) 4. Проверка проекта 3 другими толокерами

In [None]:
# How performers will see the segmentation verification task:
# the image with the previously selected areas and a Yes/No question
verification_interface = toloka.project.view_spec.TemplateBuilderViewSpec(
    config=tb.TemplateBuilder(
        view=tb.view.ListViewV1(  # list of components that should be positioned from top to bottom in the ui
            items=[
                tb.fields.ImageAnnotationFieldV1(  # image and selected areas to verify
                    image=tb.data.InputData(path='image'),
                    data=tb.data.InternalData(path='selection',
                                              default=tb.data.InputData(path='selection')),  # using the input field as default value to display the selected areas
                    disabled=True  # disable adding and deleting areas
                ),
                tb.fields.RadioGroupFieldV1(  # a component for selecting one value out of several options
                    label='Are all traffic signs outlined correctly?',  # label above the options
                    data=tb.data.OutputData(path='result'),  # path for writing output data
                    options=[
                        tb.fields.GroupFieldOption(label='Yes', value='OK'),
                        tb.fields.GroupFieldOption(label='No', value='BAD'),
                    ],
                    validation=tb.conditions.RequiredConditionV1()  # requirement to select one of the options
                )
            ]
        ),
        plugins=[
            tb.plugins.HotkeysPluginV1( # shortcuts for selecting options using the keyboard
                # keys 1 and 2 write 'OK' / 'BAD' to the same 'result' output as the radio group
                key_1=tb.actions.SetActionV1(data=tb.data.OutputData(path='result'), payload='OK'),
                key_2=tb.actions.SetActionV1(data=tb.data.OutputData(path='result'), payload='BAD')
            )
        ]
    )
)

# You can write instructions and upload them from a file or enter them later in the web interface
# prepared_instruction = open('instruction.html').read().strip()
# The instruction text is HTML
verification_instruction = '''<b>Look at the image and answer the question:</b><br/>
Are all traffic signs outlined correctly?<br/>
If they are, click Yes.<br/>
If they aren't, click No.<br/>
For example, the road signs here are outlined correctly, so the correct answer is Yes.'''

# Set up the project
verification_project = toloka.project.Project(
    assignments_issuing_type=toloka.project.Project.AssignmentsIssuingType.AUTOMATED,
    public_name='Are the traffic signs outlined correctly?',
    public_description='Look at the image and decide whether or not the traffic signs are outlined correctly',
    public_instructions=verification_instruction,
    # Set up the task: view, input, and output parameters
    task_spec=toloka.project.task_spec.TaskSpec(
        input_spec={
            'image': toloka.project.field_spec.UrlSpec(),
            'selection': toloka.project.field_spec.JsonSpec(),
            'assignment_id': toloka.project.field_spec.StringSpec(),
        },
        # We have to set allowed_values because we'll be using smart mixing to get the results of this project
        output_spec={'result': toloka.project.field_spec.StringSpec(allowed_values=['OK', 'BAD'])},
        view_spec=verification_interface,
    ),
)

# Call the API to create a new project
verification_project = toloka_client.create_project(verification_project)
print(f'Created verification project with id {verification_project.id}')
print(f'To view the project, go to: https://toloka.yandex.com/requester/project/{verification_project.id}')

In [None]:
# Describe the verification pool locally; it is sent to the API in a later cell
verification_pool = toloka.pool.Pool(    
    project_id=verification_project.id,
    private_name='Pool 1. Road sign verification',  # Only you can see this information.
    may_contain_adult_content=False,
    will_expire=datetime.datetime.utcnow() + datetime.timedelta(days=365),  # Pool will close after one year
    reward_per_assignment=0.01,  # We set the minimum payment amount for one task page
                                 # By default, auto_accept_solutions is on,
                                 # so we'll pay for all tasks
    assignment_max_duration_seconds=60*10,  # Give performers 10 minutes to complete one task
    defaults=toloka.pool.Pool.Defaults(
        # We need an overlap so that performers can be checked against each other,
        # and the initial overlap must be less than the max_overlap
        # of incremental relabeling (dynamic overlap)
        default_overlap_for_new_task_suites=2,
    ),
)

# We'll only show our tasks to English-speaking users because the description of the task is in English.
# We also won't allow our verification tasks to be performed by users who performed segmentation tasks
verification_pool.filter = (
    (toloka.filter.Languages.in_('EN')) &
    # `== None` is intentional: the comparison result is combined with `&` as a
    # filter condition, so it must not be rewritten as `is None`
    (toloka.filter.Skill(segmentation_skill.id) == None)
)


In [None]:
# Set up quality control
# Quality is based on the majority of matching responses from performers who completed the same task.
verification_pool.quality_control.add_action(
    collector=toloka.collectors.MajorityVote(answer_threshold=2),
    # If a performer has 10 or more responses
    # and the responses are correct in less than 50% of cases,
    conditions=[
        toloka.conditions.TotalAnswersCount > 9,
        toloka.conditions.CorrectAnswersRate < 50,
    ],
    # we ban the performer from all our projects for 10 days
    action=toloka.actions.RestrictionV2(
        scope=toloka.user_restriction.UserRestriction.ALL_PROJECTS,
        duration=10,
        duration_unit='DAYS',
        private_comment=' Doesn\'t match the majority',  # Only you will see this comment
    )
)

# Set up the verification skill using MajorityVote
# Once a performer has more than 2 responses, the skill value is taken
# from the collector's 'correct_answers_rate' output field
verification_pool.quality_control.add_action(
    collector=toloka.collectors.MajorityVote(answer_threshold=2, history_size=10),
    conditions=[
        toloka.conditions.TotalAnswersCount > 2,
    ],
    action=toloka.actions.SetSkillFromOutputField(
        skill_id=verification_skill.id,
        from_field='correct_answers_rate',
    ),
)
# Sanity check: show how many quality rules are configured on the pool
print('Quality rule count:', len(verification_pool.quality_control.configs))

#### Задания, которые были размечены неверно, отправляем обратно в проект 3

In [None]:
# Set the task count for one page and configure task mixing for incremental relabeling
# NOTE(review): the comments in this cell describe the setting below as "shuffle",
# but the parameter is named mix_tasks_in_creation_order and is set to True -
# confirm against the mixer config documentation that this is the intended value.
verification_pool.set_mixer_config(
    real_tasks_count=10,
    golden_tasks_count=0,
    training_tasks_count=0,
    mix_tasks_in_creation_order=True,  # Enable shuffle mode to use incremental relabeling
    force_last_assignment=True,
)
# Create incremental relabeling
verification_pool.set_dynamic_overlap_config(
    type='BASIC',
    max_overlap=5,       # Each task can be completed a maximum of 5 times
    min_confidence=0.8,  # Percentage, where 100% = 1.0
    answer_weight_skill_id=verification_skill.id,  # Incremental relabeling by verification skill
    fields=[toloka.pool.DynamicOverlapConfig.Field(name='result')],
)

# Call the API to create the pool; the returned object replaces the local draft
verification_pool = toloka_client.create_pool(verification_pool)
print(f'To view this pool, visit: https://toloka.yandex.com/requester/project/{verification_project.id}/pool/{verification_pool.id}')

#### Добавим задания и запустим проекты

In [None]:
# Build one segmentation task per image URL.
# Only the first 20 values of the 'image' column are used.
# NOTE(review): assumes `dataset` has an 'image' column at this point - confirm,
# since the dataset loaded at the top of the notebook is a list of phrases.
tasks = [
    toloka.task.Task(input_values={'image': url}, pool_id=segmentation_pool.id)
    for url in dataset['image'].values[:20]
]
# Add tasks to a pool
toloka_client.create_tasks(tasks, toloka.task.CreateTasksParameters(allow_defaults=True))
print(f'Populated segmentation pool with {len(tasks)} tasks')
print(f'To view this pool, visit: https://toloka.yandex.com/requester/project/{segmentation_project.id}/pool/{segmentation_pool.id}')

# Open the segmentation pool
segmentation_pool = toloka_client.open_pool(segmentation_pool.id)

In [None]:
def wait_pool_for_close(pool, sleep_time=60):
    """Block until the given pool is closed, printing its status while waiting.

    The pool is re-fetched through ``toloka_client`` before every check, so the
    passed-in object is only used for its ``id``.

    Args:
        pool: Pool to wait for.
        sleep_time: Pause between two status checks, in seconds. Defaults to 60,
            the interval that used to be hard-coded.
    """
    pool = toloka_client.get_pool(pool.id)
    while not pool.is_closed():
        print(
            f'   {datetime.datetime.now().strftime("%H:%M:%S")}\t'
            f'Pool {pool.id} has status {pool.status}.'
        )
        time.sleep(sleep_time)
        # Re-fetch the pool so that the next check sees its current status
        pool = toloka_client.get_pool(pool.id)

# Wait for the segmentation pool
# This call blocks the cell until the pool reports that it is closed
print('\nWaiting for the segmentation pool to close')
wait_pool_for_close(segmentation_pool)
print(f'Segmentation pool {segmentation_pool.id} is finally closed!')

#### Дальше запускаем только после закрытия пула!

In [None]:
def prepare_verification_tasks():
    """Build verification tasks from the submitted segmentation assignments.

    Every assignment of the segmentation pool that is in the SUBMITTED status
    (completed, but not yet accepted or rejected) produces one verification
    task. The task carries the image of the assignment's first task, the
    'result' of its first solution, and the assignment id.

    Returns:
        List of ``toloka.task.Task`` objects bound to the verification pool.
    """
    submitted_request = toloka.search_requests.AssignmentSearchRequest(
        status=toloka.assignment.Assignment.SUBMITTED,
        pool_id=segmentation_pool.id,
    )
    verification_tasks = [
        toloka.task.Task(
            input_values={
                'image': submitted.tasks[0].input_values['image'],
                'selection': submitted.solutions[0].output_values['result'],
                # Kept so that the verdict can be mapped back to this assignment
                'assignment_id': submitted.id,
            },
            pool_id=verification_pool.id,
        )
        for submitted in toloka_client.get_assignments(submitted_request)
    ]
    print(f'Generate {len(verification_tasks)} new verification tasks')
    return verification_tasks


def run_verification_pool(verification_tasks):
    """Upload the verification tasks and open the verification pool.

    Args:
        verification_tasks: Tasks to add to the verification pool.

    Returns:
        Dict that maps the id of every created verification task to the
        'assignment_id' stored in its input values.
    """
    creation_parameters = toloka.task.CreateTasksParameters(allow_defaults=True)
    creation_result = toloka_client.create_tasks(verification_tasks, creation_parameters)

    # Remember which segmentation assignment each verification task belongs to;
    # this is needed later to accept or reject the assignment
    assignment_by_task = {
        created.id: created.input_values['assignment_id']
        for created in creation_result.items.values()
    }

    # Open the verification pool
    opened_pool = toloka_client.open_pool(verification_pool.id)
    print(f'Verification pool status - {opened_pool.status}')
    return assignment_by_task

# Prepare the tasks
verification_tasks = prepare_verification_tasks()
# Add them to the pool and run the pool.
# The returned mapping (verification task id -> segmentation assignment id)
# is kept in a module-level variable that set_segmentation_status() reads.
task_to_assignment = run_verification_pool(verification_tasks)

In [None]:
# Block until the verification pool is closed
print('\nWaiting for verification pool to close')
wait_pool_for_close(verification_pool)
print(f'Verification pool {verification_pool.id} is finally closed!')

#### Агрегация по необходимости

In [None]:
def get_aggregation_results():
    """Aggregate the verification pool answers and return all aggregated solutions.

    Starts an aggregation operation over the 'result' field of the verification
    pool, weighted by the verification skill, waits for it to finish and then
    reads the aggregated solutions page by page.

    Returns:
        List with the aggregated solutions of all fetched pages.
    """
    print('Start aggregation in the verification pool')
    operation = toloka_client.aggregate_solutions_by_pool(
        type=toloka.aggregation.AggregatedSolutionType.WEIGHTED_DYNAMIC_OVERLAP,
        pool_id=verification_pool.id,
        answer_weight_skill_id=verification_skill.id,
        fields=[toloka.aggregation.PoolAggregatedSolutionRequest.Field(name='result')],
    )

    # The aggregation is an operation we have to wait for; this may take some time
    operation = toloka_client.wait_operation(operation)
    print('Results aggregated')

    # The first page is deliberately limited to 5 items
    # to show how to iterate over aggregation results
    page = toloka_client.find_aggregated_solutions(operation.id, limit=5)
    collected = page.items
    while page.has_more:
        # Continue after the last task id we have already seen (or else we'll loop back)
        last_seen_task_id = page.items[-1].task_id
        page = toloka_client.find_aggregated_solutions(
            operation.id,
            task_id_gt=last_seen_task_id,
        )
        collected = collected + page.items
    return collected

def set_segmentation_status(verification_results):
    """Accept or reject segmentation assignments based on the verification verdicts.

    Args:
        verification_results: Aggregated solutions of the verification pool.
            Each item provides ``task_id`` and ``output_values['result']``.

    Only tasks present in the module-level ``task_to_assignment`` mapping are
    processed: accepting or rejecting an assignment that already has a status
    would raise an exception.
    """
    print('Started adding results to segmentation tasks')
    for aggregated in verification_results:
        verified_task_id = aggregated.task_id
        if verified_task_id in task_to_assignment:
            # Look up the segmentation assignment behind this verification task
            assignment_id = task_to_assignment[verified_task_id]
            verdict = aggregated.output_values['result']
            if verdict == 'OK':
                toloka_client.accept_assignment(assignment_id, "Well done!")
            else:
                toloka_client.reject_assignment(assignment_id, 'The object wasn\'t selected or was selected incorrectly.')
    print('Finished adding results to segmentation tasks')


# Aggregation operation: collect the aggregated verdicts of the verification pool
verification_results = get_aggregation_results()
# Reject or accept tasks in the segmentation pool according to those verdicts
set_segmentation_status(verification_results)

In [None]:
# Repeat the cycle "wait for segmentation -> verify -> accept/reject"
# until there are no submitted assignments left to verify
while True:
    print('\nWaiting for segmentation pool to close')
    wait_pool_for_close(segmentation_pool)
    print(f'Segmentation pool {segmentation_pool.id} is finally closed!')

    # Collect the assignments that still need a verdict
    verification_tasks = prepare_verification_tasks()
    if not verification_tasks:
        # Nothing left to verify, so the whole project is finished
        print('All the tasks in our project are done')
        break

    # Upload the tasks and run the verification pool
    task_to_assignment = run_verification_pool(verification_tasks)

    print('\nWaiting for verification pool to close')
    wait_pool_for_close(verification_pool)
    print(f'Verification pool {verification_pool.id} is finally closed!')

    # Aggregate the verdicts, then reject or accept tasks in the segmentation pool
    verification_results = get_aggregation_results()
    set_segmentation_status(verification_results)


print(f'Results received at {datetime.datetime.now()}')