# AutoQuality

This example illustrates how to use the `toloka.autoquality` module. AutoQuality is a tool that helps set up quality control for a Toloka project.

In [None]:
# Install toloka-kit together with its optional `autoquality` extra, pinned to version 0.1.26.
!pip install toloka-kit[autoquality]==0.1.26

In [None]:
import logging
import sys

# Send log records of level INFO and above to stdout, prefixed with the
# record's level and the name of the logger that emitted it.
log_format = '[%(levelname)s] %(name)s: %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)

In [None]:
import toloka.client as toloka
import toloka.client.project.template_builder as tb
from toloka.autoquality import AutoQuality

import datetime
import numpy as np
import os
import requests
import pandas as pd
from tqdm import tqdm

In this example we will use a dataset for binary text classification.

In [None]:
# Number of rows sampled from the dataset into the working dataframe `df` below.
N_ROWS = 1000

def sample_stratified(df, label_column, n_rows):
    """Sample roughly ``n_rows`` rows from ``df`` while preserving the class distribution.

    Every class found in ``label_column`` contributes a share of rows
    proportional to its frequency in ``df``, rounded to the nearest integer,
    so the total may differ slightly from ``n_rows``. The sampled rows are
    returned in random order and keep their original index labels.
    """
    total_rows = len(df)

    def _take_share(group):
        # Proportional share of this class, rounded to a whole number of rows.
        share = int(np.rint(n_rows * len(group) / total_rows))
        return group.sample(share)

    per_class = df.groupby(label_column, group_keys=False).apply(_take_share)
    # frac=1 draws every row exactly once, i.e. shuffles the result.
    return per_class.sample(frac=1)

base_url = 'https://tlk.s3.yandex.net/ext_dataset/aclImdb'
# Join URL parts with an explicit '/': os.path.join uses the OS path separator
# (a backslash on Windows), which would produce an invalid URL.
df = pd.read_csv(f'{base_url}/test.csv')

# Hold out a stratified control set first, then sample the working set from
# the remaining rows so the two sets do not share any row.
df_control = sample_stratified(df, 'label', n_rows=1000)
df = df.drop(df_control.index)
df = sample_stratified(df, 'label', n_rows=N_ROWS)

# Replace the original index labels with a fresh 0..n-1 index in both frames.
df_control = df_control.reset_index(drop=True)
df = df.reset_index(drop=True)
df.head()

In [None]:
# Show how many sampled rows fall into each label.
df.label.value_counts()

In [None]:
def load_texts(urls, timeout=30):
    """Download the body of every URL in ``urls`` as text.

    Args:
        urls: Iterable of URLs to fetch; a ``tqdm`` progress bar is shown
            while iterating over it.
        timeout: Seconds to wait for each request before giving up, so one
            stalled connection cannot hang the whole download.

    Returns:
        A list with the text of each response, in the same order as ``urls``.

    Raises:
        requests.HTTPError: If any response has a 4xx/5xx status code.
        requests.RequestException: On connection errors or timeouts.
    """
    texts = []
    # A single session reuses the connection for consecutive requests to the same host.
    with requests.Session() as session:
        for url in tqdm(urls):
            resp = session.get(url, timeout=timeout)
            # Fail loudly instead of storing an error page as if it were a review text.
            resp.raise_for_status()
            texts.append(resp.text)
    return texts

# Each `path` value is relative to base_url; download the review text for
# every row of the working set and of the control set.
review_urls = base_url + '/' + df['path']
df['text'] = load_texts(review_urls)

control_urls = base_url + '/' + df_control['path']
df_control['text'] = load_texts(control_urls)

Let's create an appropriate Toloka project, a training pool, and a base pool (a pool with basic settings that AutoQuality will clone).

In [None]:
# Read the token with getpass so the secret is not echoed into the notebook output.
from getpass import getpass

token = getpass("Enter your token:")
toloka_client = toloka.TolokaClient(token, 'PRODUCTION')

In [None]:
# Task data schema: one string input field (`text`) and one string output field (`result`).
input_specification = dict(text=toloka.project.StringSpec())
output_specification = dict(result=toloka.project.StringSpec())

# Local project description; nothing is sent to Toloka at this point.
project = toloka.Project(
    private_comment='Auto quality control optimization experiments',
    public_name='Movie review classification',
    public_description='Classify sentiment of movie reviews',
)

In [None]:
# View that displays the review taken from the `text` input field.
text_viewer = tb.TextViewV1(tb.InputData('text'))

# Required single-choice field; the selected value ('pos' or 'neg') is written
# to the `result` output field.
radio_group_field = tb.ButtonRadioGroupFieldV1(
    tb.OutputData('result'),
    [
        tb.GroupFieldOption('pos', '😃 Positive'),
        tb.GroupFieldOption('neg', '😡 Negative'),
    ],
    label='What is the review sentiment?',
    validation=tb.RequiredConditionV1(hint='You need to select one answer'),
)

# Layout plugin: 'pager' layout with a task width of 500
# (presumably pixels — confirm against the Template Builder docs).
task_width_plugin = tb.TolokaPluginV1(
    layout=tb.TolokaPluginV1.TolokaPluginLayout(
        kind='pager', 
        task_width=500,
    )
)

# Keyboard shortcuts: key 1 sets `result` to 'pos', key 2 sets it to 'neg'.
hot_keys_plugin = tb.HotkeysPluginV1(
    key_1=tb.SetActionV1(tb.OutputData('result'), 'pos'),
    key_2=tb.SetActionV1(tb.OutputData('result'), 'neg'),
)

# The list view holds the answer buttons first and the review text second.
project_interface = toloka.project.TemplateBuilderViewSpec(
    view=tb.ListViewV1([radio_group_field, text_viewer]),
    plugins=[task_width_plugin, hot_keys_plugin],
)

# Attach the input/output schema and the interface to the project.
project.task_spec = toloka.project.task_spec.TaskSpec(
    input_spec=input_specification,
    output_spec=output_specification,
    view_spec=project_interface,
)

In [None]:
# HTML instructions attached to the project. The steps are an ordered list:
# an unordered list with hand-typed "1.", "2.", ... renders a bullet AND a
# number in front of every step.
project.public_instructions = """
<h2>How to complete the task</h2>
<ol>
<li>Look at the movie review text.</li>
<li>If it seems 😃 positive, assign the positive label. Otherwise assign the 😡 negative label.</li>
<li>If you are unsure choose the label that seems most appropriate.</li>
</ol>

In case of problems send us a message. Good luck!
""".strip()

In [None]:
# Create the project in Toloka and keep the returned object; its `id` is used
# when the pools are created below.
project = toloka_client.create_project(project)

In [None]:
# Local description of the training pool that belongs to the project.
training_pool = toloka.training.Training(
    project_id=project.id,
    private_name='Training pool',
    inherited_instructions=True,
    may_contain_adult_content=False,
    training_tasks_in_task_suite_count=5,
    task_suites_required_to_pass=1,
    mix_tasks_in_creation_order=True,
    shuffle_tasks_in_task_suite=True,
    assignment_max_duration_seconds=5 * 60,  # 5 minutes
    retry_training_after_days=5,
)

In [None]:
# Create the training pool in Toloka; the returned object carries the `id`
# that the training tasks and AutoQuality refer to.
training_pool = toloka_client.create_training(training_pool)

In [None]:
# Human-readable label names, shown in the message for a wrong training answer.
label_to_hint_map = {'pos': 'Positive', 'neg': 'Negative'}

tasks = []
for label_value, label_hint in label_to_hint_map.items():
    # The first three reviews of each class become training tasks.
    examples = df[df.label == label_value].head(3)

    for ex_tuple in examples.itertuples():
        solution = toloka.task.BaseTask.KnownSolution(
            output_values={'result': ex_tuple.label},
        )
        training_task = toloka.Task(
            pool_id=training_pool.id,
            input_values={'text': ex_tuple.text},
            known_solutions=[solution],
            message_on_unknown_solution=f'Incorrect label! The actual label is: {label_hint}',
            infinite_overlap=True,
        )
        tasks.append(training_task)

# Upload all training tasks in a single request.
result = toloka_client.create_tasks(tasks, allow_defaults=True)

In [None]:
# Audience filter: performers with 'EN' among their languages who use either
# the Toloka app or a browser.
language_filter = toloka.filter.Languages.in_('EN')
client_filter = (
    (toloka.filter.ClientType == 'TOLOKA_APP')
    | (toloka.filter.ClientType == 'BROWSER')
)

# Base pool description; AutoQuality clones this pool, so it holds only the basic settings.
base_pool = toloka.Pool(
    project_id=project.id,
    private_name='AutoQuality Base Pool',
    may_contain_adult_content=False,
    reward_per_assignment=0.01,
    assignment_max_duration_seconds=7 * 60,  # 7 minutes
    will_expire=datetime.datetime.utcnow() + datetime.timedelta(days=365),
    filter=language_filter & client_filter,
)

In [None]:
# Task suite composition: 4 regular tasks plus 1 golden (known-answer) task.
base_pool.set_mixer_config(
    real_tasks_count=4,
    golden_tasks_count=1
)

In [None]:
# Create the base pool in Toloka; its `id` is passed to AutoQuality below.
base_pool = toloka_client.create_pool(base_pool)

## AutoQuality basic usage

To use the AutoQuality class you need to set `project_id`, `base_pool_id` and `training_pool_id`. If your target label field is named something other than `label`, then you also need to specify it via `label_field`.

In [None]:
# AutoQuality instance bound to the project, base pool and training pool created above.
aq = AutoQuality(
    toloka_client=toloka_client,
    project_id=project.id,
    base_pool_id=base_pool.id,
    training_pool_id=training_pool.id,
    label_field='result',  # output field that holds the label in this project
    # you can also use exam pool
    # exam_pool_id = ...,
    # exam_skill_id = ...,
)

First, call `setup_pools` to create multiple pools with different quality control settings (autoquality pools).

In [None]:
# Create the autoquality pools, each with its own quality control settings.
aq.setup_pools()

Then use `create_tasks` to add tasks for every autoquality pool.

In [None]:
# Split the control set: the first n_optim rows and all remaining rows are
# kept as two separate frames.
n_optim = 200
df_optim = df_control.head(n_optim).copy()
df_optim_golden = df_control.iloc[n_optim:].copy()
(df_optim.shape, df_optim_golden.shape)

In [None]:
# Tasks that will be passed to AutoQuality; filled in the next cell.
aq_tasks = []

In [None]:
# Regular tasks: only the review text, no known answer.
for row in df_optim.itertuples():
    aq_tasks.append(toloka.Task(input_values={'text': row.text}))

# Golden tasks: the review text plus its known label. The label must come from
# the current `row`; the previous code read `ex_tuple`, a loop variable left
# over from the training-task cell, so every golden task received the same label.
for row in df_optim_golden.itertuples():
    golden_solution = toloka.task.BaseTask.KnownSolution(
        output_values={'result': row.label},
    )
    aq_tasks.append(
        toloka.Task(
            input_values={'text': row.text},
            known_solutions=[golden_solution],
        )
    )

In [None]:
# Add the prepared tasks to every autoquality pool.
aq.create_tasks(aq_tasks)

Finally, just `run` autoquality. 

In [None]:
# Run AutoQuality on the pools and tasks prepared above.
aq.run()

After that your autoquality instance will have some useful attributes with the results of the work.

In [None]:
# Id of the pool that AutoQuality selected as the best one.
aq.best_pool_id

In [None]:
# Parameters of the best pool (presumably its sampled quality control settings).
aq.best_pool_params

You can also compare all autoquality pools by a variety of different metrics.

In [None]:
# Ranks used to compare the autoquality pools.
aq.ranks

And archive all pools created by autoquality.

In [None]:
# Archive all pools created by this AutoQuality instance.
aq.archive_autoquality_pools()

## AutoQuality advanced usage

The AutoQuality class provides many ways to customize your optimization algorithm. Let's create another instance with different settings.

First of all, you can set `n_iter` parameter which determines how many autoquality pools will be created.

In [None]:
# Second AutoQuality instance on the same project and pools.
aq = AutoQuality(
    toloka_client=toloka_client,
    project_id=project.id,
    base_pool_id=base_pool.id,
    training_pool_id=training_pool.id,
    label_field='result',
    n_iter=5,  # number of autoquality pools to create
)

You can also change the distributions of the quality control parameters optimized by AutoQuality. In this example we will change the distributions for the majority vote rule. AutoQuality will sample new values for every autoquality pool from these distributions.

In [None]:
from scipy import stats
# Replace the sampling space of the MajorityVote rule: `history_size` is picked
# from a fixed list, `incorrect_answers_rate` is drawn from a normal
# distribution with mean 70 and standard deviation 10.
aq.parameter_distributions['MajorityVote'] = {
    'history_size': [3, 5, 7],
    'incorrect_answers_rate': stats.norm(loc=70, scale=10),
}

Finally, you can customize the methods that calculate scores or ranks. Let's modify the ranking function to give preference to cheaper pools. Do not forget to assign your new rank to the `main_rank` column so that AutoQuality knows how to choose the best pool.

In [None]:
from toloka.autoquality.scoring import default_calc_ranks
def my_new_calc_ranks(scores_df: pd.DataFrame) -> pd.DataFrame:
    """Rank pools with extra weight on spending per task.

    Starts from the ranks produced by ``default_calc_ranks`` and adds a
    ``my_new_rank`` column: a weighted sum of four rank columns in which
    spending per task carries half of the total weight. The same values are
    copied to ``main_rank``.

    Args:
        scores_df: Pool scores, passed through to ``default_calc_ranks``.

    Returns:
        The ranks dataframe with ``my_new_rank`` and ``main_rank`` set.
    """
    ranks = default_calc_ranks(scores_df)

    # The weights sum to 1.0.
    rank_weights = {
        'avg_quality_rank': 0.4,
        'bans_ratio_rank': 0.1 * (1 / 2),
        'spending_per_task_rank': 0.5,
        'tasks_per_second_rank': 0.1 * (1 / 2),
    }
    # NOTE(review): the *_rank columns are read from `scores_df`, not from
    # `ranks`; this assumes default_calc_ranks leaves them in `scores_df` — confirm.
    ranks['my_new_rank'] = sum(
        weight * scores_df[column] for column, weight in rank_weights.items()
    )
    ranks['main_rank'] = ranks['my_new_rank']
    return ranks
# Make AutoQuality rank pools with the custom function defined above.
aq.ranking_func = my_new_calc_ranks

You can create completely new scoring and ranking functions to use AutoQuality the way you need. Just keep the same signature as in the [default methods](https://github.com/Toloka/toloka-kit/blob/main/src/autoquality/scoring.py).

Now let's run our modified AutoQuality instance again

In [None]:
# Create the autoquality pools for the customized instance.
aq.setup_pools()

In [None]:
# Reuse the task list built earlier for the new autoquality pools.
aq.create_tasks(aq_tasks)

In [None]:
# Run the customized AutoQuality instance.
aq.run()

In [None]:
# Parameters of the pool chosen as best by the customized run.
aq.best_pool_params

In [None]:
# Ranks from the customized run; expected to include the `my_new_rank` column added by the custom ranking function.
aq.ranks

In [None]:
# Archive the pools created by the customized AutoQuality instance.
aq.archive_autoquality_pools()