# Sentiment classification of IMDB movie reviews

Dataset source: https://ai.stanford.edu/~amaas/data/sentiment/

Paper: https://aclanthology.org/P11-1015

## Install dependencies and import

In [None]:
!pip3 install crowd-kit==0.0.5
!pip3 install toloka-kit==0.1.13

In [1]:
import datetime
import getpass
import logging
import os
import sys
import time

import numpy as np
import pandas as pd
import requests
from sklearn.metrics import balanced_accuracy_score
from tqdm.auto import tqdm

import toloka.client as toloka
import toloka.client.project.template_builder as tb

from crowdkit.aggregation import DawidSkene
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Raise the pandas column-width limit so long review texts stay readable when displayed.
pd.options.display.max_colwidth = 300

In [2]:
# Send log records of level INFO and above to stdout as "[LEVEL] logger: message".
LOG_FORMAT = '[%(levelname)s] %(name)s: %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=LOG_FORMAT)

# Load dataset

In [3]:
# Number of reviews sampled from the dataset for annotation.
N_ROWS = 1_000

In [27]:
def sample_stratified(df, label_column, n_rows, random_state=None):
    """Sample about ``n_rows`` rows from ``df`` while preserving the class distribution.

    Each class found in ``label_column`` contributes ``n_rows * class_share`` rows
    (rounded to the nearest integer), so the total may differ slightly from
    ``n_rows``. The result keeps the original index labels of ``df`` and is
    returned in shuffled order.

    Args:
        df: Source dataframe.
        label_column: Name of the column holding the class label.
        n_rows: Target total number of rows.
        random_state: Optional seed (or random state) forwarded to
            ``DataFrame.sample`` to make the sampling reproducible. ``None``
            (default) keeps the sampling non-deterministic.

    Returns:
        A new dataframe with all columns of ``df``, including ``label_column``.
    """
    if df.empty:
        return df.copy()

    total = len(df)
    # Iterate over the groups explicitly instead of using `groupby(...).apply(...)`:
    # `apply` handles the grouping column differently across pandas versions,
    # whereas every group yielded here always carries all columns.
    parts = [
        group.sample(
            n=int(np.rint(n_rows * len(group) / total)),
            random_state=random_state,
        )
        for _, group in df.groupby(label_column)
    ]
    # Shuffle so the classes are not returned as contiguous runs.
    return pd.concat(parts).sample(frac=1, random_state=random_state)

base_url = 'https://tlk.s3.yandex.net/ext_dataset/aclImdb'
# Build the URL with an explicit '/': os.path.join uses the OS path separator,
# which would produce a backslash (and a broken URL) on Windows.
df = pd.read_csv(f'{base_url}/test.csv')

# Carve out three disjoint subsets: control tasks, training tasks and the
# reviews to annotate. Rows are dropped from `df` by index label after each
# draw, so no review ends up in more than one subset.
df_control = sample_stratified(df, 'label', n_rows=100)
df = df.drop(df_control.index)
df_training = sample_stratified(df, 'label', n_rows=10)
df = df.drop(df_training.index)
df = sample_stratified(df, 'label', n_rows=N_ROWS)

df_control = df_control.reset_index(drop=True)
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,path,label
0,test/neg/3571_3.txt,neg
1,test/neg/4311_2.txt,neg
2,test/pos/8424_9.txt,pos
3,test/neg/9456_4.txt,neg
4,test/neg/3853_1.txt,neg


In [28]:
# Check the class balance of the sampled reviews.
df['label'].value_counts()

neg    500
pos    500
Name: label, dtype: int64

In [29]:
def load_texts(urls, timeout=30):
    """Download every URL in ``urls`` and return the response bodies as text.

    Args:
        urls: Iterable of URLs to fetch.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        List of response texts, in the same order as ``urls``.

    Raises:
        requests.HTTPError: If a URL answers with an error status code.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    texts = []
    # A session reuses the connection for consecutive requests to the same host.
    with requests.Session() as session:
        for url in tqdm(urls):
            resp = session.get(url, timeout=timeout)
            # Fail loudly instead of storing an HTTP error page as review text.
            resp.raise_for_status()
            texts.append(resp.text)
    return texts

# Fetch the review text for every subset; `path` is relative to `base_url`.
for frame in (df_control, df_training, df):
    frame['text'] = load_texts(base_url + '/' + frame.path)

100%|██████████| 100/100 [00:11<00:00,  8.87it/s]
100%|██████████| 10/10 [00:01<00:00,  7.74it/s]
100%|██████████| 1000/1000 [02:04<00:00,  8.04it/s]


In [30]:
# Preview the sample, which now includes the downloaded `text` column.
df.head()

Unnamed: 0,path,label,text
0,test/neg/3571_3.txt,neg,Upon writing this review I have difficulty trying to think of what to write about. Nothing much happens in this film. The storyline is a South Asian woman who falls for an English Londoner. The problem is he and his friends have had a racist streak. At the same time her friend at work is unknowi...
1,test/neg/4311_2.txt,neg,"What a clunker!<br /><br />It MUST have been made for TV or Cable.<br /><br />Look: forget the screenplay - forget the bunch of forgettable actors. Excuse me? Continuity? The NSA/NIA/whatever or whoever he is (an agent) takes-off in an F16 - is shown in an F18 chucking his guts up and, later, th..."
2,test/pos/8424_9.txt,pos,This review has been written by someone who has read it (several times) and knows what they are talking about!. Firstly I have read others comments and noticed that some of the objections were really quite stupid. People who have read the book and other Jane Austens or for that matter any good b...
3,test/neg/9456_4.txt,neg,"Ouch!! What a mess we have here. Not so much of a mess as a painfully dull, half-assed excuse for exploitation. Brought to you by the one and only, J. G. ""Pat"" Patterson, yeah, the same one from Moonshine Mountain. Doctor Gore, formerly known as The Body Shop, is, I guess, somewhat inspired by F..."
4,test/neg/3853_1.txt,neg,I am almost tempted to demand my money back from the video store. This movie plumbs the depths of inanity and is almost completely unwatchable. I NEVER bail out of a film early but this was painful to view. A thorough waste of celluloid. My vote 1/10 (it would have been zero).


# Set up the project

In [13]:
# Read the token with getpass so it is not echoed into the saved notebook output.
toloka_client = toloka.TolokaClient(getpass.getpass("Enter your token:"), 'PRODUCTION')

## Create project

In [31]:
# Project metadata; the private comment is presumably visible to the requester only.
project = toloka.Project(
    private_comment='OOTB: IMDb Movie Reviews',
    public_description='Classify sentiment of movie reviews',
    public_name='Movie review classification',
)

In [32]:
# Every task takes one string field (`text`) and yields one string field (`result`).
output_specification = dict(result=toloka.project.StringSpec())
input_specification = dict(text=toloka.project.StringSpec())

In [33]:
# Plugins: a fixed-width pager layout and keyboard shortcuts
# (key 1 sets 'pos', key 2 sets 'neg' in the `result` output field).
task_width_plugin = tb.TolokaPluginV1(
    layout=tb.TolokaPluginV1.TolokaPluginLayout(kind='pager', task_width=500),
)
hot_keys_plugin = tb.HotkeysPluginV1(
    key_1=tb.SetActionV1(tb.OutputData('result'), 'pos'),
    key_2=tb.SetActionV1(tb.OutputData('result'), 'neg'),
)

# Widgets: the review text and a required two-option sentiment selector.
text_viewer = tb.TextViewV1(tb.InputData('text'))
sentiment_options = [
    tb.GroupFieldOption('pos', '😃 Positive'),
    tb.GroupFieldOption('neg', '😡 Negative'),
]
radio_group_field = tb.ButtonRadioGroupFieldV1(
    tb.OutputData('result'),
    sentiment_options,
    label='What is the review sentiment?',
    validation=tb.RequiredConditionV1(hint='You need to select one answer'),
)

# Assemble the view (selector first, then the text) and attach it to the project
# together with the input/output specifications.
project_interface = toloka.project.TemplateBuilderViewSpec(
    view=tb.ListViewV1([radio_group_field, text_viewer]),
    plugins=[task_width_plugin, hot_keys_plugin],
)
project.task_spec = toloka.project.task_spec.TaskSpec(
    input_spec=input_specification,
    output_spec=output_specification,
    view_spec=project_interface,
)

In [34]:
# The steps use an ordered list so the numbering comes from the markup itself;
# hand-typed numbers inside <ul> items render as a bullet followed by a number.
project.public_instructions = """
<h2>How to complete the task</h2>
<ol>
<li>Look at the movie review text.</li>
<li>If it seems 😃 positive, assign the positive label. Otherwise assign the 😡 negative label.</li>
<li>If you are unsure choose the label that seems most appropriate.</li>
</ol>

In case of problems send us a message. Good luck!
""".strip()

In [35]:
# Create the project on Toloka; the returned object replaces the local draft
# and provides the `project.id` used by the pools below.
project = toloka_client.create_project(project)

[INFO] toloka.client: A new project with ID "61042" has been created. Link to open in web interface: https://toloka.dev/requester/project/61042


## Create training tasks

In [65]:
# Training that performers must complete before the main pool.
# Timing: 5 minutes per assignment, retry allowed after 1 day.
# Layout: 5 training tasks per suite, 1 suite required to pass; suites are
# built in task creation order and shuffled inside each suite.
training_pool = toloka.Training(
    project_id=project.id,
    private_name='Training pool',
    inherited_instructions=True,
    may_contain_adult_content=False,
    assignment_max_duration_seconds=5 * 60,
    retry_training_after_days=1,
    training_tasks_in_task_suite_count=5,
    task_suites_required_to_pass=1,
    mix_tasks_in_creation_order=True,
    shuffle_tasks_in_task_suite=True,
)

In [37]:
# Create the training on Toloka; the returned object provides the
# `training_pool.id` used when uploading training tasks.
training_pool = toloka_client.create_training(training_pool)

[INFO] toloka.client: A new training with ID "28288532" has been created. Link to open in web interface: https://toloka.dev/requester/project/61042/training/28288532


In [38]:
label_to_hint_map = {
    'pos': 'Positive',
    'neg': 'Negative',
}

# The training pool is configured with mix_tasks_in_creation_order=True, 5 tasks
# per suite and a single suite required to pass. Uploading the examples grouped
# by label would therefore put only one label into the first suite, which a
# performer could pass by always giving the same answer. Alternate the labels
# instead, so every suite contains both classes.
training_examples = (
    df_training
    .assign(rank_in_label=df_training.groupby('label').cumcount())
    .sort_values(['rank_in_label', 'label'])
)

tasks = [
    toloka.Task(
        input_values={'text': ex_tuple.text},
        known_solutions=[toloka.task.BaseTask.KnownSolution(output_values={'result': ex_tuple.label})],
        # Hint shown when the performer's answer does not match the known solution.
        message_on_unknown_solution=f'Incorrect label! The actual label is: {label_to_hint_map[ex_tuple.label]}',
        infinite_overlap=True,
        pool_id=training_pool.id,
    )
    for ex_tuple in training_examples.itertuples()
]

In [39]:
# Upload the training tasks and print how many were created.
result = toloka_client.create_tasks(tasks, allow_defaults=True)
print(len(result.items))

10


## Create task pool

In [40]:
# Performer filter: English language and the browser client.
performer_filter = (
    (toloka.filter.Languages.in_('EN')) &
    (toloka.filter.ClientType == 'BROWSER')
)

# Main annotation pool: 0.02 per assignment, 5 minutes per assignment,
# expiring one year from now.
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; kept as-is
# because it yields the naive UTC timestamp the original passed.
pool = toloka.Pool(
    project_id=project.id,
    private_name='Pool',
    may_contain_adult_content=False,
    will_expire=datetime.datetime.utcnow() + datetime.timedelta(days=365),
    assignment_max_duration_seconds=5 * 60,
    reward_per_assignment=0.02,
    filter=performer_filter,
)

# Default overlap of 5 for new task suites.
pool.defaults = toloka.pool.Pool.Defaults(default_overlap_for_new_task_suites=5)

# Each task suite mixes 4 regular tasks with 1 control (golden) task.
pool.set_mixer_config(real_tasks_count=4, golden_tasks_count=1)

In [41]:
def _one_day_project_ban(private_comment):
    """Build the restriction shared by all rules below: project scope, 1 day."""
    return toloka.actions.RestrictionV2(
        scope='PROJECT',
        duration=1,
        duration_unit='DAYS',
        private_comment=private_comment,
    )


# Performers must pass the training with a skill value of at least 50.
pool.quality_control.training_requirement = toloka.quality_control.QualityControl.TrainingRequirement(
    training_pool_id=training_pool.id,
    training_passing_skill_value=50,
)

# Control tasks: restrict after >= 30% incorrect answers over the last 5 golden answers.
pool.quality_control.add_action(
    collector=toloka.collectors.GoldenSet(history_size=5),
    conditions=[
        toloka.conditions.GoldenSetAnswersCount >= 5,
        toloka.conditions.IncorrectAnswersRate >= 30,
    ],
    action=_one_day_project_ban('Low quality of responses'),
)

# Fast responses: restrict when 3 or more of the last 5 assignments were
# submitted in under 15 seconds.
pool.quality_control.add_action(
    collector=toloka.collectors.AssignmentSubmitTime(history_size=5, fast_submit_threshold_seconds=15),
    conditions=[
        toloka.conditions.TotalSubmittedCount >= 5,
        toloka.conditions.FastSubmittedCount >= 3,
    ],
    action=_one_day_project_ban('Answering too fast'),
)

# Skipped assignments: restrict after 3 skips in a row.
pool.quality_control.add_action(
    collector=toloka.collectors.SkippedInRowAssignments(),
    conditions=[toloka.conditions.SkippedInRowCount >= 3],
    action=_one_day_project_ban('Too many skipped assignments'),
)

# Majority vote: restrict when more than 30% of the last 5 answers are counted
# as incorrect by the collector (answer_threshold=4).
pool.quality_control.add_action(
    collector=toloka.collectors.MajorityVote(
        answer_threshold=4,
        history_size=5,
    ),
    conditions=[
        toloka.conditions.TotalAnswersCount >= 5,
        toloka.conditions.IncorrectAnswersRate > 30,
    ],
    action=_one_day_project_ban('Low quality responses'),
)

In [42]:
# Create the pool on Toloka; the returned object provides the `pool.id`
# used when uploading control and regular tasks.
pool = toloka_client.create_pool(pool)

[INFO] toloka.client: A new pool with ID "28288533" has been created. Link to open in web interface: https://toloka.dev/requester/project/61042/pool/28288533


## Create control tasks

In [43]:
# Control (golden) tasks: reviews with a known label, uploaded with infinite overlap.
tasks = [
    toloka.Task(
        input_values={'text': ex_tuple.text},
        known_solutions=[toloka.task.BaseTask.KnownSolution(output_values={'result': ex_tuple.label})],
        pool_id=pool.id,
        infinite_overlap=True,
    )
    for ex_tuple in df_control.itertuples()
]

In [44]:
# Upload the control tasks and print how many were created.
result = toloka_client.create_tasks(tasks=tasks, allow_defaults=True)
print(len(result.items))

100


## Create tasks from dataset

In [45]:
# Regular tasks: one per sampled review, without a known solution.
tasks = [
    toloka.Task(input_values={'text': ex_tuple.text}, pool_id=pool.id)
    for ex_tuple in df.itertuples()
]
result = toloka_client.create_tasks(tasks=tasks, allow_defaults=True)
print(len(result.items))

1000


# Start annotation

In [46]:
# Open the training with the training-specific client method so that
# `training_pool` stays a Training object instead of being replaced by a Pool.
training_pool = toloka_client.open_training(training_pool.id)
pool = toloka_client.open_pool(pool.id)

In [47]:
pool_id = pool.id

def wait_pool_for_close(pool_id, minutes_to_wait=0.5):
    """Block until the pool is closed, printing its completion percentage.

    Args:
        pool_id: ID of the pool to poll.
        minutes_to_wait: Pause between two polls, in minutes.
    """
    sleep_time = 60 * minutes_to_wait
    while True:
        pool = toloka_client.get_pool(pool_id)
        if pool.is_closed():
            break
        # Request the completion percentage and wait for the analytics operation.
        request = toloka.analytics_request.CompletionPercentagePoolAnalytics(subject_id=pool.id)
        op = toloka_client.wait_operation(toloka_client.get_analytics([request]))
        percentage = op.details['value'][0]['result']['value']
        timestamp = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f'   {timestamp}\t'
            f'Pool {pool.id} - {percentage}%'
        )
        time.sleep(sleep_time)
    print('Pool was closed.')

wait_pool_for_close(pool_id)

   12:37:03	Pool 28288533 - 0%
   12:37:35	Pool 28288533 - 0%
   12:38:07	Pool 28288533 - 0%
   12:38:39	Pool 28288533 - 2%
   12:39:12	Pool 28288533 - 4%
   12:39:44	Pool 28288533 - 6%
   12:40:16	Pool 28288533 - 7%
   12:40:50	Pool 28288533 - 9%
   12:41:22	Pool 28288533 - 10%
   12:41:54	Pool 28288533 - 11%
   12:42:26	Pool 28288533 - 13%
   12:42:58	Pool 28288533 - 15%
   12:43:31	Pool 28288533 - 17%
   12:44:03	Pool 28288533 - 19%
   12:44:37	Pool 28288533 - 21%
   12:45:09	Pool 28288533 - 24%
   12:45:41	Pool 28288533 - 27%
   12:46:14	Pool 28288533 - 29%
   12:46:46	Pool 28288533 - 32%
   12:47:19	Pool 28288533 - 36%
   12:47:51	Pool 28288533 - 40%
   12:48:24	Pool 28288533 - 44%
   12:48:56	Pool 28288533 - 47%
   12:49:29	Pool 28288533 - 51%
   12:50:01	Pool 28288533 - 54%
   12:50:33	Pool 28288533 - 57%
   12:51:06	Pool 28288533 - 61%
   12:51:38	Pool 28288533 - 65%
   12:52:12	Pool 28288533 - 70%
   12:52:44	Pool 28288533 - 74%
   12:53:17	Pool 28288533 - 78%
   12:53:49	Pool

In [48]:
# Close the training with the training-specific client method so that
# `training_pool` stays a Training object instead of being replaced by a Pool.
training_pool = toloka_client.close_training(training_pool.id)

# Extract results

In [60]:
# Download the assignments, excluding banned performers.
assignments = toloka_client.get_assignments_df(pool.id, exclude_banned=True)

# Keep only rows with no golden answer (presumably the regular, non-control tasks).
is_regular_task = assignments['GOLDEN:result'].isnull()

# Rename to the task / label / performer columns passed to the aggregator below.
answers_df = assignments[is_regular_task].rename(columns={
    'INPUT:text': 'task',
    'OUTPUT:result': 'label',
    'ASSIGNMENT:worker_id': 'performer',
})



# Aggregate results

In [61]:
# Aggregate the overlapping answers into one predicted label per task.
predictions = DawidSkene(n_iter=100).fit_predict(answers_df)

# Turn the result into a two-column frame and attach the true labels by review text.
aggregated_answers = predictions.reset_index()
aggregated_answers.columns = ['text', 'pred_label']
aggregated_answers = aggregated_answers.merge(df, on='text')
aggregated_answers.head()

Unnamed: 0,text,pred_label,path,label
0,"I think that this short TV series, was absolutely wonderful, and gave both a in-depth and clear explanation of everything that was on the screen at the given time. This was by far David Attenborough at his best. I personally thought this was one of the best documentaries in the past decade. This...",pos,test/pos/3585_10.txt,pos
1,"Kill the scream queen may sound like a good slasher flick but it is terribly boring and very dumb.<br /><br />Kill the scream queen is about a crazy filmmaker who auditions girls to be in his snuff film. He rapes and tortures them. This is trash that is not amusing, suspenseful or entertaining.T...",neg,test/neg/2817_1.txt,neg
2,"Well, okay, maybe not perfect, but it was pretty close. This movie jumped from crime drama to romantic goofball comedy and back again so quickly all the way throughout that it seemed like two different movies that played simultaneously and then joined up again at the end. But they did it smoothl...",pos,test/pos/2847_8.txt,pos
3,"Some movies want to make us think, some want to excite us, some want to exhilarate us. But sometimes, a movie wants only to make us laugh, and ""In & Out"" certainly succeeds in this department.<br /><br />Indiana high-school teacher Howard Brackett (Kevin Kline) is going to be married to fellow t...",pos,test/pos/10922_8.txt,pos
4,"In 1913, in Carlton Mine, Addytown, Pennsylvania, the cruel owner of a mine uses poor children in the exploration and after an explosion, a group of children is buried alive. On the present days, Karen Tunny (Lori Heuring) has just lost her husband after a long period of terminal disease when th...",neg,test/neg/1114_4.txt,neg


# View results

In [62]:
# Show 5 random reviews with their predicted and true labels.
result_columns = ['text', 'pred_label', 'label']
aggregated_answers[result_columns].sample(5)

Unnamed: 0,text,pred_label,label
253,"Don't ask me why I love this movie so much...Maybe it came at a time in my life I desperately wanted to fit in, maybe it is the amazing monster effects, maybe because I enjoyed the novel ""Cabal"", but It's probably because I LOVE Clive Barker. I think it's fair to warn you the movie and the novel...",pos,pos
22,"With a name like ""10 Commandments"" you would expect a film to be representative of the account in the Bible, specifically Exodus. Not so here. This is standard procedure with any Biblical Hallmark-made film. Remember ""Noah""?? That was utter fiction and one of the worst films ever made. At least ...",neg,neg
852,I've seen some bad things in my time. A half dead cow trying to get out of waist high mud; a head on collision between two cars; a thousand plates smashing on a kitchen floor; human beings living like animals.<br /><br />But never in my life have I seen anything as bad as The Cat in the Hat.<br ...,neg,neg
298,"Talk about being boring!<br /><br />I got this expecting a fascinating insight into the life of the man who wrote the mythical Night on the Galactic Railroad. I expected to see crazy stories and hijinks of an eccentric man and to discover his inspirations for such bizarre material. Boy, was I wr...",pos,neg
864,"This Blake Edwards film isn't too sure whether it wants to be a comedy, a drama or a musical. No matter, the sheer presence of Julie Andrews, is reason enough to see this comedy-drama-musical-spy spoof. Julie is beautiful and uses her many talents, throughout the film. Rock Hudson looks tired, b...",pos,pos


# View errors

In [63]:
# Show up to 5 random reviews whose aggregated label differs from the true label.
errors = aggregated_answers.loc[
    aggregated_answers.pred_label != aggregated_answers.label,
    ['text', 'pred_label', 'label'],
]
# Cap the sample size: sample(5) raises when fewer than 5 rows are misclassified.
errors.sample(min(5, len(errors)))

Unnamed: 0,text,pred_label,label
755,"The only reason I watched this movie a second time, was to learn the name of the ""second banana"" girl playing opposite Katie Holms. Her name is Marisa Coughlan. Never heard of her before. She is lovely. Captivating. With an animated face, and cute bod, she is highly watchable... She's got real, ...",pos,neg
608,It was inferred by a previous poster that the military would not be subordinate to the police in a disaster as depicted in the film. In fact the military role would be to supply aid to the civil authorities when requested to do so. The civil authorities would retain primacy. In practise the Army...,pos,neg
598,On paper this looks a good film . Michael Caine plays a tough and ruthless boxing promoter who's son is up for a title eliminator . The pity is that when the story is transferred from paper to my television screen it loses a certain everything . I had hoped we'd be seen emulating his definitive ...,pos,neg
609,"Young Warriors (1983) <br /><br />While this is a deeply flawed (and in some ways idiotic) movie, the way it continually defies expectations makes it decent viewing for the adventurous sleaze fan.<br /><br />Meet yuppie college student Kevin and his gang of lovable frat boy buddies. In what star...",neg,pos
759,"I have recently gone to the movie theatres to see the new (2007) version of Bridge to Teribithia. After, I went to the library to rent the older version to see it again without paying again. I must say that I was extremely disappointed! I found the older version to have horrible acting as well a...",pos,neg


# Obtain accuracy

In [64]:
# Balanced accuracy of the aggregated labels against the dataset labels.
accuracy = balanced_accuracy_score(
    y_true=aggregated_answers.label,
    y_pred=aggregated_answers.pred_label,
)
error = 1 - accuracy
print(f'Accuracy: {accuracy:.2f}')
print(f'Error: {error:.2f}')

Accuracy: 0.89
Error: 0.11
