In [1]:
import json
import re
from datetime import datetime, timedelta
from logging import log, CRITICAL, ERROR, INFO, DEBUG, WARN

import boto3
import pandas as pd
pd.set_option('display.max_rows', None)

Setup Environment
Verify that you have set AWS credentials in your user data
Location: ~/.aws/credentials
Verify that you are in the correct environment (sandbox vs. production) before running any cell
Format:

[default]
aws_access_key_id=XXXXXXX
aws_secret_access_key=XXXXX

In [166]:
def check_response(resp):
    """Log ``resp`` at ERROR level when its HTTP status code is not 200.

    Args:
        resp: A response dict exposing ``resp['ResponseMetadata']['HTTPStatusCode']``.

    Returns:
        None. A non-200 status is only logged; nothing is raised.
    """
    http_code = resp['ResponseMetadata']['HTTPStatusCode']
    if http_code == 200:
        return
    log(ERROR, resp)

# Toggle between the MTurk requester sandbox (True) and the live endpoint (False).
SANDBOX = False
if not SANDBOX:
    # No endpoint override: boto3 picks its default MTurk endpoint for the region.
    mturk = boto3.client('mturk', region_name='us-east-1')
else:
    sandbox_ep = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
    mturk = boto3.client('mturk', region_name='us-east-1', endpoint_url=sandbox_ep)

def get_balance():
    """Return the ``AvailableBalance`` field of the account-balance response.

    The response is passed through ``check_response`` first, so a non-200
    status gets logged before the value is returned.
    """
    response = mturk.get_account_balance()
    check_response(response)
    return response['AvailableBalance']
    
def print_env():
    """Print the available balance and announce which environment is active.

    The sandbox case is printed; the production case is emitted through
    ``logging`` at WARN level so it stands out from ordinary output.
    """
    balance = get_balance()
    print("Available Balance: ", balance)
    if not SANDBOX:
        log(WARN, "PRODUCTION Environment")
        return
    print("SANDBOX Environment")

In [167]:
print_env()#when I started my trap reminder task



Available Balance:  331.39


### Who has our qualification for a certain task

In [3]:
def check_who_has_qual(qual_id):
    """Print and return every worker currently holding qualification ``qual_id``.

    Results are fetched 100 at a time and all pages are followed via
    ``NextToken``, so qualifications held by more than 100 workers are
    reported completely instead of being cut off after the first page.

    Args:
        qual_id: The QualificationTypeId to look up.

    Returns:
        list[str]: WorkerIds in the order the service returned them.
    """
    all_workers = []
    request = {'QualificationTypeId': qual_id, 'MaxResults': 100}
    while True:
        res = mturk.list_workers_with_qualification_type(**request)
        page = res['Qualifications']
        for rec in page:
            print(rec['WorkerId'], rec["GrantTime"])
            all_workers.append(rec['WorkerId'])
        next_token = res.get('NextToken')
        # Stop on a missing token, or on an empty page so a stray token cannot loop forever.
        if not next_token or not page:
            break
        request['NextToken'] = next_token
    return all_workers

In [126]:
# MTurk QualificationTypeId strings passed to give_qual / remove_qual /
# check_who_has_qual in the cells below.
# NOTE(review): the variable names are the only description of each
# qualification's purpose visible here — confirm against the requester console.
qual_trap_granted = '3N2J8SDS8LBYPCUOGA88O5UTDU9XYB'
qual_prev_workers = '3BELWBFOS6JPVRXFLT6GVTT3ITD78C'
qual_in_training = '3BVFUZHH5MI7NI1FAMZW4F97L5DINY'
qual_in_training_late = '31GBYMVNTPCPJC37L5IG0G9MXWOYE9' #late workers
# One qualification per production group (paired with grp1..grp5 further down).
prod_gr_1 = '3HKJJ33FJ4GQMSLBB8YRFK2YILFK67'
prod_gr_2 = '37PDZ31J6CH6ZW4TMPLKTKQ4SXPGVR'
prod_gr_3 = '3N54K5T7LDFNU7U3HSWTLQA4L4LYZ7'
prod_gr_4 = '33DXTGLH9F2S00V95Y514YCD24K3PM'
prod_gr_5 = '38T2WS02O0BARM1Z1ZAMWH3A4KVUTW'
valentina = '3YJFRJA91LRKWWZCLW9F85034IOZDE'
# NOTE(review): `consol` differs from `qual_prev_workers` only in its last
# six characters — easy to confuse when copy-pasting.
consol = '3BELWBFOS6JPVRXFLT6GVTT3JZN870'
prod_consol = '33MAVWP2GR93U8N4YD73RI88DT0ZL8'

In [5]:
def remove_qual(qual_id, workers):
    """Disassociate qualification ``qual_id`` from each WorkerId in ``workers``.

    One API call is issued per worker; the responses are not inspected.
    """
    for worker_id in workers:
        mturk.disassociate_qualification_from_worker(
            QualificationTypeId=qual_id,
            WorkerId=worker_id,
        )
        
def give_qual(qual_id, workers):
    """Associate qualification ``qual_id`` (with value 1) with each WorkerId in ``workers``.

    One API call is issued per worker; the responses are not inspected.
    """
    for worker_id in workers:
        mturk.associate_qualification_with_worker(
            QualificationTypeId=qual_id,
            WorkerId=worker_id,
            IntegerValue=1,
        )

In [None]:
# First run the check qualification cell, then the groups

In [169]:
# WorkerId lists, one per production group; each is passed to give_qual together
# with the matching prod_gr_N qualification in the cells that follow.
# NOTE(review): 'AZLZA0Q87TJZO' appears in both grp4 and grp5, and a later cell
# removes that worker from prod_gr_4 — confirm which group is intended.
grp1 = ['A1KGCOR8OXYR72']
grp2 = ['A98E8M4QLI9RS']
grp3 = ['A3B7TNVOISSZ2O']
grp4 = ['AZLZA0Q87TJZO','A2EJ7U3TZAKROG']
grp5 = ['A1FS8SBR4SDWYG','AZLZA0Q87TJZO']

In [139]:
give_qual(prod_consol, consolg)

In [170]:
give_qual(prod_gr_1, grp1)

In [171]:
give_qual(prod_gr_2, grp2) #consolidation

In [172]:
give_qual(prod_gr_3, grp3)

In [173]:
give_qual(prod_gr_4, grp4)

In [184]:
give_qual(consol, ['A3B7TNVOISSZ2O'])

In [185]:
check_who_has_qual(consol)

A1KGCOR8OXYR72 2021-04-16 00:34:38+03:00
A2EJ7U3TZAKROG 2021-04-16 00:34:39+03:00
A3B7TNVOISSZ2O 2021-04-25 20:52:30+03:00
A98E8M4QLI9RS 2021-04-16 00:34:38+03:00


['A1KGCOR8OXYR72', 'A2EJ7U3TZAKROG', 'A3B7TNVOISSZ2O', 'A98E8M4QLI9RS']

In [180]:
print("Group1")
check_who_has_qual(prod_gr_1)
print("Group2")
check_who_has_qual(prod_gr_2)
print("Group3")
check_who_has_qual(prod_gr_3)
print("Group4")
check_who_has_qual(prod_gr_4)
print("Group5")
check_who_has_qual(prod_gr_5)

Group1
A1KGCOR8OXYR72 2021-04-25 20:48:58+03:00
Group2
A98E8M4QLI9RS 2021-04-25 20:48:58+03:00
Group3
A3B7TNVOISSZ2O 2021-04-25 20:48:59+03:00
Group4
A2EJ7U3TZAKROG 2021-04-25 20:48:59+03:00
Group5
AZLZA0Q87TJZO 2021-04-19 20:52:09+03:00
A1FS8SBR4SDWYG 2021-04-19 20:52:09+03:00


['AZLZA0Q87TJZO', 'A1FS8SBR4SDWYG']

In [179]:
remove_qual(prod_gr_4,["AZLZA0Q87TJZO"])

### On sandbox

In [35]:
check_who_has_qual(sandbox_qual)

In [79]:
remove_qual(sandbox_qual, ['A175DRQDVKWQA7'])

In [120]:
give_qual(sandbox_qual, ['A3UENPLNM9AQBK'])

In [121]:
check_who_has_qual(sandbox_qual)

A3UENPLNM9AQBK 2021-01-07 15:20:49+02:00
A175DRQDVKWQA7 2021-01-07 15:19:39+02:00


### Give workers a qualification

In [10]:
workers = ["A175DRQDVKWQA7"]

In [None]:
prev_work_qual = '3BELWBFOS6JPVRXFLT6GVTT3ITD78C' #qa qualification

In [11]:
# Same per-worker association call that give_qual() wraps; reuse the helper.
give_qual(prev_work_qual, workers)

### Check that we did it correctly

In [13]:
res = mturk.list_workers_with_qualification_type(QualificationTypeId="3T0EA825IIG57D2Q3MWOV7TVAYGQQ6")
for rec in res['Qualifications']:
    print(rec['WorkerId'], rec["GrantTime"])

A175DRQDVKWQA7 2020-04-06 11:11:57+03:00


cool

In [14]:
mturk.disassociate_qualification_from_worker(WorkerId="A175DRQDVKWQA7", QualificationTypeId="3T0EA825IIG57D2Q3MWOV7TVAYGQQ6")

{'ResponseMetadata': {'RequestId': 'e7a29a75-464f-4a9a-a276-b40945cb899d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e7a29a75-464f-4a9a-a276-b40945cb899d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '2',
   'date': 'Fri, 25 Dec 2020 16:25:36 GMT'},
  'RetryAttempts': 0}}

### Checking status of ongoing hits

In [21]:

def hits_df(hits):
    """Turn a list of HIT records into a tidy DataFrame with short column names.

    The bulky ``QualificationRequirements`` and ``Question`` fields are dropped
    when present (their absence is tolerated, since they are discarded anyway),
    the API field names are renamed to snake_case, and two convenience columns
    are added: ``h_id`` / ``t_id``, the last five characters of the HIT id and
    HIT type id respectively.

    Args:
        hits: Sequence of dict records as found under ``'HITs'`` in a
            ``list_hits`` response.

    Returns:
        pandas.DataFrame with exactly the columns listed in ``cols`` below, in
        that order. An empty input yields an empty frame with the same columns.

    Raises:
        KeyError: if a column required in the output is missing from the records.
    """
    # Single source of truth for the output schema (used for both the empty
    # and the populated case).
    cols = ['h_id', 't_id', 'status', 'review_status', 'reward', 'title', 'create_date', 'expire_date',
            'duration_sec', 'auto_approve_sec',
            'assign_available', 'assign_completed', 'assign_pending', 'MaxAssignments',
            'id', 'hit_type', 'hit_group_id', 'Keywords', 'Description']
    if not len(hits):
        return pd.DataFrame(columns=cols)

    renames = {
        'HITId': 'id',
        'HITTypeId': 'hit_type',
        'HITGroupId': 'hit_group_id',
        'HITStatus': 'status',
        'HITReviewStatus': 'review_status',
        'AssignmentDurationInSeconds': 'duration_sec',
        'AutoApprovalDelayInSeconds': 'auto_approve_sec',
        'NumberOfAssignmentsAvailable': 'assign_available',
        'NumberOfAssignmentsCompleted': 'assign_completed',
        'NumberOfAssignmentsPending': 'assign_pending',
        'CreationTime': 'create_date',
        'Expiration': 'expire_date',
        'Reward': 'reward',
        'Title': 'title',
    }
    df = (
        pd.DataFrame.from_records(hits)
        # errors='ignore': these columns are only being discarded, so a record
        # set that lacks them should not abort the whole conversion.
        .drop(columns=['QualificationRequirements', 'Question'], errors='ignore')
        .rename(columns=renames)
    )

    # Short suffixes are easier to eyeball than the full 30-character ids.
    df['h_id'] = df['id'].str.slice(-5)
    df['t_id'] = df['hit_type'].str.slice(-5)
    return df[cols]

In [22]:
def refresh():
    """Print the environment banner, then fetch every HIT and return them as a DataFrame.

    HITs are requested 100 at a time and pagination is followed through
    ``NextToken``. The token is read with ``.get`` because a response that has
    no further pages (including the zero-HIT case) may not carry one; indexing
    it directly raised ``KeyError``.

    Returns:
        pandas.DataFrame: the result of ``hits_df`` over all collected HIT records.
    """
    print_env()
    collected = []
    request = {'MaxResults': 100}
    while True:
        resp = mturk.list_hits(**request)
        if resp["NumResults"] == 0:
            break
        collected.extend(resp['HITs'])
        print("Returned: ", resp["NumResults"], " results")
        next_tok = resp.get("NextToken")
        if not next_tok:
            # Last page: nothing more to ask for.
            break
        print("Trying next token: ", next_tok)
        request['NextToken'] = next_tok

    return hits_df(collected)

In [23]:
hits = refresh()



Available Balance:  2810.97
Returned:  100  results
Trying next token:  p2:IkBCy1drkfM/OCxrhuAygKgL848DLaTxTDvLQcFtVY7KPuvbQTzi05wiT7o24o4=
Returned:  100  results
Trying next token:  p2:IZCGP+RgHydpSiuxiFCii5plbP0RqJnhCtAEmF7Am5niTf6jhxz4Q0UvWDQUPAs=
Returned:  100  results
Trying next token:  p2:UJH01Y13kd1N4+KAP1cDmsZdg7OXy2BqkpWDbkyaWO5gvcknH8nJGKSP7ucSZdk=
Returned:  100  results
Trying next token:  p2:/AvEILk/BfVngyRp6uCT3sYWp/QkTK6zIkWCWaQt0WAEGEcOvAMpOq7z726FDuM=
Returned:  100  results
Trying next token:  p2:Q6joIRlLnHHpRwT3Pr3PrUW7FLoUtFwuR+dPLRjKkdi5f771nfi78Lczr4ESwP4=
Returned:  100  results
Trying next token:  p2:v0PvIARd7adigqJbUr+2ox0Z9Vjtnp/HVNzogMhc1Yz+vXKA6ORdatKN4jbhAVA=
Returned:  100  results
Trying next token:  p2:Jz1iWutm+jh/i0iICX/xkqwQLoa0r4LUJ/OZrKc+dVh43PBjewBjS2XvMut5F9s=
Returned:  100  results
Trying next token:  p2:Yi5NbroWfhk1AfUZldF4b5ohd5nFI5s/2FflqImS9JoCpRN4DJa9v16B4Epe/jo=
Returned:  100  results
Trying next token:  p2:tS3iNwdzNNSZ7gDHWR0hDgIDd0drb

In [24]:
hits.status.value_counts()

Reviewable    2372
Assignable       8
Name: status, dtype: int64

In [25]:
hits.columns

Index(['h_id', 't_id', 'status', 'review_status', 'reward', 'title',
       'create_date', 'expire_date', 'duration_sec', 'auto_approve_sec',
       'assign_available', 'assign_completed', 'assign_pending',
       'MaxAssignments', 'id', 'hit_type', 'hit_group_id', 'Keywords',
       'Description'],
      dtype='object')

In [26]:
#hits.groupby(["title", "hit_type", 'status' ]).size().rename("hit_count").reset_index().sort_values("hit_count", ascending=False)
hits.groupby(["title", "hit_type", "status"]).size().rename("hit_count").reset_index().sort_values("hit_count", ascending=False)

Unnamed: 0,title,hit_type,status,hit_count
46,Ohad NLI Color-New Full,3T5H7R2NRCLA85LE47248YWG7AZZWD,Reviewable,324
56,Rewrite Questions Control Overlap,3CM00QMEULN8KV13225TB03L25GW6V,Reviewable,250
51,Ohad NLI Trademarks - Full,3QMCR5SRRUVY8M7P498WSZXJCQX4GW,Reviewable,200
16,Hexagon Board tasks (drawing only) (P02R01-2a)...,3HID22PG0DC0S4ZVJ530Q3GZDLPVSK,Reviewable,193
19,Hexagon Board tasks (drawing only) (P02R01-2d)...,31RF5NHNFOAX2FO7SIMA7TVOWNL1LY,Reviewable,107
47,Ohad NLI Hypernymy-New,3QH037L2CM697XD6SFM5R557YR5WAQ,Reviewable,100
53,Rewrite Questions & Control Overlap,3EKS5N8MHTGS7A19FQYP77N8SX1ZI2,Reviewable,100
52,Rewrite Questions & Control Overlap,34WBNEQO09S9IB1CZZCVZHHWORABRQ,Reviewable,100
61,Rewrite Questions Control Overlap (Low to High),3CS3U1VE5JPKZ3YFG8FB5AQ9JP3S0G,Reviewable,100
55,Rewrite Questions Control Overlap,35RL0GHNS7TG0O4ITFK91L9APFCGEZ,Reviewable,80


In [56]:
our_hits = hits[hits.title.str.contains("Aligning Related Question-Answer Sets in ")][["title",'create_date',"hit_type", 'status','id', 'review_status','assign_available', 'assign_completed', 'assign_pending', 'MaxAssignments']]
our_hits

Unnamed: 0,title,create_date,hit_type,status,id,review_status,assign_available,assign_completed,assign_pending,MaxAssignments
0,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:03+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,37SQU136V7P0NHBK6GYA3ZUJ7FB11R,NotReviewed,12,0,0,12
1,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:02+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,3VDI8GSXAFUF2WL8NKSSPWLXNA08GI,NotReviewed,12,0,0,12
2,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:02+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,3P7QK0GJ3TM47YEOXV7G0CO0C01Z2U,NotReviewed,11,0,0,12
3,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:02+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,38O9DZ0A62O2JOUDVQA53EV57W562B,NotReviewed,11,0,0,12
4,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:02+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,39WICJI5ATTTSGA71VLLJMTSIE1Z35,NotReviewed,12,0,0,12
5,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:02+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,382GHPVPHSSDPJMIG2VVPQGJHJZ43Z,NotReviewed,12,0,0,12
6,Aligning Related Question-Answer Sets in Sente...,2021-02-18 00:37:02+02:00,3ARIN4O78FIM2TA9EE4I4F7QGZ8IFC,Assignable,3X52SWXE0X6DBL1LJ1CFDL347PAWCA,NotReviewed,10,0,1,12
7,Aligning Related Question-Answer Sets in Sente...,2021-02-13 20:32:41+02:00,3ES1UQ6TFXNUY4AWL458RLJRU3FLMZ,Reviewable,32ZCLEW0BZLAZQAQH7F68V8347CPJ8,NotReviewed,5,1,0,6
8,Aligning Related Question-Answer Sets in Sente...,2021-02-13 20:32:41+02:00,3ES1UQ6TFXNUY4AWL458RLJRU3FLMZ,Reviewable,3ZLW647WALW3M57VCOC7WBJLX0Z23N,NotReviewed,6,0,0,6
9,Aligning Related Question-Answer Sets in Sente...,2021-02-13 20:32:41+02:00,3ES1UQ6TFXNUY4AWL458RLJRU3FLMZ,Reviewable,3UUSLRKAUL48QY6YBPZE5BM5ZTA7D6,NotReviewed,6,0,0,6


In [49]:
mturk.list_assignments_for_hit(HITId="3DA79LNS59WX6418V3W5BUQ8WN0T3F")

{'NumResults': 0,
 'Assignments': [],
 'ResponseMetadata': {'RequestId': 'd58a89bb-bcd6-4ae3-a227-360067db17b0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd58a89bb-bcd6-4ae3-a227-360067db17b0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '33',
   'date': 'Sun, 10 Jan 2021 16:30:00 GMT'},
  'RetryAttempts': 0}}

In [41]:
def expire(hit_id):
    """Force HIT ``hit_id`` to expire by moving its expiration to 2000-01-01.

    The response is handed to ``check_response`` so a non-200 status is logged.
    """
    check_response(
        mturk.update_expiration_for_hit(
            HITId=hit_id,
            ExpireAt=datetime(2000, 1, 1),
        )
    )
        
def delete(hit_id):
    """Delete HIT ``hit_id`` and pass the response to ``check_response``."""
    check_response(mturk.delete_hit(HITId=hit_id))

In [94]:
our_hits[(our_hits.hit_type == GEN) & (our_hits.assign_pending != 0)]

Unnamed: 0,title,hit_type,status,id,review_status,assign_available,assign_completed,assign_pending,MaxAssignments


### Deleting hits

In [None]:
# Expire and then delete every HIT in our_hits whose hit_type equals QA_ALING.
# NOTE(review): QA_ALING (and GEN in the commented-out variant below) is not
# defined in any visible cell — presumably a HITTypeId string set elsewhere;
# confirm it exists before running this destructive cell.
#to_expire = our_hits[(our_hits.hit_type == GEN) & ( hits.status != "Unassignable")].id
to_expire = our_hits[(our_hits.hit_type == QA_ALING)].id
print(len(to_expire))
for id_to_exp in to_expire:
    print(id_to_exp)
    # expire() moves the HIT's expiration into the past before delete() removes it.
    expire(id_to_exp)
    print("deleting...")
    delete(id_to_exp)

### Approving these assignments

In [None]:
mturk.approve_assignment(AssignmentId="3VAOOVPI3ZTQ3VJSS0YELPA82U7LLG")

  """Entry point for launching an IPython kernel.


3IJ95K7NDXD0538CTMHB7TBSSQTGNU
deleting...
3B9XR6P1WEW8VW9GY2X5DB3E3TWJBC
deleting...


## Sending email to workers

In [41]:
# The triple-quoted string below is a disabled (never assigned) list of
# WorkerIds kept for reference; only `more_trap_ppl` is actually defined and
# used as the recipient list in the notification cell.
'''
trap_workers = ["A98E8M4QLI9RS",
"AZLZA0Q87TJZO",
"A2Q3FS9G8ITCN7",
"A1FS8SBR4SDWYG",
"A3B7TNVOISSZ2O",
"A21LONLNBOB8Q",
"A2EJ7U3TZAKROG",
"A1SX8IVV82M0LW",
"A3TUJHF9LW3M8N",
"A156E3IYUJ5CKK",
"A21X60VQIDZU4D",
"A3VBNWON5XOUVS",
"A2XWQY45UB1XQ6"]'''
more_trap_ppl = ['A3IQRBKS1DUJVZ']

In [6]:
me = ['A175DRQDVKWQA7']

In [42]:
# Email the training invitation to every WorkerId in more_trap_ppl and keep the
# raw API response in `response` so failures can be inspected in the next cell.
# The message body is one string literal stitched together with backslash line
# continuations; "\n" sequences are the only line breaks the recipient sees.
# NOTE(review): the service limits how many WorkerIds one call may carry —
# confirm the limit before passing a longer list.
response = mturk.notify_workers(
    Subject='We\'d like to invite you to participate in further training of our task!',
    MessageText='Hello,\n\n\
\
We were happy with your performance on the task: "Aligning Question-Answer Sets",\n\
where you had to read two sentences carefully, and align question-answer statements that expressed the same meaning.\n\
This task is used for research on information consolidation, which will hopefully aid in summarization research and other important language understanding tasks.\n\n\
\
Because this task is semantically challenging and non-trivial, we require workers who choose to continue with us to do large-scale annotations, to first go through a 30 minute reading of guidelines, and a 2 to 3 training rounds with feedback for every round. \n\
Those who finish the complete trainings will receive a 5$ bonus upon finishing this process, and will go on to annotate larger-scale annotations.\n\n\
\
Those that perform well and listen to feedbacks in training will continue on to large-scale annotation batches.\n\
If you are interested in this task and would like to start training, please reply to this email with your confirmation (plus your WorkerId).\n\n\
 \
We look forward to hearing from you, \n\
The Language and Understanding Lab',
    WorkerIds=more_trap_ppl
)

In [40]:
response

{'NotifyWorkersFailureStatuses': [],
 'ResponseMetadata': {'RequestId': 'd3debbe2-bd8a-4c2d-9c07-ec7f845edf78',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd3debbe2-bd8a-4c2d-9c07-ec7f845edf78',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '35',
   'date': 'Sun, 14 Feb 2021 10:21:50 GMT'},
  'RetryAttempts': 0}}

### Paying bonuses to QASRL workers

In [212]:
def _list_all_assignments(hit_id):
    """Return every assignment of ``hit_id``, following ``NextToken`` pagination."""
    request = {'HITId': hit_id, 'MaxResults': 100}
    assignments = []
    while True:
        resp = mturk.list_assignments_for_hit(**request)
        page = resp["Assignments"]
        assignments.extend(page)
        token = resp.get("NextToken")
        # Stop on a missing token, or on an empty page so a stray token cannot loop forever.
        if not token or not page:
            break
        request['NextToken'] = token
    return assignments


def _count_questions(xml_answer):
    """Return the length of the JSON payload in the first <FreeText> element, or None.

    None means the answer had no <FreeText> element or its content was not a
    sized JSON value (e.g. a worker typed plain feedback); in the latter case
    the raw text is printed so it can be handled by hand.
    """
    # Non-greedy + DOTALL: capture only the first element even when the answer
    # holds several <FreeText> fields or the payload spans multiple lines.
    match = re.search(r"<FreeText>(.*?)</FreeText>", xml_answer, flags=re.DOTALL)
    if match is None:
        return None
    free_text = match.group(1)
    try:
        return len(json.loads(free_text))
    except (ValueError, TypeError):
        # ValueError: not JSON at all; TypeError: JSON scalar with no length.
        print(free_text)
        return None


def calculate_who_needs_bonus(GEN):
    """Count answered questions per approved assignment for HITs of type ``GEN``.

    Reads the module-level ``hits`` frame, lists the assignments of every HIT
    whose ``hit_type`` equals ``GEN``, keeps the approved ones and records how
    many items the worker's JSON answer contains. Assignments whose answer
    cannot be parsed are skipped (after printing the raw text) instead of
    being credited with the previous assignment's count.

    Args:
        GEN: HITTypeId string selecting which HITs to inspect.

    Returns:
        pandas.DataFrame with columns ``worker_id`` and ``n_questions`` (one
        row per counted assignment), sorted by ``worker_id``. Empty, with the
        same columns, when nothing qualifies.
    """
    qs_to_bonus = []
    for hit_id in hits[hits.hit_type == GEN].id.tolist():
        for a in _list_all_assignments(hit_id):
            if a['AssignmentStatus'] != "Approved":
                continue
            n_questions = _count_questions(a['Answer'])
            if n_questions is None:
                continue
            qs_to_bonus.append({'worker_id': a['WorkerId'], "n_questions": n_questions})
    # Explicit columns keep sort_values working when no rows were collected.
    df = pd.DataFrame(qs_to_bonus, columns=['worker_id', 'n_questions']).sort_values("worker_id")
    return df

In [None]:
df_for_bonus = calculate_who_needs_bonus('37AIU3VBY7PGVE4D4V4DOE8O6YDXR7')

In [None]:
df_for_bonus

In [247]:
def generationBonus(numQs):
    """Return the bonus, in cents, for an assignment with ``numQs`` questions.

    The first question earns nothing. Each further question earns one cent
    more than the one before it, starting at 5 cents (5, 6, 7, ...). Zero or
    one question therefore yields 0.
    """
    base_cents = 5  # bonus for the first *extra* question
    total_cents = 0
    for extra_idx in range(numQs - 1):
        total_cents += base_cents + extra_idx
    return total_cents
    
  

In [285]:
# Per-assignment bonus derived from its question count.
df_for_bonus['bonus_cents'] = df_for_bonus['n_questions'].apply(generationBonus)

### Amount to pay for trap in bonuses

In [287]:
df_for_bonus.groupby("worker_id").sum()

Unnamed: 0_level_0,n_questions,bonus_cents
worker_id,Unnamed: 1_level_1,Unnamed: 2_level_1
A21LONLNBOB8Q,52,199
A3TUJHF9LW3M8N,48,172


In [65]:
df_for_bonus.bonus_cents.mean()

12.483516483516484

In [289]:
df_for_bonus[df_for_bonus.worker_id == 'A3TUJHF9LW3M8N'].sum()

worker_id      A3TUJHF9LW3M8NA3TUJHF9LW3M8NA3TUJHF9LW3M8NA3TU...
n_questions                                                   48
bonus_cents                                                  172
dtype: object