# Introduction

This notebook will walk you through creating and monitoring your HITs. 

It provides methods to create HITs, pretty-print HIT and assignment status, expire/edit HITs, create qualifications, and download collected data. 

Before continuing, make sure that you have read the README and set all config fields to their desired values.

## Requirements: 

This code requires Python 3 and the following packages: 
- boto3 
- beautifulsoup4 (Beautiful Soup 4, imported as `bs4`)

Before using, you will have to set up an authentication key to use the Amazon API and include it in a credentials file. See here: https://aws.amazon.com/developers/getting-started/python/

# Setup

Read the config file and establish a connection to MTurk.

A connection is made to production or to the sandbox based on values in the config. 

In [1]:
import datetime
import boto3
import json
import copy
import pprint
from bs4 import BeautifulSoup as bs 
from uuid import uuid4

In [2]:
# Path to the JSON config file; it is read by the setup cell below.
CONFIG_PATH = "configNotebook.json"

# Where downloaded assignment results are saved (written as JSON).
SAVE_PATH = "assignments_notSandbox_contrast.json" 

# Sandbox or Production? You only spend money in Production.
# True selects the production MTurk endpoint, False the sandbox endpoint.
USING_PROD = True

In [9]:
# Safety flags that prevent you from accidentally messing up your HITs. 
# Set to False except when you are performing these specific tasks.
# Each guarded action raises an error while its flag is False.
ALLOW_HIT_CREATION = False  # guards the "launch your HIT" cell
ALLOW_ASSIGNMENT_ADDITION = False  # guards add_assignments()
ALLOW_CREATE_QUAL = False  # guards create_qual()
ALLOW_UPDATE_EXPIRATION = False  # guards update_expiration() / expire_hit()

In [4]:
# Load the JSON config and pull out the settings used by the rest of the notebook
with open(CONFIG_PATH, 'r') as f:
    config = json.load(f)

hit_config = config['hitCreation']
external_submit = config['advanced']['externalSubmit']
hit_url = hit_config['taskUrl']

# Pick the MTurk endpoint (and the matching `origin` label) from USING_PROD
print("USING PROD" if USING_PROD else "USING SANDBOX")
origin = "production" if USING_PROD else "sandbox"
endpoint_url = (
    'https://mturk-requester.us-east-1.amazonaws.com'
    if USING_PROD
    else 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
)

if external_submit:
    # If using an external link, add a querystring origin=sandbox or origin=production
    # for use in your js logic if you want. Not done for MTurk submits because it breaks the submit link
    hit_url = "%s?origin=%s" % (hit_url, origin)
    print("Configuring task as external link with data submitted to: %s" % config['advanced']['externalSubmitUrl'])
else:
    print("Configuring task as an iframe within Mturk")
print("TASK URL: " + hit_url)

# Credentials come from the 'default' profile of the local AWS credentials file
session = boto3.session.Session(profile_name='default')
cl = session.client('mturk', region_name='us-east-1', endpoint_url=endpoint_url)

USING PROD
Configuring task as an iframe within Mturk
TASK URL: https://smalpica.github.io/spatialContrastWebSurvey/


# Make new HIT

In [5]:
# Qualification requirements used to filter potential workers.
# Active requirement: an approval rating >= 95%.
# The US-locale requirement is commented out below; uncomment it to also
# require that workers come from the US.
# Edit this list to specify different qualifications for workers.
QUALS = [
    # dict(
    #     QualificationTypeId='00000000000000000071',
    #     Comparator='EqualTo',
    #     LocaleValues=[{'Country': 'US'}],
    # ),
    dict(
        QualificationTypeId='000000000000000000L0',
        Comparator='GreaterThanOrEqualTo',
        IntegerValues=[95],
    ),
]

In [6]:
# Helpers for creating HITs. 

def create_hit(task, questionText, quals=QUALS):
    """Create one HIT on MTurk and print the API response.

    Args:
        task: dict of HIT settings; the keys read are 'numAssignments',
            'lifetime', 'duration', 'rewardAmount', 'title', 'keywords'
            and 'description'.
        questionText: question XML sent as the HIT's `Question`.
        quals: qualification requirements for the HIT (defaults to QUALS).
    """
    hit_settings = dict(
        MaxAssignments=task['numAssignments'],
        AutoApprovalDelayInSeconds=604800,  # 7 days, in seconds
        LifetimeInSeconds=task['lifetime'],
        AssignmentDurationInSeconds=task['duration'],
        Reward=task['rewardAmount'],
        Title=task['title'],
        Keywords=task['keywords'],
        Description=task['description'],
        Question=questionText,
        QualificationRequirements=quals,
    )
    response = cl.create_hit(**hit_settings)
    print(response)
    print("\n")

def create_hit_iframe(task):
    """Create a HIT as an ExternalQuestion shown inside an iframe.

    The question XML embeds `task['taskUrl']` with a frame height of 1200
    and is passed, together with `task`, to `create_hit`.
    """
    xml_parts = [
        "<ExternalQuestion xmlns=\"http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/",
        "2006-07-14/ExternalQuestion.xsd\">\n",
        "<ExternalURL>",
        task['taskUrl'],
        "</ExternalURL>\n",
        "  <FrameHeight>1200</FrameHeight>\n",
        "</ExternalQuestion>",
    ]
    create_hit(task, "".join(xml_parts))
    
def create_hit_external(task):
    """Create a HIT as a simple UI with a link to an external page and an
    input box for a completion code.

    The question XML is built from 'questionform_template.xml', which must
    contain three `%s` placeholders filled with (title, description, URL).

    Args:
        task: dict of HIT settings passed on to `create_hit`. Its 'title'
            and 'description' are used in the question form so that
            per-variant overrides show up; when a key is missing the value
            from the global `hit_config` is used instead.
    """
    with open('questionform_template.xml', 'r') as myfile:
        template = myfile.read()
    title = task.get('title', hit_config["title"])
    description = task.get('description', hit_config["description"])
    # The link always comes from the global `hit_url`, which already carries
    # the `origin` query string added during setup.
    question_xml = template % (title, description, hit_url)
    create_hit(task, question_xml)

In [8]:
# Use this cell to launch your HIT! 
# Requires ALLOW_HIT_CREATION = True. Creates either `numTasks` identical HITs
# or one HIT per entry of `variants` (both read from the hitCreation config).
if not ALLOW_HIT_CREATION:
    raise RuntimeError("This action is not currently enabled; set `ALLOW_HIT_CREATION` to true to proceed with this action")

if not (hit_config.get('variants', False) or hit_config.get('numTasks', False)):
    raise RuntimeError("You must specify either hitCreation.numTasks or hitCreation.variants in your config.json file")

hit_creation_function = create_hit_external if external_submit else create_hit_iframe

if hit_config.get('numTasks', False):
    print("creating " + str(hit_config['numTasks']) + " tasks")
    for i in range(hit_config['numTasks']):
        hit_creation_function(hit_config)
else:
    # `variants` lives under hitCreation (it is what the check above reads),
    # so it has to be looked up on hit_config rather than the top-level config.
    print("creating " + str(len(hit_config['variants'])) + " variants")
    for var in hit_config['variants']:
        # Start from the shared hitCreation settings, then apply this
        # variant's overrides; create_hit reads its fields from this dict.
        task = copy.deepcopy(hit_config)
        task.update(var)
        hit_creation_function(task)

creating 25 tasks
{'HIT': {'HITId': '3O71U79SRDDDJ01RIKT5Y9YC486MSP', 'HITTypeId': '3VL3OSLWYJNXNYFUOQM6AJKTGJU1GJ', 'HITGroupId': '39PLFW4P3OQ9U1WHKRSKJKXQH0FH4B', 'CreationTime': datetime.datetime(2019, 5, 21, 12, 37, 9, tzinfo=tzlocal()), 'Title': 'Image blocks', 'Description': 'Watch several blocks of images and answer questions about them', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://smalpica.github.io/spatialContrastWebSurvey/</ExternalURL>\n  <FrameHeight>1200</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'images, 2D, questions', 'HITStatus': 'Assignable', 'MaxAssignments': 1, 'Reward': '2.00', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 5, 22, 12, 37, 9, tzinfo=tzlocal()), 'AssignmentDurationInSeconds': 4000, 'QualificationRequirements': [{'QualificationTypeId': '000000000000000000L0', 'Comparator': 'GreaterThanOrEqualTo', 'Intege

{'HIT': {'HITId': '34KYK9TV2TWH09XURV3WKTNIS92SB6', 'HITTypeId': '3VL3OSLWYJNXNYFUOQM6AJKTGJU1GJ', 'HITGroupId': '39PLFW4P3OQ9U1WHKRSKJKXQH0FH4B', 'CreationTime': datetime.datetime(2019, 5, 21, 12, 37, 10, tzinfo=tzlocal()), 'Title': 'Image blocks', 'Description': 'Watch several blocks of images and answer questions about them', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://smalpica.github.io/spatialContrastWebSurvey/</ExternalURL>\n  <FrameHeight>1200</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'images, 2D, questions', 'HITStatus': 'Assignable', 'MaxAssignments': 1, 'Reward': '2.00', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 5, 22, 12, 37, 10, tzinfo=tzlocal()), 'AssignmentDurationInSeconds': 4000, 'QualificationRequirements': [{'QualificationTypeId': '000000000000000000L0', 'Comparator': 'GreaterThanOrEqualTo', 'IntegerValues': [95], 

{'HIT': {'HITId': '36AZSFEYZ6OXS1EYLQ81VG7PRDWVBE', 'HITTypeId': '3VL3OSLWYJNXNYFUOQM6AJKTGJU1GJ', 'HITGroupId': '39PLFW4P3OQ9U1WHKRSKJKXQH0FH4B', 'CreationTime': datetime.datetime(2019, 5, 21, 12, 37, 11, tzinfo=tzlocal()), 'Title': 'Image blocks', 'Description': 'Watch several blocks of images and answer questions about them', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://smalpica.github.io/spatialContrastWebSurvey/</ExternalURL>\n  <FrameHeight>1200</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'images, 2D, questions', 'HITStatus': 'Assignable', 'MaxAssignments': 1, 'Reward': '2.00', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 5, 22, 12, 37, 11, tzinfo=tzlocal()), 'AssignmentDurationInSeconds': 4000, 'QualificationRequirements': [{'QualificationTypeId': '000000000000000000L0', 'Comparator': 'GreaterThanOrEqualTo', 'IntegerValues': [95], 

{'HIT': {'HITId': '3XAOZ9UYR1F4AV0PXUL2JHV8O9R1Q8', 'HITTypeId': '3VL3OSLWYJNXNYFUOQM6AJKTGJU1GJ', 'HITGroupId': '39PLFW4P3OQ9U1WHKRSKJKXQH0FH4B', 'CreationTime': datetime.datetime(2019, 5, 21, 12, 37, 12, tzinfo=tzlocal()), 'Title': 'Image blocks', 'Description': 'Watch several blocks of images and answer questions about them', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://smalpica.github.io/spatialContrastWebSurvey/</ExternalURL>\n  <FrameHeight>1200</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'images, 2D, questions', 'HITStatus': 'Assignable', 'MaxAssignments': 1, 'Reward': '2.00', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 5, 22, 12, 37, 12, tzinfo=tzlocal()), 'AssignmentDurationInSeconds': 4000, 'QualificationRequirements': [{'QualificationTypeId': '000000000000000000L0', 'Comparator': 'GreaterThanOrEqualTo', 'IntegerValues': [95], 

{'HIT': {'HITId': '3LCXHSGDLVUF7C9E1M5ML1T3V6FSE6', 'HITTypeId': '3VL3OSLWYJNXNYFUOQM6AJKTGJU1GJ', 'HITGroupId': '39PLFW4P3OQ9U1WHKRSKJKXQH0FH4B', 'CreationTime': datetime.datetime(2019, 5, 21, 12, 37, 13, tzinfo=tzlocal()), 'Title': 'Image blocks', 'Description': 'Watch several blocks of images and answer questions about them', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://smalpica.github.io/spatialContrastWebSurvey/</ExternalURL>\n  <FrameHeight>1200</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'images, 2D, questions', 'HITStatus': 'Assignable', 'MaxAssignments': 1, 'Reward': '2.00', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 5, 22, 12, 37, 13, tzinfo=tzlocal()), 'AssignmentDurationInSeconds': 4000, 'QualificationRequirements': [{'QualificationTypeId': '000000000000000000L0', 'Comparator': 'GreaterThanOrEqualTo', 'IntegerValues': [95], 

# HIT monitoring helpers

Helper functions that will be useful for monitoring the status of your HIT. See next section for how to use them.

In [10]:
def get_all_assignments(hitid):
    """Fetch every assignment for a HIT from the MTurk API.

    Pages through `list_assignments_for_hit` (100 results per request),
    passing each response's `NextToken` to the following request.

    Args:
        hitid: ID of the HIT whose assignments are listed.

    Returns:
        A list with the assignment dicts of all pages, in the order returned.
    """
    assignments = []
    next_token = None
    while True:
        args = {
            'HITId': hitid,
            'MaxResults': 100
        }
        if next_token:
            args['NextToken'] = next_token
        r = cl.list_assignments_for_hit(**args)
        page = r["Assignments"]
        assignments.extend(page)
        next_token = r.get('NextToken')
        # Stop on an empty page, and also when no token came back: without a
        # token the next request would fetch the first page again, forever.
        if not page or not next_token:
            break
    return assignments

def summarize_hits(hits):
    """Print a human-readable summary of every HIT in `hits`.

    Prints the number of HITs first, then for each HIT its title, ID, the
    number of assignments left / completed / pending, and whether it has
    expired. The expiration's timezone info is dropped before it is compared
    with the local `datetime.datetime.now()`.
    """
    print(len(hits))
    template = (
        "Title: {title}\n"
        "ID: {hid}\n"
        "\tAssignments left: {left}\n"
        "\tAssignments completed: {complete}\n"
        "\tAssignments pending: {pending}\n"
        "\tExpired: {exp}\n\n"
    )
    sections = []
    for hit in hits:
        naive_expiration = hit['Expiration'].replace(tzinfo=None)
        expired = naive_expiration < datetime.datetime.now()
        sections.append(template.format(
            title=hit['Title'],
            hid=hit['HITId'],
            left=hit['NumberOfAssignmentsAvailable'],
            complete=hit['NumberOfAssignmentsCompleted'],
            pending=hit['NumberOfAssignmentsPending'],
            exp=str(expired),
        ))
    print("".join(sections))
    
def summarize_assignments(hits):
    """Print the ID and status of every assignment of every HIT in `hits`.

    Assignments are fetched with `get_all_assignments`; a HIT for which
    nothing is returned gets a "No pending/submitted/approved assignments"
    line instead.
    """
    pieces = []
    for hit in hits:
        hit_id = hit['HITId']
        pieces.append("HIT %s: %s" % (hit_id, hit['Title']) + "\n")
        hit_assignments = get_all_assignments(hit_id)
        if not len(hit_assignments):
            pieces.append("\tNo pending/submitted/approved assignments for this HIT\n")
        pieces.extend(
            "\tAssignment {aid}\n\t\tStatus: {status}\n".format(
                aid=assignment['AssignmentId'],
                status=assignment['AssignmentStatus'],
            )
            for assignment in hit_assignments
        )
    print("".join(pieces))
    
def refresh_hits():
    """Re-query MTurk and rebind the global `hits` to the returned HIT list.

    Asks `list_hits` for at most `MAX_RESULTS` HITs in a single request.
    """
    global hits
    listing = cl.list_hits(MaxResults=MAX_RESULTS)
    hits = listing['HITs']

# HIT monitoring

In [11]:
# Number of HITs requested from MTurk by each `list_hits` call in this notebook.
# NOTE(review): only a single page is fetched (no NextToken handling), and the
# API presumably caps MaxResults per request -- confirm the limit before raising this.
MAX_RESULTS = 25 # set equal to the number of outstanding hits you have 

# API call to grab HIT data from MTurk 
hits = cl.list_hits(MaxResults=MAX_RESULTS)['HITs']

In [12]:
# Summarizes all outstanding HITs.
# refresh_hits() reloads the global `hits` from MTurk first so the summary is current.
refresh_hits()
summarize_hits(hits)

25
Title: Image blocks
ID: 3INZSNUD82ESARZFWLUBYO6S36Q9DK
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: False

Title: Image blocks
ID: 3SMIWMMK63TP3PN9BRUSM8IRN75WU6
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: False

Title: Image blocks
ID: 3LCXHSGDLVUF7C9E1M5ML1T3V6FSE6
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: False

Title: Image blocks
ID: 3IZVJEBJ6C88V92DQWGW56YT1NRZ6X
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: False

Title: Image blocks
ID: 3BCRDCM0OFID6E3RQVDMIRAGXEF6KJ
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: False

Title: Image blocks
ID: 3X7837UUAFMIVUX98ADAHCA3IPF6JK
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: False

Title: Image blocks
ID: 30U1YOGZGCKAF6VE5RU2FJBIVUESDD
	Assignments left: 0
	Assignments completed: 0
	Assignments pending: 1
	Expired: Fal

In [13]:
# Summarizes assignments for all outstanding HITs.
# refresh_hits() reloads the global `hits` from MTurk first so the summary is current.
refresh_hits()
summarize_assignments(hits)

HIT 3INZSNUD82ESARZFWLUBYO6S36Q9DK: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3SMIWMMK63TP3PN9BRUSM8IRN75WU6: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3LCXHSGDLVUF7C9E1M5ML1T3V6FSE6: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3IZVJEBJ6C88V92DQWGW56YT1NRZ6X: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3BCRDCM0OFID6E3RQVDMIRAGXEF6KJ: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3X7837UUAFMIVUX98ADAHCA3IPF6JK: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 30U1YOGZGCKAF6VE5RU2FJBIVUESDD: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3QGTX7BCHRQCC52ZLD6RD0KT5SGZ5H: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3XAOZ9UYR1F4AV0PXUL2JHV8O9R1Q8: Image blocks
	No pending/submitted/approved assignments for this HIT
HIT 3XBYQ44Z6RSA3C8KBM4IHCWP4TYWTF: Image blocks
	No pe

# Approve HITs

Approves all outstanding assignments for the HITs displayed above. 

In [10]:
def approve_all(hits):
    """Approve every submitted, not-yet-reviewed assignment of the given HITs.

    Only assignments whose status is 'Submitted' are approved. Assignments
    that are already 'Approved' need no action, and 'Rejected' ones are left
    alone so a deliberate rejection is not retried as an approval.

    Args:
        hits: iterable of HIT dicts; only 'HITId' is read.

    Prints one line per approval and the total number approved.
    """
    num_approved = 0
    for hit in hits:
        # get_all_assignments pages through the API so none are missed
        for a in get_all_assignments(hit["HITId"]):
            if a['AssignmentStatus'] != 'Submitted':
                continue
            print("Approving assignment")
            cl.approve_assignment(AssignmentId=a['AssignmentId'])
            # Count only after the API call has succeeded.
            num_approved += 1
    print("Approved %d assignments" % num_approved)

In [56]:
# Reload the HIT list, approve the assignments of those HITs, then print the
# available account balance reported by MTurk.
refresh_hits()
approve_all(hits)
print(cl.get_account_balance()['AvailableBalance'])

Approving assignment
Approved 1 assignments
200.80


# Update expiration or num tasks

In [71]:
def update_expiration(hitid, days_from_now):
    """Change a HIT's expiration to `days_from_now` days from the current time.

    Args:
        hitid: ID of the HIT to update.
        days_from_now: number of days to add to the current time; a negative
            value yields a time in the past.

    Returns:
        The response of `update_expiration_for_hit` (also printed).

    Raises:
        RuntimeError: if ALLOW_UPDATE_EXPIRATION is not enabled.
    """
    if not ALLOW_UPDATE_EXPIRATION:
        raise RuntimeError("This action is not currently enabled; set `ALLOW_UPDATE_EXPIRATION` to true to proceed with this action")

    # Send a timezone-aware UTC timestamp: a naive local time carries no
    # offset, so the expiration could be shifted by the machine's UTC offset
    # when it is serialized for the API.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    expire_time = now_utc + datetime.timedelta(days=days_from_now)

    response = cl.update_expiration_for_hit(HITId=hitid, ExpireAt=expire_time)
    print(response)
    return response
    
def expire_hit(hit):
    """Expire a HIT by moving its expiration 10 days into the past.

    `hit` is the HIT's ID; it is passed to `update_expiration` as `hitid`.
    Returns whatever `update_expiration` returns.
    """
    days_in_the_past = -10
    return update_expiration(hit, days_in_the_past)

In [72]:
def add_assignments(hitid, num_assignments):
    """Add `num_assignments` extra assignments to the HIT `hitid`.

    Returns the response of `create_additional_assignments_for_hit` (also
    printed). Raises RuntimeError unless ALLOW_ASSIGNMENT_ADDITION is enabled.
    """
    if not ALLOW_ASSIGNMENT_ADDITION:
        raise RuntimeError("This action is not currently enabled; set `ALLOW_ASSIGNMENT_ADDITION` to true to proceed with this action")

    request = {
        'HITId': hitid,
        'NumberOfAdditionalAssignments': num_assignments,
    }
    response = cl.create_additional_assignments_for_hit(**request)
    print(response)
    return response

In [93]:
# Use this cell to expire HITs: list the IDs to expire below, then run the cell.
# Requires ALLOW_UPDATE_EXPIRATION = True (expire_hit raises otherwise).
HIT_IDS_TO_EXPIRE = [
    # "FILL THIS IN",
    '3TCFMTM8HGCNICY4VJV1NZX58PF215',
    '3QI9WAYOGSZBUXGZV4MO9D1QEUU6SA',
    '3MZ3TAMYTNBFM2BNIGV8G6MZ1J5RI0',
    '33TGB4G0LR5PSCDALR04JVAWDH6TXF',
    '3NKW03WTLOVXBWGSUIANC7L2K81WQB',
    '33QQ60S6AU6WVZMOIK17FD9NMOSU0C',
    '3BJKPTD2QE05VGRYCSUCUDO8C0LRTH',
    '38LRF35D5NKSCRLL17JQHB7UP0CU37',
    '3XDJY5RK5U9DUN4P1LO9RGZ0308U4S',
    '3TUOHPJXYJLHKTH4GW5F6FHVHBOWX8',
    '3CZH926SIE2W5R73BL94NYDXHCF4EM',
    '3GS542CVJXBX6EKMHFCS0CI0QBU95M',
]

# Same calls, in the same order, as one expire_hit(...) line per ID;
# each response is printed by update_expiration.
for HIT_id_to_expire in HIT_IDS_TO_EXPIRE:
    expire_hit(HIT_id_to_expire)


{'ResponseMetadata': {'RequestId': 'ff7a3b01-7c31-4bc4-a5e3-eb5df89c3236', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ff7a3b01-7c31-4bc4-a5e3-eb5df89c3236', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Thu, 16 May 2019 21:46:28 GMT'}, 'RetryAttempts': 0}}
{'ResponseMetadata': {'RequestId': '2e30c5e4-ead9-4bf3-8d53-c3131422c20a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '2e30c5e4-ead9-4bf3-8d53-c3131422c20a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Thu, 16 May 2019 21:46:28 GMT'}, 'RetryAttempts': 0}}
{'ResponseMetadata': {'RequestId': '06638305-6af3-48dd-961c-db37f8d7351a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '06638305-6af3-48dd-961c-db37f8d7351a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Thu, 16 May 2019 21:46:28 GMT'}, 'RetryAttempts': 0}}
{'ResponseMetadata': {'RequestId': '4fa1219b-53f5-4f19-9d9b-9180264e071b', 'HTTPSt

{'ResponseMetadata': {'RequestId': 'efba3604-eb43-43b5-add5-655ce10c31c8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'efba3604-eb43-43b5-add5-655ce10c31c8',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '2',
   'date': 'Thu, 16 May 2019 21:46:29 GMT'},
  'RetryAttempts': 0}}

In [None]:
# Use this cell to add assignments to a HIT 
# Replace both placeholders below before running, and set
# ALLOW_ASSIGNMENT_ADDITION = True (add_assignments raises otherwise).
HIT_id_to_add_assignments = "FILL THIS IN"
num_assignments_to_add = 0
add_assignments(HIT_id_to_add_assignments, num_assignments_to_add)

# Add custom qualifications 

## Add a qualification to disqualify workers who have done work before

- uses "negative qualification" method from https://github.com/cloudyr/MturkR/wiki/qualifications-as-blocks

#### NOTE: quals are kept separate for the sandbox and prod. Make sure you are creating and assigning your quals in prod. 

### Structure of a new qualification

In [64]:
# Example qualification definition; its keys are the keyword arguments that
# create_qual() forwards to `create_qualification_type`.
NEW_QUAL = {
    'Name': 'Auto-Granted Qualification to Prevent Retakes',
    'Keywords': 'Worked for me before',
    'Description': 'This qualification is for people who have worked for me on this task before. It is granted automatically to new workers.',
    'QualificationTypeStatus': 'Active',
    'AutoGranted': True,
    # Score given with an auto-granted qualification. The API parameter is
    # named `AutoGrantedValue`; a bare 'value' key is not an accepted argument.
    'AutoGrantedValue': 100
}

### Helpers for creating, viewing, and assigning qualifications

In [84]:
def create_qual(new_qual):
    """Register a custom qualification type with MTurk.

    Args:
        new_qual: dict of keyword arguments for `create_qualification_type`.

    Returns:
        The new qualification's QualificationTypeId. The full response and
        the ID are also printed.

    Raises:
        RuntimeError: if ALLOW_CREATE_QUAL is not enabled.
    """
    if not ALLOW_CREATE_QUAL:
        # RuntimeError matches the other guarded actions in this notebook.
        raise RuntimeError("This action is not currently enabled; set `ALLOW_CREATE_QUAL` to true to proceed with this action")

    response = cl.create_qualification_type(**new_qual)
    print(response)
    Id = response['QualificationType']['QualificationTypeId']
    print("id", Id)
    return Id
        
def list_quals(query='hasCompletedVisualGraphRecallTask'):
    """Search MTurk qualification types and print the raw response.

    Args:
        query: search text passed as `Query`. Defaults to the qualification
            name this notebook was written for; pass another string to
            search for your own qualifications, or None to send no `Query`.

    Only the single response page is printed; a `NextToken` in the response
    is not followed.
    """
    args = {'MustBeRequestable': False}
    if query is not None:
        args['Query'] = query
    response = cl.list_qualification_types(**args)
    print(response)
    
def assign_qual(qual_id, worker_ids):
    """Assign the qualification `qual_id` to every worker in `worker_ids`.

    Each worker gets the qualification with an integer value of 1 and no
    notification. Every API response is printed and asserted to be truthy.
    """
    for worker_id in worker_ids:
        request = {
            'QualificationTypeId': qual_id,
            'WorkerId': worker_id,
            'IntegerValue': 1,
            'SendNotification': False,
        }
        response = cl.associate_qualification_with_worker(**request)
        print(response)
        assert response
        
def get_workers_for_hit(hitid):
    """Return the WorkerId of every assignment listed for the HIT `hitid`.

    One entry per assignment, in the order `get_all_assignments` returns
    them; duplicates are not removed.
    """
    worker_ids = []
    for assignment in get_all_assignments(hitid):
        worker_ids.append(assignment['WorkerId'])
    return worker_ids
    
def confirm_quals(qual_id, worker_ids):
    """Assert that every worker in `worker_ids` holds qualification `qual_id`.

    For each worker the qualification score is fetched and asserted to have
    Status 'Granted' and IntegerValue 1 (the value `assign_qual` assigns).
    """
    for worker_id in worker_ids:
        score = cl.get_qualification_score(
            QualificationTypeId=qual_id,
            WorkerId=worker_id,
        )
        qualification = score['Qualification']
        assert qualification['Status'] == 'Granted'
        assert qualification['IntegerValue'] == 1
        
def assign_qual_for_hit(hitid, qual_id):
    """Give qualification `qual_id` to every worker with an assignment on HIT `hitid`.

    Runs three steps, printing a progress line after each: collect the
    worker IDs, assign the qualification, then confirm it was granted.
    """
    hit_workers = get_workers_for_hit(hitid)
    print("got workers")

    assign_qual(qual_id, hit_workers)
    print("assigned qual")

    confirm_quals(qual_id, hit_workers)
    print("confirmed qual")

### Use the following cells to manipulate qualifications

In [66]:
# Use this cell to view the custom qualifications you have created
# (prints the raw `list_qualification_types` response).
list_quals()

{'NumResults': 1, 'NextToken': 'p1:w9l2UKZr5Av85xagEHdbUdM5R6XCxRCMWTv+Cjd2VB9OBhTwWmR+7lAbph8rCA==', 'QualificationTypes': [{'QualificationTypeId': '3Q3I6L0BKOF89DJMPYQCYLJBC2VW5E', 'CreationTime': datetime.datetime(2018, 3, 23, 14, 19, 7, tzinfo=tzlocal()), 'Name': 'hasCompletedVisualGraphRecallTask', 'Description': 'Assigned to people who have already completed work on a visual graph recall task. A worker can only complete this type of task once.', 'Keywords': 'Already completed visual graph recall task', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}], 'ResponseMetadata': {'RequestId': 'f271ce5b-1893-40c3-9794-5f1599cd33f6', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'f271ce5b-1893-40c3-9794-5f1599cd33f6', 'content-type': 'application/x-amz-json-1.1', 'content-length': '531', 'date': 'Tue, 14 May 2019 21:17:10 GMT'}, 'RetryAttempts': 0}}


In [None]:
# Use this cell to create a new qual 
# Fill in the dict below first (see NEW_QUAL above for the expected structure)
# and set ALLOW_CREATE_QUAL = True; create_qual raises otherwise.
qual_to_create = {}
create_qual(qual_to_create)

In [None]:
# Use this cell to assign a custom qual to every worker who has done a specific HIT
# Replace both placeholders below with a real HIT ID and qualification type ID.
hit_id = "FILL THIS IN"
qual_id_to_assign = "FILL THIS IN"
assign_qual_for_hit(hit_id, qual_id_to_assign)

# Create Compensation HIT

Mistakes happen, and sometimes they can lead to a worker who put in an honest effort being unable to complete a task and get paid. It's a good idea to compensate these workers when they reach out because it helps maintain relations with workers and is the right thing to do.

However, workers can only be paid upon completing a task. The workaround is to create a custom qualification, assign it to the worker you want to compensate, and create a no-work HIT requiring the custom qualification. This code does that.

In [82]:
def compensate_workers(worker_ids, compensation, for_hit_id=""):
    """Pay workers through a no-work HIT that only they can see and accept.

    Creates a uniquely named qualification, assigns it to `worker_ids`, and
    creates a HIT (question XML read from 'compensation.xml') that requires
    that qualification.

    Args:
        worker_ids: list of worker ID strings.
        compensation: reward as a string; should match the regex
            ^\\d*\\.\\d\\d$ (e.g. "1.00").
        for_hit_id: optional label for what is being compensated; appended
            to the keywords and description, helpful for records.
    """
    with open('compensation.xml', 'r') as xml_file:
        question_xml = xml_file.read()

    # The optional label is appended to both the keywords and the description
    keywords = 'compensation' + (', ' + for_hit_id if for_hit_id else '')
    description = 'Compensation for HIT' + (' ' + for_hit_id if for_hit_id else '')

    # create qual, assign to workers
    qual_id = create_qual({
        'Name': str(uuid4()),  # a qual must have a unique name
        'Keywords': keywords,
        'Description': description,
        'QualificationTypeStatus': 'Active',
        'AutoGranted': False
    })
    assign_qual(qual_id, worker_ids)

    # create HIT requiring qual: one assignment per compensated worker
    three_days = 3 * 24 * 60 * 60
    one_hour = 60 * 60
    task = dict(
        numAssignments=len(worker_ids),
        lifetime=three_days,
        duration=one_hour,
        rewardAmount=compensation,
        title=description,
        keywords=keywords,
        description=description,
    )
    required_qual = dict(
        QualificationTypeId=qual_id,
        Comparator='Exists',
        ActionsGuarded='DiscoverPreviewAndAccept',
    )
    create_hit(task, question_xml, [required_qual])

In [88]:
worker_ids = ['A2INXY39KBM92F'] # worker_id strings in a list
compensation = "2.00" # change to the amount of dollars you want to give
for_hit_id = "imageblocks" # hit_id string (what you are compensating for)
# The call is left commented out so that re-running the notebook does not pay
# again; uncomment it to create the compensation HIT.
#compensate_workers(worker_ids,compensation,for_hit_id)



{'QualificationType': {'QualificationTypeId': '3ETJLUMS0F5LYF7TTJ18UMSPR0K3UN', 'CreationTime': datetime.datetime(2019, 5, 16, 14, 43, 23, tzinfo=tzlocal()), 'Name': '90bb4f98-5787-4851-a49e-bf2183e19042', 'Description': 'Compensation for HIT imageblocks', 'Keywords': 'compensation, imageblocks', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}, 'ResponseMetadata': {'RequestId': 'f1998044-fc1c-45b5-b11a-10c5eb008529', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'f1998044-fc1c-45b5-b11a-10c5eb008529', 'content-type': 'application/x-amz-json-1.1', 'content-length': '317', 'date': 'Thu, 16 May 2019 21:43:23 GMT'}, 'RetryAttempts': 0}}
id 3ETJLUMS0F5LYF7TTJ18UMSPR0K3UN
{'ResponseMetadata': {'RequestId': '08cabb7c-5992-44de-8185-699301ae147c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '08cabb7c-5992-44de-8185-699301ae147c', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Thu, 16 May 2019 21:43:23 

# Download data

Helper to download data from MTurk 

In [26]:
def pretty_print(obj):
    """Pretty-print `obj` to stdout using an indent of 4."""
    pprint.PrettyPrinter(indent=4).pprint(obj)

def get_assignment_content(hits, download_path="", should_print=False):
    """Download the answers of every assignment of `hits`.

    Each assignment's answer XML is parsed; every <Answer> element adds one
    entry to that assignment's result dict, keyed by its QuestionIdentifier.
    FreeText values that are valid JSON are decoded; anything else (including
    an empty answer) is stored unchanged.

    Args:
        hits: iterable of HIT dicts; only 'HITId' is read.
        download_path: if non-empty, the results are also written to this
            path as JSON.
        should_print: if true, pretty-print the results.

    Returns:
        A list with one dict per assignment containing 'HITId',
        'AssignmentId', 'WorkerId' and one key per answered question.
    """
    all_responses = []
    for hit in hits:
        hitid = hit['HITId']
        assignments = get_all_assignments(hitid)
        for a in assignments:
            a_xml = a['Answer']
            # html.parser lower-cases tag names, hence the lower-case lookups
            soup = bs(a_xml, "html.parser")
            answers = soup.find_all("answer")
            results = {'HITId': a['HITId'], 'AssignmentId': a['AssignmentId'], 'WorkerId': a['WorkerId']}
            for ans in answers:
                identifier = ans.find('questionidentifier').string
                answer = ans.find('freetext').string
                try:
                    results[identifier] = json.loads(answer)
                except (TypeError, ValueError):
                    # TypeError: empty answer (None); ValueError: text that is
                    # not JSON. Keep the raw value; other errors propagate.
                    results[identifier] = answer
            all_responses.append(results)
    if should_print:
        pretty_print(all_responses)
    if download_path:
        with open(download_path, 'w') as outfile:
            json.dump(all_responses, outfile)
    return all_responses
            

In [57]:
# Use this cell to download data.
# get_assignment_content already saves the responses to SAVE_PATH (through
# download_path), so writing the same file a second time here is unnecessary.
responses = get_assignment_content(hits, download_path=SAVE_PATH, should_print=True)

[   {   'AssignmentId': '3LRKMWOKB85NZSH6UWAU4DD6JL92ZT',
        'HITId': '3D06DR522779J4FX1K7ED946IR8AM4',
        'WorkerId': 'A1RRMINTJ8BHKG',
        'feedback': None,
        'results': {   'assignmentId': '3LRKMWOKB85NZSH6UWAU4DD6JL92ZT',
                       'hitId': '3D06DR522779J4FX1K7ED946IR8AM4',
                       'results': {   'inputs': {   'bucketNum': 0,
                                                    'params': {   'ENABLE_MSEC_SET_URL': False,
                                                                  'GIVE_FEEDBACK': False,
                                                                  'IMG_HEIGHT': 700,
                                                                  'IMG_WIDTH': 1000,
                                                                  'MAX_INCORRECT_SENTINELS_ALLOWED_TUTORIAL': 3,
                                                                  'MAX_INVALID_ALLOWED_TUTORIAL': 3,
                                                  

                                                                                              'confidence': '4',
                                                                                              'estimation1': '15 '
                                                                                                             'sec',
                                                                                              'estimation2': '25 '
                                                                                                             'sec'}},
                                                                  '5': {   'survey_data': {   'blockAnswer': '1',
                                                                                              'confidence': '3',
                                                                                              'estimation1': '20sec',
                                                                              

                                                                                                       0,
                                                                                                       0,
                                                                                                       0,
                                                                                                       1,
                                                                                                       0,
                                                                                                       0,
                                                                                                       0]}}]},
                                      'outputs': {   'tasks': {   '0': {   'survey_data': {   'blockAnswer': '1',
                                                                                              'confidence': '3',
                          

                                                                  '11': {   'survey_data': {   'ageGroup': 'ageGroup18',
                                                                                               'difference': 'A '
                                                                                                             'couple '
                                                                                                             'of '
                                                                                                             'seconds '
                                                                                                             'quicker '
                                                                                                             'or '
                                                                                                             'longer',
                                                         

                                                                  'NUM_MSEC_CROSS': 750,
                                                                  'NUM_MSEC_FEEDBACK': 2000,
                                                                  'NUM_MSEC_IMAGE': 2000,
                                                                  'NUM_MSEC_SENTINEL': 750,
                                                                  'N_BUCKETS': 1,
                                                                  'N_SUBJ_FILES': 1},
                                                    'subjFilePath': 'assets/task_data/full_subject_files/bucket0/subject_file_24.json',
                                                    'subjId': 24,
                                                    'tasks': [   {   'firstblock': {   'complex': 0,
                                                                                       'images': [   'simple_001_output.jpg',
                                       

        'feedback': None,
        'results': {   'assignmentId': '3TU5ZICBRGPP7FI4NH0INBE50HFQ8N',
                       'hitId': '3UQ1LLR26CWPDMXC186ZDWPHIMIAL2',
                       'results': {   'inputs': {   'bucketNum': 0,
                                                    'params': {   'ENABLE_MSEC_SET_URL': False,
                                                                  'GIVE_FEEDBACK': False,
                                                                  'IMG_HEIGHT': 700,
                                                                  'IMG_WIDTH': 1000,
                                                                  'MAX_INCORRECT_SENTINELS_ALLOWED_TUTORIAL': 3,
                                                                  'MAX_INVALID_ALLOWED_TUTORIAL': 3,
                                                                  'NUM_BLOCKS_TRIAL': 2,
                                                                  'NUM_IMGS_BLOCK': 15,
                    

                                                                                                      0,
                                                                                                      0,
                                                                                                      1,
                                                                                                      0,
                                                                                                      0,
                                                                                                      0]},
                                                                     'orderNum': 3,
                                                                     'secondblock': {   'complex': 1,
                                                                                        'images': [   'complex_037_output.jpg',
                                                      

                                                                                                     'simple_112_output.jpg',
                                                                                                     'simple_113_output.jpg',
                                                                                                     'simple_114_output.jpg',
                                                                                                     'simple_o_029_output.jpg',
                                                                                                     'simple_115_output.jpg',
                                                                                                     'simple_116_output.jpg',
                                                                                                     'simple_117_output.jpg',
                                                                                                     'simple_o_030_o

                                                                                       'index': 0,
                                                                                       'outlier': [   0,
                                                                                                      0,
                                                                                                      0,
                                                                                                      0,
                                                                                                      1,
                                                                                                      0,
                                                                                                      1,
                                                                                                      0,
                                                             

                                                                                              'estimation1': '30 '
                                                                                                             'secs',
                                                                                              'estimation2': '28 '
                                                                                                             'secs'}},
                                                                  '5': {   'survey_data': {   'blockAnswer': '1',
                                                                                              'confidence': '2',
                                                                                              'estimation1': '29 '
                                                                                                             'secs',
                                                                           

                                                                                                       0]}},
                                                                 {   'firstblock': {   'complex': 1,
                                                                                       'images': [   'complex_073_output.jpg',
                                                                                                     'complex_074_output.jpg',
                                                                                                     'complex_075_output.jpg',
                                                                                                     'complex_076_output.jpg',
                                                                                                     'complex_o_019_output.jpg',
                                                                                                     'complex_077_output.jpg',
                          

                                                                     'secondblock': {   'complex': 0,
                                                                                        'images': [   'simple_061_output.jpg',
                                                                                                      'simple_o_016_output.jpg',
                                                                                                      'simple_062_output.jpg',
                                                                                                      'simple_063_output.jpg',
                                                                                                      'simple_064_output.jpg',
                                                                                                      'simple_065_output.jpg',
                                                                                                      'simple_066_output.jpg',
       

                                                                  'MAX_INVALID_ALLOWED_TUTORIAL': 3,
                                                                  'NUM_BLOCKS_TRIAL': 2,
                                                                  'NUM_IMGS_BLOCK': 15,
                                                                  'NUM_MSEC_CHAR': 400,
                                                                  'NUM_MSEC_CROSS': 750,
                                                                  'NUM_MSEC_FEEDBACK': 2000,
                                                                  'NUM_MSEC_IMAGE': 2000,
                                                                  'NUM_MSEC_SENTINEL': 750,
                                                                  'N_BUCKETS': 1,
                                                                  'N_SUBJ_FILES': 1},
                                                    'subjFilePath': 'assets/task_data/full_subject_fil

In [87]:
# Show the 'AvailableBalance' field of the account-balance response.
# NOTE(review): `cl` is presumably the MTurk client created in Setup.
print(cl.get_account_balance()['AvailableBalance'])

12.80


In [148]:
# Show the 'AvailableBalance' field of the account-balance response.
# NOTE(review): `cl` is presumably the MTurk client created in Setup.
print(cl.get_account_balance()['AvailableBalance'])

197.60


In [149]:
# Inputs for a one-off compensation payment; edit these before paying.
worker_ids = ['A3G8OON0TDPN1E'] # worker_id strings in a list
compensation = "2.00" # change to the amount of dollars you want to give
for_hit_id = "outdoorscenes" # hit_id string (what you are compensating for)
# NOTE(review): the call is left commented out, presumably so that re-running
# this cell cannot pay the same workers twice. Uncomment it to run the payment.
#compensate_workers(worker_ids,compensation,for_hit_id)

{'QualificationType': {'QualificationTypeId': '30FWQI8L2GUD26LEBXVDY44TD33TN1', 'CreationTime': datetime.datetime(2019, 5, 14, 16, 17, 7, tzinfo=tzlocal()), 'Name': '179bb8de-2b37-402e-a48e-b77fcf2f76e1', 'Description': 'Compensation for HIT outdoorscenes', 'Keywords': 'compensation, outdoorscenes', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}, 'ResponseMetadata': {'RequestId': '10024adb-99b5-4735-a3a2-815dda00ef2e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '10024adb-99b5-4735-a3a2-815dda00ef2e', 'content-type': 'application/x-amz-json-1.1', 'content-length': '321', 'date': 'Tue, 14 May 2019 23:17:07 GMT'}, 'RetryAttempts': 0}}
id 30FWQI8L2GUD26LEBXVDY44TD33TN1
{'ResponseMetadata': {'RequestId': '44227c64-b2d5-4a5f-a81f-4c27bf2df95a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '44227c64-b2d5-4a5f-a81f-4c27bf2df95a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Tue, 14 May 2019 23:17:

In [150]:
# Show the 'AvailableBalance' field of the account-balance response.
# NOTE(review): `cl` is presumably the MTurk client created in Setup.
print(cl.get_account_balance()['AvailableBalance'])

195.20


In [99]:
# Inputs for a one-off compensation payment; edit these before paying.
worker_ids = ['A8PJHNOLEDTZI'] # worker_id strings in a list
compensation = "2.00" # change to the amount of dollars you want to give
for_hit_id = "outdoorscenes" # hit_id string (what you are compensating for)
# NOTE(review): the call is left commented out, presumably so that re-running
# this cell cannot pay the same workers twice. Uncomment it to run the payment.
#compensate_workers(worker_ids,compensation,for_hit_id)

{'QualificationType': {'QualificationTypeId': '3NSALWUU1S6O8ZVEQND1HKUKIKF9GA', 'CreationTime': datetime.datetime(2019, 5, 16, 14, 51, 42, tzinfo=tzlocal()), 'Name': 'b23acef4-5961-4c23-adcf-ceb2bd93209d', 'Description': 'Compensation for HIT outdoorscenes', 'Keywords': 'compensation, outdoorscenes', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}, 'ResponseMetadata': {'RequestId': 'ed89b9c8-649d-4aca-b1c0-5ee3578532a5', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ed89b9c8-649d-4aca-b1c0-5ee3578532a5', 'content-type': 'application/x-amz-json-1.1', 'content-length': '321', 'date': 'Thu, 16 May 2019 21:51:42 GMT'}, 'RetryAttempts': 0}}
id 3NSALWUU1S6O8ZVEQND1HKUKIKF9GA
{'ResponseMetadata': {'RequestId': 'a79f78b6-9993-45c9-a7bb-f883839cb6b2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a79f78b6-9993-45c9-a7bb-f883839cb6b2', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Thu, 16 May 2019 21:51

In [152]:
# Show the 'AvailableBalance' field of the account-balance response.
# NOTE(review): `cl` is presumably the MTurk client created in Setup.
print(cl.get_account_balance()['AvailableBalance'])

192.80


In [153]:
# Inputs for a one-off compensation payment; edit these before paying.
worker_ids = ['A2G7DTL156PTKA'] # worker_id strings in a list
compensation = "2.00" # change to the amount of dollars you want to give
for_hit_id = "outdoorscenes" # hit_id string (what you are compensating for)
# NOTE(review): the call is left commented out, presumably so that re-running
# this cell cannot pay the same workers twice. Uncomment it to run the payment.
#compensate_workers(worker_ids,compensation,for_hit_id)

{'QualificationType': {'QualificationTypeId': '35724I4GD00VKRLQCCVQDT4Y3D8TOG', 'CreationTime': datetime.datetime(2019, 5, 14, 16, 24, 2, tzinfo=tzlocal()), 'Name': '95c443c3-3543-4f00-826e-9aef5d0802d6', 'Description': 'Compensation for HIT outdoorscenes', 'Keywords': 'compensation, outdoorscenes', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}, 'ResponseMetadata': {'RequestId': '4beac916-359f-479a-93cc-8c5a9b1c499c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '4beac916-359f-479a-93cc-8c5a9b1c499c', 'content-type': 'application/x-amz-json-1.1', 'content-length': '321', 'date': 'Tue, 14 May 2019 23:24:02 GMT'}, 'RetryAttempts': 0}}
id 35724I4GD00VKRLQCCVQDT4Y3D8TOG
{'ResponseMetadata': {'RequestId': '1434b908-5a1a-4939-83ef-37a4727dd94e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '1434b908-5a1a-4939-83ef-37a4727dd94e', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Tue, 14 May 2019 23:24:

In [154]:
# Inputs for a one-off compensation payment; edit these before paying.
worker_ids = ['A38OIUKAISD8E9'] # worker_id strings in a list
compensation = "2.00" # change to the amount of dollars you want to give
for_hit_id = "outdoorscenes" # hit_id string (what you are compensating for)
# NOTE(review): the call is left commented out, presumably so that re-running
# this cell cannot pay the same workers twice. Uncomment it to run the payment.
#compensate_workers(worker_ids,compensation,for_hit_id)

{'QualificationType': {'QualificationTypeId': '3OPOXV2R27M2QLIZBJ7Q20CPDROI8L', 'CreationTime': datetime.datetime(2019, 5, 14, 16, 26, 34, tzinfo=tzlocal()), 'Name': 'a8da18e6-bda0-4e78-9f55-5475e5956231', 'Description': 'Compensation for HIT outdoorscenes', 'Keywords': 'compensation, outdoorscenes', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}, 'ResponseMetadata': {'RequestId': '384bcdc1-6d04-4883-98d7-c29bd9150559', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '384bcdc1-6d04-4883-98d7-c29bd9150559', 'content-type': 'application/x-amz-json-1.1', 'content-length': '321', 'date': 'Tue, 14 May 2019 23:26:34 GMT'}, 'RetryAttempts': 0}}
id 3OPOXV2R27M2QLIZBJ7Q20CPDROI8L
{'ResponseMetadata': {'RequestId': 'f73989aa-5fe4-42f7-8792-9ee1f4e939bd', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'f73989aa-5fe4-42f7-8792-9ee1f4e939bd', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Tue, 14 May 2019 23:26

In [155]:
# Inputs for a one-off compensation payment; edit these before paying.
worker_ids = ['A2N2GFB33ZMYGF'] # worker_id strings in a list
compensation = "2.00" # change to the amount of dollars you want to give
for_hit_id = "outdoorscenes" # hit_id string (what you are compensating for)
# NOTE(review): the call is left commented out, presumably so that re-running
# this cell cannot pay the same workers twice. Uncomment it to run the payment.
#compensate_workers(worker_ids,compensation,for_hit_id)

{'QualificationType': {'QualificationTypeId': '32S8022MNTV3XL33KJEQD4OCS9ML4V', 'CreationTime': datetime.datetime(2019, 5, 14, 16, 31, 11, tzinfo=tzlocal()), 'Name': '645dc56c-f0af-4bc0-afec-fb3e89fdefba', 'Description': 'Compensation for HIT outdoorscenes', 'Keywords': 'compensation, outdoorscenes', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}, 'ResponseMetadata': {'RequestId': 'c984397c-3174-4bb9-98c0-c843030ced87', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c984397c-3174-4bb9-98c0-c843030ced87', 'content-type': 'application/x-amz-json-1.1', 'content-length': '321', 'date': 'Tue, 14 May 2019 23:31:11 GMT'}, 'RetryAttempts': 0}}
id 32S8022MNTV3XL33KJEQD4OCS9ML4V
{'ResponseMetadata': {'RequestId': '17dabb99-61b0-4357-980f-8ff3b2e576a2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '17dabb99-61b0-4357-980f-8ff3b2e576a2', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Tue, 14 May 2019 23:31

In [156]:
# Show the 'AvailableBalance' field of the account-balance response.
# NOTE(review): `cl` is presumably the MTurk client created in Setup.
print(cl.get_account_balance()['AvailableBalance'])

185.60
