In [None]:
import os
import json
import boto3
import pprint
import html

from pytz import timezone
import pytz
from datetime import datetime

from urllib.parse import urlencode, parse_qs, urlsplit, urlunsplit

import lxml.etree as etree
from xml.etree.ElementTree import Element, SubElement, tostring
from requests.models import PreparedRequest 

pp = pprint.PrettyPrinter(indent=4)

# Service endpoints and IAM credentials are read from environment variables;
# a missing variable raises KeyError right here.
URL_MTURK_SANDBOX = os.environ["URL_MTURK_SANDBOX"]
URL_MTURK_PRODUCTION = os.environ["URL_MTURK_PRODUCTION"]
IAM_USER_ACCESS_KEY = os.environ["IAM_USER_ACCESS_KEY"]
IAM_USER_SECRET_KEY = os.environ["IAM_USER_SECRET_KEY"]
MTURK_REGION_NAME = os.environ["MTURK_REGION_NAME"]

# URL of the remote Heroku endpoint that serves the page rendered inside the
# frame of the mTurk HIT, e.g. https://[YOUR_APP_NAME].herokuapp.com/rec_serv
# NOTE(review): left empty here - presumably it must be filled in before
# URL_HIT is used to build a HIT; confirm.
URL_HIT_REMOTE = ''

# Local counterpart of the remote URL, for testing.
URL_HIT_LOCAL = 'http://0.0.0.0:5000/[YOUR_APP_NAME]'

# Ids of the five companies the workers are asked to resolve. Each id is the
# id of a document in the 'companies' database of MongoDB.
COMPANIES_FOR_PILOT = [
    '5e4bfc63df8ef924f306bd04',
    '5e4bfc63df8ef924f306bd05',
    '5e4bfc63df8ef924f306bd06',
    '5e4bfc63df8ef924f306bd07',
    '5e4bfc63df8ef924f306bd08',
]

# Switches: pick the local (testing) or remote HIT URL, and the Sandbox
# (testing) or production mTurk endpoint.
URL_HIT = URL_HIT_REMOTE
ENVIRONMENT = URL_MTURK_SANDBOX


In [None]:
# Build the mTurk client against the endpoint selected in ENVIRONMENT.

client = boto3.client(
    'mturk',
    aws_access_key_id=IAM_USER_ACCESS_KEY,
    aws_secret_access_key=IAM_USER_SECRET_KEY,
    region_name=MTURK_REGION_NAME,
    endpoint_url=ENVIRONMENT,
)

# Name the account that was actually queried: the balance comes from whichever
# endpoint ENVIRONMENT points at, so the label must not always say "Sandbox".
account_label = "Sandbox" if ENVIRONMENT == URL_MTURK_SANDBOX else "Production"
print("I have $" + client.get_account_balance()['AvailableBalance'] + " in my " + account_label + " account")

In [None]:
# Helper to 'force' the deletion of a HIT: expire it first if it is still assignable, then delete it

def force_HIT_delete(hit_id):
    """Expire a HIT if it is still assignable, then try to delete it.

    Progress is reported with print(): the HIT id, its current status and
    either 'Deleted' or the raised error followed by 'Not deleted'.

    Args:
        hit_id: Id of the HIT, passed as ``HITId`` to the module-level
            mTurk ``client``.

    Returns:
        None. A failing ``delete_hit`` call is caught and printed rather
        than propagated.
    """
    print('HITId:', hit_id)

    # Look up the current status of the HIT
    hit_status = client.get_hit(HITId=hit_id)['HIT']['HITStatus']
    print('HITStatus:', hit_status)

    # A HIT that is still 'Assignable' gets an expiration date in the past
    # (1 Jan 2015) before the delete is attempted
    if hit_status == 'Assignable':
        client.update_expiration_for_hit(HITId=hit_id, ExpireAt=datetime(2015, 1, 1))

    # Best-effort delete: report the failure instead of raising
    try:
        client.delete_hit(HITId=hit_id)
    except Exception as error:
        print(error)
        print('Not deleted')
        return

    print('Deleted')

In [None]:
# Here the ExternalQuestion for MTURK is built based on the company list
# Have a look here: https://docs.aws.amazon.com/AWSMechTurk/latest/AWSMturkAPI/ApiReference_ExternalQuestionArticle.html

def url_builder(url, params):
    """Return ``url`` with ``params`` attached as its query string.

    The work is delegated to ``PreparedRequest.prepare_url`` from requests;
    whatever that call stores in ``.url`` is returned unchanged.

    Args:
        url: Base URL.
        params: Query parameters handed to ``prepare_url``.
    """
    prepared = PreparedRequest()
    prepared.prepare_url(url, params)
    return prepared.url

def external_question_builder( url, params, frame_height=0 ):
    """Build the ExternalQuestion XML payload for an mTurk HIT.

    Args:
        url: Base URL of the page to show inside the HIT frame.
        params: Query parameters appended to ``url`` through ``url_builder``.
        frame_height: Value written into the ``FrameHeight`` element.
            Defaults to 0, the value this notebook has always sent.
            NOTE(review): per the ExternalQuestion documentation 0 asks mTurk
            to size the frame automatically - confirm.

    Returns:
        The serialized ``<ExternalQuestion>`` document as a ``str`` (no XML
        declaration), with the URL's special characters XML-escaped.
    """
    XMLNS = 'http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd'

    external_question = Element('ExternalQuestion', xmlns=XMLNS)
    SubElement(external_question, 'ExternalURL').text = url_builder(url=url, params=params)
    SubElement(external_question, 'FrameHeight').text = str(frame_height)

    # The payload is XML, so use the XML serializer; with encoding='unicode'
    # it emits no declaration and yields the same text for this document.
    return tostring(external_question, method='xml', encoding='unicode')

# Query parameters for the HIT page: one 'comp_<index>' entry per pilot company id
PARAMS = {
    'comp_{}'.format(position): company_id
    for position, company_id in enumerate(COMPANIES_FOR_PILOT)
}

# Print the resulting URL and the ExternalQuestion XML for a quick visual check
print(url_builder(url=URL_HIT, params=PARAMS))
print(external_question_builder(url=URL_HIT, params=PARAMS))


In [None]:
# Qualification requirements used to let in only high-quality workers

QUALIFICATION_HIGH_QUALITY_WORKERS = [
    {
        # mTurk system qualification for the number of approved HITs:
        # the worker needs at least 150
        'QualificationTypeId': '00000000000000000040',
        'Comparator': 'GreaterThanOrEqualTo',
        'IntegerValues': [150],
    },
    {
        # mTurk system qualification for the percentage of approved
        # assignments: the worker needs more than 95%
        'QualificationTypeId': '000000000000000000L0',
        'Comparator': 'GreaterThan',
        'IntegerValues': [95],
    },
]

In [None]:
# Create the HIT on the mTurk endpoint the client is bound to.
# NOTE(review): 'OpenCcorporates.com' in Description looks like a typo for
# 'OpenCorporates.com'; it is worker-facing text, so fix it deliberately.

new_hit = client.create_hit(
    Title='Company reconciliation',
    Description='Match the given companies with the ones in OpenCcorporates.com',
    Keywords='Company matching',
    Reward='0.15',                          # 15 cents per completed assignment
    MaxAssignments=10,                      # up to 10 assignments for this HIT
    LifetimeInSeconds=60 * 60 * 48,         # the HIT stays available for 2 days
    AssignmentDurationInSeconds=30 * 60,    # a worker has 30 minutes to finish
    AutoApprovalDelayInSeconds=600,         # auto-approval after 10 minutes
    Question=external_question_builder(url=URL_HIT, params=PARAMS),
    QualificationRequirements=QUALIFICATION_HIGH_QUALITY_WORKERS,
)
print("A new HIT has been created. You can preview it here:")

# The preview lives on a different host for the Sandbox and for production
if ENVIRONMENT == URL_MTURK_SANDBOX:
    preview_prefix = "https://workersandbox.mturk.com/mturk/preview?groupId="
else:
    preview_prefix = "https://worker.mturk.com/mturk/preview?groupId="
print(preview_prefix + new_hit['HIT']['HITGroupId'])

print("HITID = " + new_hit['HIT']['HITId'] + " (Use to Get Results)")



In [None]:
# Fetch the HIT just created from mTurk and pretty-print the full response
created_hit_id = new_hit['HIT']['HITId']
pp.pprint(client.get_hit(HITId=created_hit_id))


In [None]:
# List the HITs present in mTurk together with their status.
# list_hits returns a single page per call (here at most 100 HITs) plus a
# NextToken while more pages remain, so keep fetching until the token is gone;
# otherwise the count and the listing below would stop at the first page.

hits = client.list_hits(MaxResults=100)
next_token = hits.pop('NextToken', None)
while next_token:
    page = client.list_hits(MaxResults=100, NextToken=next_token)
    hits['HITs'].extend(page['HITs'])
    next_token = page.get('NextToken')
# Keep the merged response self-consistent
hits['NumResults'] = len(hits['HITs'])

print("{} HITs found".format(len(hits['HITs'])))

for i, hit in enumerate(hits['HITs']):
    print( '\nHIT {}: {} (created: {})\n'.format(i, hit['HITId'], hit['CreationTime']) )
    print('  > Status: {}'.format( hit['HITStatus']))
    print('  > Available: {}'.format(  hit['NumberOfAssignmentsAvailable']) )
    print('  > Completed: {}'.format(  hit['NumberOfAssignmentsCompleted']) )
    print('  > Pending: {}'.format(  hit['NumberOfAssignmentsPending']) )

