# Amazon Mechanical Turk
### Adapted from https://blog.mturk.com/tutorial-mturk-using-python-in-jupyter-notebook-17ba0745a97f

In [1]:
import boto3
import xmltodict
import json
import requests
import datetime



In [2]:
# Switch between the live marketplace and the sandbox.
# When False, the "sandbox" settings below are selected.
create_hits_in_production = False

# API endpoint and Worker-facing preview URL for each MTurk environment.
environments = dict(
    production=dict(
        endpoint="https://mturk-requester.us-east-1.amazonaws.com",
        preview="https://www.mturk.com/mturk/preview",
    ),
    sandbox=dict(
        endpoint="https://mturk-requester-sandbox.us-east-1.amazonaws.com",
        preview="https://workersandbox.mturk.com/mturk/preview",
    ),
)

if create_hits_in_production:
    mturk_environment = environments["production"]
else:
    mturk_environment = environments["sandbox"]

# Credentials come from the named profile 'mturk'; the endpoint decides
# whether requests go to production or to the sandbox.
session = boto3.Session(profile_name='mturk')
client = session.client(
    'mturk',
    region_name='us-east-1',
    endpoint_url=mturk_environment['endpoint'],
)


In [3]:
# Show the balance reported for the account in the selected environment.
balance_response = client.get_account_balance()
print(balance_response['AvailableBalance'])

10000.00


In [33]:
# Fetch a fresh captcha image from Project Euler and upload it to S3.
#
# SECURITY: credentials are resolved through boto3's default credential chain
# (environment variables, shared credentials/config files, or an attached
# role) instead of being hard-coded in this cell. Any access key that was
# previously committed here must be treated as compromised and rotated.
bucket_name = 'mturk-project-euler-captcha'
# Region that appears in the public object URL printed at the end of the cell.
bucket_region = 'us-east-2'
s3 = boto3.resource('s3', region_name=bucket_region)
bucket = s3.Bucket(bucket_name)

url = "https://projecteuler.net/captcha/show_captcha.php"

# The upload key is the current local timestamp, e.g. 20190702-001941.png
current_date_string = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
file_name = f'{current_date_string}.png'

# `with` releases the streamed connection when done; the timeout keeps the
# cell from hanging forever if the server never answers.
with requests.get(url, stream=True, timeout=30) as r:
    # Fail loudly instead of uploading an HTTP error page as a .png
    r.raise_for_status()
    # Have urllib3 undo any Content-Encoding (gzip/deflate) while streaming,
    # so the stored object is the actual image bytes.
    r.raw.decode_content = True
    bucket.upload_fileobj(r.raw, file_name, ExtraArgs={'ACL': 'public-read'})

print(f"https://{bucket_name}.s3.{bucket_region}.amazonaws.com/{file_name}")

https://mturk-project-euler-captcha.s3.us-east-2.amazonaws.com/20190702-001941.png


In [34]:
# Load the HTML shown to Workers and wrap it in the HTMLQuestion XML envelope.
# The context manager closes the file handle as soon as the content is read.
with open('./captchaQuestion.html', 'r') as html_file:
    html_layout = html_file.read()

# The single {} placeholder receives the HTML, which is protected by CDATA.
QUESTION_XML = """<HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">
        <HTMLContent><![CDATA[{}]]></HTMLContent>
        <FrameHeight>650</FrameHeight>
        </HTMLQuestion>"""
question_xml = QUESTION_XML.format(html_layout)

In [35]:
# Keyword arguments describing the HIT (passed on to create_hit).
TaskAttributes = dict(
    # Upper limit on assignments per HIT
    MaxAssignments=5,
    # How long the task will be available on MTurk (1 hour)
    LifetimeInSeconds=60 * 60,
    # How long Workers have to complete each item (1 minute)
    AssignmentDurationInSeconds=60 * 1,
    # The reward you will offer Workers for each response
    Reward='0.01',
    Title='Solve image captcha (5 numbers)',
    Keywords='image, captcha, numbers',
    Description='Transcribe this image, should be five numbers',
)

In [36]:
# Publish one HIT for the uploaded captcha image and remember its id.
results = []
hit_type_id = ''

# Public S3 address of the captcha image; it replaces the ${content}
# placeholder inside the question XML.
url = f"https://{bucket_name}.s3.us-east-2.amazonaws.com/{file_name}"
question_for_image = question_xml.replace('${content}', url)

response = client.create_hit(Question=question_for_image, **TaskAttributes)
created_hit = response['HIT']
hit_type_id = created_hit['HITTypeId']
results.append({'url': url, 'hit_id': created_hit['HITId']})

print("You can view the HITs here:")
preview_link = mturk_environment['preview'] + "?groupId={}".format(hit_type_id)
print(preview_link)

You can view the HITs here:
https://workersandbox.mturk.com/mturk/preview?groupId=3T2WVEM8EKMG2MZ10I0ILPGP5P9WXQ


In [37]:
# For every HIT in `results`: record its status, collect the Workers' answers
# and approve assignments that are still in the 'Submitted' state.
for item in results:

    # Get the status of the HIT
    hit = client.get_hit(HITId=item['hit_id'])
    item['status'] = hit['HIT']['HITStatus']

    # Get a list of the Assignments that have been submitted
    assignmentsList = client.list_assignments_for_hit(
        HITId=item['hit_id'],
        AssignmentStatuses=['Submitted', 'Approved'],
        MaxResults=10
    )
    assignments = assignmentsList['Assignments']
    item['assignments_submitted_count'] = len(assignments)

    answers = []
    for assignment in assignments:

        # Retrieve the attributes for each Assignment
        worker_id = assignment['WorkerId']
        assignment_id = assignment['AssignmentId']

        # Retrieve the value submitted by the Worker from the XML
        answer_dict = xmltodict.parse(assignment['Answer'])
        try:
            answer = answer_dict['QuestionFormAnswers']['Answer']['FreeText']
            # NOTE(review): int() drops leading zeros ("00123" -> 123);
            # confirm that is acceptable for a five-digit captcha.
            parsed_answer = int(answer)
        except (KeyError, TypeError, ValueError):
            # An empty, non-numeric or unexpectedly shaped answer must not
            # abort the whole loop. Skip it and leave the assignment
            # unapproved so it can be reviewed by hand.
            print(
                f"Skipping unparseable answer from worker {worker_id} "
                f"(assignment {assignment_id})"
            )
            continue
        answers.append(parsed_answer)

        # Approve the Assignment (if it hasn't been already)
        if assignment['AssignmentStatus'] == 'Submitted':
            client.approve_assignment(
                AssignmentId=assignment_id,
                OverrideRejection=False
            )

    # Add the answers that have been retrieved for this item
    item['answers'] = answers
print(json.dumps(results, indent=2))

[
  {
    "url": "https://mturk-project-euler-captcha.s3.us-east-2.amazonaws.com/20190702-001941.png",
    "hit_id": "3OKP4QVBP23OJWTPGAY5UZQCLXDAG7",
    "status": "Assignable",
    "assignments_submitted_count": 1,
    "answers": [
      30022
    ]
  }
]
