In [66]:
from easyturk import interface
from easyturk import EasyTurk

import json
import numpy as np
import os

from IPython.display import Markdown, display

# Configuration

In [132]:
# --------------------------------------------------
# Study selector: set `hit_type` to choose which task/study this notebook analyzes.
# Values with a path configuration in this notebook:
#   pilot_allcats_no_translator
#   pilot_binary_no_translator
# --------------------------------------------------
hit_type = "pilot_allcats_no_translator"

In [133]:
# Per-study file locations, keyed by `hit_type`. Each entry is
# (HIT-id list file, directory for per-HIT config JSON, directory for rendered HTML).
_paths_by_hit_type = {
    "pilot_allcats_no_translator": (
        "pilot_hits_v2/pilot_allcats_no_translator.json",
        "pilot_hits_v2/pilot_allcats_no_translator_hits/",
        "analysis_templates/pilot_allcats_no_translator/",
    ),
    "pilot_binary_no_translator": (
        "pilot_hits_v2/pilot_binary_no_translator.json",
        "pilot_hits_v2/pilot_binary_no_translator_hits/",
        "analysis_templates/pilot_binary_no_translator/",
    ),
}

# Fail fast on a study that has no entry in the table.
if hit_type not in _paths_by_hit_type:
    raise ValueError("Unrecognized hit type: {}. Add configuration above.".format(hit_type))

hit_ids_filename, hit_config_json_path, rendered_hits_dir = _paths_by_hit_type[hit_type]

# Run options
use_sandbox = False   # passed as the `sandbox` flag to the EasyTurk calls
run_rendering = True  # when False, the analysis loop only reports the render command

In [134]:
# EasyTurk instance, constructed with the `use_sandbox` option.
# NOTE(review): `et` is not referenced by any later cell visible in this notebook — confirm it is still needed.
et = EasyTurk(sandbox=use_sandbox)

# Helper Functions

In [135]:
def printmd(string):
    """Show `string` in the cell output, wrapped in an IPython `Markdown` object."""
    rendered = Markdown(string)
    display(rendered)
    
def visualize_dialog1(dialog_history):
    """Print a dialog transcript, numbering each question/answer exchange.

    Args:
        dialog_history: iterable of (speaker, utterance) pairs. Recognized
            speakers are "user_question", "bot_answer" and "translator";
            anything else is reported as unexpected.
    """
    # Start at -1 so the first user question becomes Q0.
    ques_i = -1
    for speaker, utterance in dialog_history:
        if speaker == "user_question":
            # Advance the counter only when a new question starts, so the
            # answer that follows is printed with the same number.
            ques_i += 1
            print("Q{}, USER: {}".format(ques_i, utterance))
        elif speaker == "bot_answer":
            print("\t\t\t\t\t\t\t\t Q{}, ANSWERER: {}".format(ques_i, utterance))
        elif speaker == "translator":
            print("TRANSLATOR: FORMATTING TODO")
        else:
            print("UNEXPECTED SPEAKER: {}".format(speaker))
            
def visualize_dialog2(dialog_history, image_selection_correctness):
    """Print a dialog transcript annotated with per-question image-selection results.

    Args:
        dialog_history: iterable of (speaker, utterance) pairs. Recognized
            speakers are "user_question", "bot_answer" and "translator";
            anything else is reported as unexpected.
        image_selection_correctness: sequence of truthy/falsy values indexed
            by question number; a truthy entry is reported as a correct
            image selection for that question.
    """
    # Start at -1 so the first user question becomes Q0.
    ques_i = -1
    num_selections = len(image_selection_correctness)
    for speaker, utterance in dialog_history:
        if speaker == "user_question":
            ques_i += 1
            print("\nQ{}, USER: {}".format(ques_i, utterance))
        elif speaker == "bot_answer":
            print("\t\t\t\t\t\t\t\t Q{}, ANSWERER: {}".format(ques_i, utterance))
            if not 0 <= ques_i < num_selections:
                # Either an answer arrived before any question (index -1 would
                # silently read the LAST selection) or fewer selections than
                # questions were recorded (would raise IndexError).
                print("\t Q{}, no image selection recorded".format(ques_i))
            elif image_selection_correctness[ques_i]:
                print("\t Q{}, correct image selection".format(ques_i))
            else:
                print("\t Q{}, incorrect image selection".format(ques_i))
        elif speaker == "translator":
            print("TRANSLATOR: FORMATTING TODO")
        else:
            print("UNEXPECTED SPEAKER: {}".format(speaker))
            
            
def get_im_selection_correctness(target_image_id, image_selections):
    """Return one flag per selection: whether it equals `target_image_id`.

    The result is a list in the same order as `image_selections`.
    """
    return [chosen == target_image_id for chosen in image_selections]

def get_render_command1(pool_index, difficulty, target_image_id, hit_id):
    """Build the shell command that renders a HIT from explicit config values.

    Args:
        pool_index: value passed to `--pool_index`.
        difficulty: value passed to `--difficulty`.
        target_image_id: value passed to `--target_image_id`.
        hit_id: used as the output file name, `analysis_templates/<hit_id>.html`.

    Returns:
        The command line as a single string.
    """
    command = "python easyturk/render.py --template guesswhich/index_et.html"
    # Leading space is required: without it the template path and the
    # `--output` flag fuse into one token ("index_et.html--output").
    command += " --output analysis_templates/{}.html --specify_config".format(hit_id)
    command += " --pool_index {} --difficulty {} --target_image_id {}" \
                .format(pool_index, difficulty, target_image_id)
    return command

def get_render_command2(hit_id, output_savepath):
    """Build the render command that writes to `analysis_templates/<hit_id>.html`.

    `output_savepath` is passed, single-quoted, as the `--HIT-config-json` value.
    Returns the command line as one string.
    """
    program = "python easyturk/render.py --template guesswhich/index_visualize.html"
    arguments = " --output analysis_templates/{}.html --HIT-config-json \'{}\'".format(hit_id, output_savepath)
    return program + arguments

def get_render_command3(hit_id, rendered_hits_dir, output_savepath):
    """Build the render command that writes into a caller-chosen directory.

    Args:
        hit_id: used as the output file name, `<hit_id>.html`.
        rendered_hits_dir: directory for the rendered HTML; a trailing
            separator is accepted.
        output_savepath: passed, single-quoted, as the `--HIT-config-json` value.

    Returns:
        The command line as a single string.
    """
    # os.path.join avoids the doubled separator that "{}/{}" produced when the
    # directory already ends with "/", and avoids a root-anchored "/<id>.html"
    # when the directory is empty.
    output_html = os.path.join(rendered_hits_dir, "{}.html".format(hit_id))
    command = "python easyturk/render.py --template guesswhich/index_visualize.html"
    command += " --output {} --HIT-config-json \'{}\'".format(output_html, output_savepath)
    return command


# Load

In [136]:
# Read the HIT ids for the selected study. The `with` block closes the file
# handle as soon as parsing finishes instead of leaving it to garbage collection.
with open(hit_ids_filename, 'r') as hit_ids_file:
    hit_ids = json.load(hit_ids_file)
print("Loaded {} HIT ids.".format(len(hit_ids)))
print("Loaded HIT ids = \n{}\n\n".format(hit_ids))

# Hard-coded id lists kept for reference (uncomment to bypass the JSON file).
# all categories:
#hit_ids = [u'3RBI0I35XE2BM4916S6OK1P2A3B3Y6', u'37Y5RYYI0P4Q6G46RBLN9VQ50DFSXG', u'3QD8LUVX4XXP0E6VFF8FGKGDMT85XS', u'35ZRNT9RUIXZSQ5Z05XQIAR0WTBO3F', u'3TL87MO8CMOM2HIV78I0R2C34EBLF4', u'39WICJI5ATRBRUJ62E87Y8Y4GXF3ZX', u'3YGE63DIN8WBG7F4846HX66FWJQW04', u'3W5PY7V3UP6M7IBQ9DYB086R60WYJC', u'3P4C70TRMRGSGHEQISOI9NCF5TBLG3', u'38VTL6WC4ACE0JW5KBUHZYNMOZ25YY']

# binary:
#hit_ids = [u'37SDSEDIN910XV446KUBXVM3J1K18I', u'3ZLW647WALULLJGUD7ZTBXOXVOF32O', u'3K3IX1W4S6QHXBGLOKPFUIAWC8ZAPE', u'30EV7DWJTVUAJNWJPU4MR41MKUBY65', u'3VDI8GSXAFSX1AU7O3FE4IQ9LTC8G4', u'3D5G8J4N5A3ZHRCQGWPVVFP6MLKVT4', u'38G0E1M85M4EH4RFG1F50BB85ZRVUV', u'3XJOUITW8UQ7CJG9UXVLPSQY5T5QT5', u'3EHIMLB7F7YB16PI89XWPI2A0TP8H3', u'3QHITW7OYO82XNH0OX44UN06H4LAQ5']

# Fetch results for those HITs; `results` is keyed by HIT id.
# NOTE(review): approve=False presumably leaves assignments unapproved — confirm against easyturk.
results = interface.fetch_completed_hits(hit_ids, approve=False, sandbox=use_sandbox)
print("Loaded results for {} HITs.".format(len(results)))
print("Results HIT ids = \n{}".format(results.keys()))

Loaded 10 HIT ids.
Loaded HIT ids = 
[u'3RBI0I35XE2BM4916S6OK1P2A3B3Y6', u'37Y5RYYI0P4Q6G46RBLN9VQ50DFSXG', u'3QD8LUVX4XXP0E6VFF8FGKGDMT85XS', u'35ZRNT9RUIXZSQ5Z05XQIAR0WTBO3F', u'3TL87MO8CMOM2HIV78I0R2C34EBLF4', u'39WICJI5ATRBRUJ62E87Y8Y4GXF3ZX', u'3YGE63DIN8WBG7F4846HX66FWJQW04', u'3W5PY7V3UP6M7IBQ9DYB086R60WYJC', u'3P4C70TRMRGSGHEQISOI9NCF5TBLG3', u'38VTL6WC4ACE0JW5KBUHZYNMOZ25YY']


Loaded results for 10 HITs.
Results HIT ids = 
[u'37Y5RYYI0P4Q6G46RBLN9VQ50DFSXG', u'3YGE63DIN8WBG7F4846HX66FWJQW04', u'3RBI0I35XE2BM4916S6OK1P2A3B3Y6', u'3QD8LUVX4XXP0E6VFF8FGKGDMT85XS', u'39WICJI5ATRBRUJ62E87Y8Y4GXF3ZX', u'3W5PY7V3UP6M7IBQ9DYB086R60WYJC', u'3P4C70TRMRGSGHEQISOI9NCF5TBLG3', u'35ZRNT9RUIXZSQ5Z05XQIAR0WTBO3F', u'3TL87MO8CMOM2HIV78I0R2C34EBLF4', u'38VTL6WC4ACE0JW5KBUHZYNMOZ25YY']


# Analyze

In [137]:
# Make sure both output directories exist before the analysis loop writes to them.
# The messages are printed explicitly: a bare string expression is only shown
# when it is the last expression of a cell, so the first message was never displayed.
if not os.path.exists(hit_config_json_path):
    os.makedirs(hit_config_json_path)
print("Saving HIT configurations for rendering in {}.".format(hit_config_json_path))

if not os.path.exists(rendered_hits_dir):
    os.makedirs(rendered_hits_dir)
print("Saving rendered HIT html in {}.".format(rendered_hits_dir))

'Saving rendered HIT html in analysis_templates/pilot_allcats_no_translator/.'

In [138]:
# Accumulators filled in while iterating over the HIT results.
unique_workers = set()  # distinct worker ids seen so far
all_num_correct_selections, all_final_num_failures = [], []  # one entry per HIT each

In [139]:
for hit_id, result in results.items():
    print("="*115)
    print("="*115)
    print("")
    print("Analyzing HIT id {}.".format(hit_id))
    
    worker_id = result[0]['worker_id']
    unique_workers.add(worker_id)
    print("Completed by worker {}.".format(worker_id))
    
    output = json.loads(result[0]['output'])
    print("Output contains keys:{}".format(output.keys()))
    dialog_history = output['dialog_history']
    #print("\nVisualizing dialog:")
    #visualize_dialog(dialog_history)
    target_image_id = output['target_image_id']
    image_selections = output['image_selections']
    image_selection_correctness = get_im_selection_correctness(target_image_id, image_selections)
    print("\nVisualizing dialog:")
    visualize_dialog2(dialog_history, image_selection_correctness)
    
    num_correct_selections = sum(image_selection_correctness)
    all_num_correct_selections.append(num_correct_selections)
    print("\nDuring dialog, worker made {} correct image selections.".format(num_correct_selections))
    
    final_image_list = output['final_image_list']
    final_num_failures = len(final_image_list)-1
    all_final_num_failures.append(final_num_failures)
    print("\nFinal guessing period, worker failed {} times before finding target.".format(final_num_failures))
    
    #pool_index = output['pool_index']
    #difficulty = output['difficulty_level']
    #render_command = get_render_command1(pool_index, difficulty, target_image_id)
    output_savepath = os.path.join(hit_config_json_path, hit_id+".json")
    #render_command = get_render_command2(hit_id, output_savepath)
    render_command = get_render_command3(hit_id, rendered_hits_dir, output_savepath)
    
    if run_rendering:
        print("\nSaving HIT output as json to {}".format(output_savepath))
        json.dump(output, open(output_savepath,'w'))
        print("\nExecuting render command = {}".format(render_command))
        !{render_command}
    else:
        print("Already rendered. Would've used command = {}".format(render_command))


    
    
    
    
    


Analyzing HIT id 37Y5RYYI0P4Q6G46RBLN9VQ50DFSXG.
Completed by worker ACAJFF4MF5S5X.
Output contains keys:[u'image_selections', u'total_payment', u'difficulty_level', u'pool_index', u'answerer_type', u'study_type', u'dialog_history', u'translator_type', u'study_description', u'target_image_id', u'current_in_game_bonus', u'final_image_list', u'current_finalround_bonus']

Visualizing dialog:

Q0, USER: is there a train in the image?
								 Q0, ANSWERER: yes
	 Q0, incorrect image selection

Q1, USER: is there yellow on the front of the train?
								 Q1, ANSWERER: yes
	 Q1, incorrect image selection

Q2, USER: Are there trees in the image?
								 Q2, ANSWERER: yes
	 Q2, incorrect image selection

Q3, USER: Is there a pole on the left side?
								 Q3, ANSWERER: yes
	 Q3, incorrect image selection

Q4, USER: Are the leaves on the trees yellow?
								 Q4, ANSWERER: yes
	 Q4, incorrect image selection

Q5, USER: is the sky blue?
								 Q5, ANSWERER: yes
	 Q5, incorrect image select


Analyzing HIT id 3W5PY7V3UP6M7IBQ9DYB086R60WYJC.
Completed by worker A3KVYNEM12AD1C.
Output contains keys:[u'image_selections', u'total_payment', u'difficulty_level', u'pool_index', u'answerer_type', u'study_type', u'dialog_history', u'translator_type', u'study_description', u'target_image_id', u'current_in_game_bonus', u'final_image_list', u'current_finalround_bonus']

Visualizing dialog:

Q0, USER: Are there people in the image?
								 Q0, ANSWERER: yes
	 Q0, incorrect image selection

Q1, USER: Is there more than one person in the image?
								 Q1, ANSWERER: yes
	 Q1, incorrect image selection

Q2, USER: Is the image outside?
								 Q2, ANSWERER: yes
	 Q2, incorrect image selection

Q3, USER: Is it the bottom left picture?
								 Q3, ANSWERER: yes
	 Q3, incorrect image selection

Q4, USER: Alrighty
								 Q4, ANSWERER: yes
	 Q4, incorrect image selection

Q5, USER: Is there a vehicle in the image?
								 Q5, ANSWERER: yes
	 Q5, incorrect image selection

Q6, USER: Is th

In [140]:
# Report how many distinct workers appear in the analyzed HITs, then list them.
worker_count = len(unique_workers)
print("{} unique workers completed tasks:\n{}".format(worker_count, unique_workers))

8 unique workers completed tasks:
set([u'AO33H4GL9KZX9', u'A3KVYNEM12AD1C', u'A8TIL9U6YHG5', u'ACAJFF4MF5S5X', u'A2WGW5Y3ZFBDEC', u'A1T643M1P572AA', u'A4J4GGMKJ68L0', u'A1PUHCEBSOWETV'])


In [141]:
# Summary of per-HIT correct image selections made during the dialog.
# Each statistic is computed right before it is printed, in the listed order.
print("Number of correct selections during dialog:\n\n")
for template, statistic in (
    ("Average = {}", np.mean),
    ("Standard Deviation = {}", np.std),
    ("Sample size = {}", len),
    ("\nMaximum = {}", np.max),
    ("Raw values = \n{}", lambda values: values),
):
    print(template.format(statistic(all_num_correct_selections)))


Number of correct selections during dialog:


Average = 0.2
Standard Deviation = 0.6
Sample size = 10

Maximum = 2
Raw values = 
[0, 0, 0, 0, 0, 0, 0, 0, 2, 0]


In [142]:
# Summary of per-HIT wrong guesses in the final guessing period.
# Each statistic is computed right before it is printed, in the listed order.
print("Number of errors before correct during final guessing:\n\n")
for template, statistic in (
    ("Average = {}", np.mean),
    ("Standard Deviation = {}", np.std),
    ("Sample size = {}", len),
    ("\nMaximum = {}", np.max),
    ("Minimum = {}", np.min),
    ("Raw values = \n{}", lambda values: values),
):
    print(template.format(statistic(all_final_num_failures)))



Number of errors before correct during final guessing:


Average = 10.6
Standard Deviation = 4.52106182218
Sample size = 10

Maximum = 18
Minimum = 5
Raw values = 
[5, 16, 18, 5, 7, 14, 15, 8, 10, 8]
