In [1]:
import os
import re
from random import choice
import json
from collections import defaultdict
from os.path import isfile
import sys
from copy import deepcopy

In [2]:
# Social dimensions to process; each one names a `convs_<dim>` conversation folder.
social_dims = [
    'baseline',
    'support',
    'knowledge',
    'conflict',
    'similarity',
    'fun',
    'status',
    'trust',
    'identity',
]

In [3]:
def get_convincing_arguments(social_dim, return_counts=False):
    """
    Collect the Convincer arguments that convinced the Skeptic for one social dimension.

    Every regular file in ``../conversation_generation/convs/convs_{social_dim}/``
    is read and split on the ``Convincer:`` / ``Skeptic:`` speaker tags. The
    argument is the text following the third speaker tag, and a conversation
    counts as convincing when the first space-delimited word of its final turn
    contains "yes" (case-insensitive).

    Args:
        social_dim: Name of the social dimension; selects the ``convs_{social_dim}`` folder.
        return_counts: If True, return only the number of convincing arguments.

    Returns:
        A list of ``(argument_text, "convs_{social_dim}/{filename}")`` tuples
        ordered by filename, or the length of that list when ``return_counts``
        is True.

    Raises:
        IndexError: If a conversation contains fewer than three speaker tags.
    """
    conv_dir = f"../conversation_generation/convs/convs_{social_dim}/"
    convincing_arguments = []
    # os.listdir returns entries in arbitrary order; sort for reproducible results.
    for filename in sorted(os.listdir(conv_dir)):
        path = f"{conv_dir}{filename}"
        if not os.path.isfile(path):
            # Skip sub-directories (e.g. .ipynb_checkpoints) that open() cannot read.
            continue
        with open(path, "r") as f:
            conv = f.read().strip()
        # The capturing group keeps the speaker tags, so turn texts sit at even indices.
        conv_hist = re.split(pattern = "(Convincer:|Skeptic:)", string = conv)
        convincer_argument = conv_hist[6].strip()
        skeptic_opinion = conv_hist[-1].strip()
        opinion_signal = str.lower(skeptic_opinion.split(" ")[0])
        if "yes" in opinion_signal:
            convincing_arguments.append((convincer_argument,f"convs_{social_dim}/{filename}"))
    if return_counts:
        return len(convincing_arguments)
    return convincing_arguments


# Convincing (argument, filename) pairs for every social dimension, keyed by dimension name.
convincing_arguments_by_dim = dict(
    zip(social_dims, map(get_convincing_arguments, social_dims))
)

In [4]:
# Number of distinct convincing arguments per dimension
for dim in social_dims:
    print(dim, len(set(convincing_arguments_by_dim[dim])))

baseline 8
support 64
knowledge 22
conflict 3
similarity 17
fun 20
status 42
trust 65
identity 7


In [6]:
def select_arguments_for_matches(convincing_arguments_by_dim=convincing_arguments_by_dim):
    """
    Randomly draw convincing arguments for the pairs of social dimensions.

    Five passes are made over the dimension pairs taken from ``social_dims``.
    In each pass one argument is drawn at random (with replacement) for each
    side of a pair, and the resulting matchup is appended to that pair's list.
    A dimension is never paired with itself, and a pair already stored under
    the reversed key is not stored again. Identical argument texts share a
    single integer id, assigned in order of first appearance.

    Returns a plain dict mapping "dim1,dim2" to a list of matchups of the form
    {'arg1': {...}, 'arg2': {...}}, where each side holds 'text', 'id' and
    'filename'. This dict is what gets used to create the MTurk batch.
    """
    argument_ids = {}
    pairings = defaultdict(list)

    for _round in range(5):
        for first_dim in social_dims:
            for second_dim in social_dims:
                # Skip self-pairings and pairs already recorded in the opposite order.
                if first_dim == second_dim or f"{second_dim},{first_dim}" in pairings:
                    continue
                matchup = {}
                for slot, dim in enumerate((first_dim, second_dim), start=1):
                    text, source_file = choice(convincing_arguments_by_dim[dim])
                    # A new text gets the next free id; a known text keeps its existing one.
                    arg_id = argument_ids.setdefault(text, len(argument_ids))
                    matchup[f'arg{slot}'] = {'text': text, 'id': arg_id, 'filename': source_file}
                pairings[f"{first_dim},{second_dim}"].append(deepcopy(matchup))
    return dict(pairings)

# Sample the matchups only when no saved file exists yet; otherwise reuse the
# saved ones so the random selection is not redrawn on every run.
if not isfile('total_5_arguments_for_mturk.json'):
    matches = select_arguments_for_matches()
    with open('total_5_arguments_for_mturk.json','w') as f:
        json.dump(matches,f)
else:
    with open('total_5_arguments_for_mturk.json') as matchfile:
        matches = json.load(matchfile)

In [3]:
def get_id_to_text(matches):
    """
    Build lookup tables between argument ids and argument texts.

    ``matches`` maps each key to a list of matchups, where every matchup has
    'arg1' and 'arg2' entries carrying 'id' and 'text'.

    Returns a tuple ``(id_to_text, text_to_id)``; when an id or text occurs
    more than once, the last occurrence wins.
    """
    entries = [
        side
        for pairings in matches.values()
        for pairing in pairings
        for side in (pairing['arg1'], pairing['arg2'])
    ]
    id_to_text = {entry['id']: entry['text'] for entry in entries}
    text_to_id = {entry['text']: entry['id'] for entry in entries}
    return id_to_text, text_to_id
# Build the id <-> text lookup tables for the arguments stored in `matches`.
id_to_text,text_to_id = get_id_to_text(matches)

In [11]:
def insert_newline_every_x_chars(text, max_len):
    """
    Word-wrap ``text`` so the SVG rendering does not grow infinitely wide.

    Words are split on single spaces and each is emitted followed by one space.
    A word that would push the running line length past ``max_len`` is moved to
    a new line. Newlines already present in ``text`` are kept and reset the
    running length.
    """
    pieces = []
    line_len = 0

    # Padding '\n' with spaces makes every existing line break its own token.
    for token in text.replace('\n', ' \n ').split(" "):
        if token == "":
            # Produced by consecutive spaces; nothing to emit.
            continue
        if token == "\n":
            pieces.append(token)
            line_len = 0
            continue
        if line_len + len(token) > max_len:
            # Word does not fit on the current line: break before it.
            pieces.append("\n")
            line_len = 0
        pieces.append(token + " ")
        line_len += len(token) + 1

    return "".join(pieces)


In [12]:
def make_svgs(text):
    """
    Given a text, create an SVG image which can be displayed on MTurk.

    The text is word-wrapped at 57 characters with
    ``insert_newline_every_x_chars`` and each resulting line becomes one
    ``<tspan>`` inside a 700-unit-wide white image.

    Args:
        text: Plain text to render. XML special characters (&, <, >) are
            escaped so the result stays well-formed.

    Returns:
        The SVG document as a string.
    """
    # Local import keeps this block self-contained; escape() handles &, < and >.
    from xml.sax.saxutils import escape

    template = """
     <svg viewBox="0 0 700 {img_height}" xmlns="http://www.w3.org/2000/svg">
  <style>
    .small {{
      font: 20px sans-serif;
    }}
    .heavy {{
      font: bold 30px sans-serif;
    }}

  </style>

<rect x="0" y="0" width="700" height="{img_height}" fill="white"/>
  <text x="0" y="35" class="small">
  {text_template}
  </text>
  </svg>"""
    text_template =   """<tspan x="10" dy="1.2em">
     {new_text}
  </tspan>"""
    # Wrap on the raw text so line widths reflect visible characters, then escape.
    lines = insert_newline_every_x_chars(text,57).split('\n')
    pieces_of_text = [text_template.format(new_text=escape(line)) for line in lines]

    # The <text> starts at y=35 and every tspan moves down 1.2em of the 20px
    # font (24 units), so the last baseline sits at 35 + 24 * n. The original
    # height of 26 * n is smaller than that for short texts, which pushes the
    # bottom lines outside the viewBox; keep 26 * n where it is already enough.
    n_lines = len(lines)
    img_height = max(n_lines * 26, 35 + 24 * n_lines + 10)

    full_svg = template.format(text_template = "\n".join(pieces_of_text),img_height=img_height)
    return full_svg

In [202]:
# Write one SVG per argument, in ascending id order.
for text_id in sorted(id_to_text):
    text = id_to_text[text_id]
    # Keep only the part of the argument before the first '\n_____' separator.
    text_new = text.partition('\n_____')[0]

    with open(f'svg_img/image{text_id}.svg', 'w') as f:
        f.write(make_svgs(text_new) + '\n')

In [16]:
# Render every file in control_samples/ to an SVG whose name is derived from
# the file name with '.' and '_' replaced by '-'.
for file in os.listdir('control_samples'):
    with open(f'control_samples/{file}') as f:
        text = f.read()
    with open(f'svg_img/image-{file.replace(".","-").replace("_","-")}.svg', 'w') as g:
        g.write(make_svgs(text) + '\n')

In [4]:
import json
# Reload the saved matchups and show how many top-level keys the file holds.
with open('total_5_arguments_for_mturk.json') as matchfile:
    matches = json.loads(matchfile.read())
print(len(matches))

36
