# Qualtrics survey creation

In [96]:
import html
import json
import re
from pathlib import Path

import pandas as pd

In [97]:
# Project folders, addressed relative to the notebook's working directory.
data_dir = Path("..") / "data"
# The data folder is an input: it has to be present already.
assert data_dir.exists()
figures_dir = Path("..") / "figures"
# The figures folder is created when missing, then double-checked.
figures_dir.mkdir(exist_ok=True)
assert figures_dir.exists()

In [98]:
# Read the survey template (.qsf) as raw text. The placeholders are filled in
# by plain text substitution further down, so the content is kept as a string
# and only parsed here to confirm that it is valid, non-empty JSON.
template_filepath = data_dir / "raw" / "qualtrics" / "Rori_ranking_annotations_-_template.qsf"
# JSON text is expected to be UTF-8; being explicit keeps the read independent
# of the platform's default locale encoding.
with open(template_filepath, encoding="utf-8") as infile:
    survey_text = infile.read()
# Raises json.JSONDecodeError on malformed JSON, AssertionError on an empty document.
assert json.loads(survey_text)

Placeholder keys in the template to fill (`*` is the 1-based question index appended to the key):

 - `RoriSurveyId`
 - `Response{1,2,3}Q*`
 - `QueryTextQ*`
 - `DocumentQ*`


In [99]:
# Placeholder key prefixes used in the template. The question index (1..N) is
# appended to a prefix to form the full key, e.g. "Response1Q7".
response_keys = ["Response1Q", "Response2Q", "Response3Q"]
query_text_key = "QueryTextQ"
document_key = "DocumentQ"

# validate expected keys
expected_survey_size = 15
for key in response_keys + [query_text_key, document_key]:
    for i in range(1, expected_survey_size + 1):
        qkey = key + str(i)
        # A plain substring test would let "Response1Q1" be satisfied by
        # "Response1Q10"; require that the key is not followed by another digit.
        assert re.search(re.escape(qkey) + r"(?![0-9])", survey_text), qkey

In [100]:
# Sanity check on a single placeholder: print each match together with a little
# surrounding context, then display the list of all matched strings.
# The lookahead keeps "Response1Q2" from matching inside "Response1Q20" etc.
pattern = "Response1Q2(?![0-9])"
for hit in re.finditer(pattern, survey_text):
    print(hit)
    start = hit.start()
    print(survey_text[start - 10 : start + 15])
re.findall(pattern, survey_text)

<re.Match object; span=(78284, 78295), match='Response1Q2'>
Display":"Response1Q2"},"


['Response1Q2']

In [101]:
def convert_text(text, use_br=True):
    """Convert plain text into an HTML fragment for substitution into the survey text.

    Backslashes are replaced with forward slashes and HTML special characters
    are escaped. Every line of the input becomes its own ``<p>...</p>``
    element. Because the result is substituted into the raw survey text (i.e.
    inside a JSON string literal), any remaining control characters are
    JSON-escaped so the document stays parseable.

    Args:
        text: The plain text to convert.
        use_br: If true, put a ``<br>`` between consecutive paragraphs.

    Returns:
        The HTML fragment as a string.
    """
    # Treat "\r\n" and bare "\r" like "\n" so a stray carriage return does not
    # end up inside a paragraph.
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    text = html.escape(text.replace("\\", "/"))
    separator = "</p><br><p>" if use_br else "</p><p>"
    text = "<p>" + separator.join(text.split("\n")) + "</p>"
    # Raw control characters (e.g. tabs) are not allowed inside JSON strings.
    # The text holds no double quotes or backslashes at this point, so
    # json.dumps only rewrites control characters; the quotes it adds around
    # the value are stripped again.
    return json.dumps(text, ensure_ascii=False)[1:-1]


def _fill_placeholder(template_text, placeholder, replacement):
    """Replace the first occurrence of ``placeholder`` in ``template_text``.

    The placeholder must not be directly followed by a digit, so that e.g.
    "Response1Q1" never matches the start of "Response1Q10".

    Returns the updated text. Raises AssertionError (with the placeholder as
    message) if nothing was replaced.
    """
    pattern = re.escape(placeholder) + r"(?![0-9])"
    # A function replacement inserts the text verbatim (no backslash or group
    # reference expansion, unlike a replacement string).
    filled_text, n_replaced = re.subn(pattern, lambda _match: replacement, template_text, count=1)
    assert n_replaced == 1, placeholder
    return filled_text


# Dummy content for every question; only the query text varies with the
# question index, so everything else is built once outside the loop.
r1 = "R1 Multi-line string\n\nSeveral bits here are normal:\n - 1\n - 2\n - 3"
r2 = r"R2 Single line string, with some maybe-problematic characters: /\!@#$%^&*()_-+"
r3 = "R3"
responses = [r1, r2, r3]
document = "Paragraph 1: text goes here\nParagraph 2: Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."

# expected_survey_size is set in the key-validation cell above.
for i in range(1, expected_survey_size + 1):
    query = f"Test query for page {i}"
    for key, response in zip(response_keys, responses):
        survey_text = _fill_placeholder(survey_text, key + str(i), convert_text(response, use_br=False))
    survey_text = _fill_placeholder(survey_text, query_text_key + str(i), convert_text(query))
    survey_text = _fill_placeholder(survey_text, document_key + str(i), convert_text(document))

In [102]:
# Spot-check the substitution: display the `band` characters on either side of
# a fixed offset into the filled-in survey text.
# NOTE(review): the offset looks hand-picked for this particular template;
# confirm it still lands on a filled-in response if the template changes.
band = 20
ind = 67956
window = slice(ind - band, ind + band)
survey_text[window]

'lti-line string</p><p></p><p>Several bit'

In [103]:
# Round-trip through the JSON parser: loading raises if the substitutions broke
# the document, and the survey text is replaced by its re-serialized form.
parsed_survey = json.loads(survey_text)
survey_text = json.dumps(parsed_survey)

In [104]:
# Write the filled-in survey into the derived-data folder.
survey_dir = data_dir / "derived" / "qualtrics"
# parents=True also creates "derived" when it is missing; without it mkdir
# raises FileNotFoundError if the intermediate folder does not exist.
survey_dir.mkdir(parents=True, exist_ok=True)
survey_filepath = survey_dir / "Rori_ranking_annotations_-_survey1.qsf"
# Explicit encoding so the output does not depend on the platform locale.
with open(survey_filepath, "w", encoding="utf-8") as outfile:
    outfile.write(survey_text)