In [1]:
import json
import re
import pandas as pd
import numpy as np

def add_survey_questions(data, survey_id, starting_qid, block_id, section, text):
    new_questions = [
        {
          "SurveyID": survey_id,
          "Element": "SQ",
          "PrimaryAttribute": f"QID{starting_qid}",
          "SecondaryAttribute": " ",
          "TertiaryAttribute": None,
          "Payload": {
            "QuestionText": "<hr width=\"100%\" color=\"black\" size=\"5\">",
            "DataExportTag": f"QID{starting_qid}",
            "QuestionType": "DB",
            "Selector": "TB",
            "DataVisibility": {
              "Private": False,
              "Hidden": False
            },
            "Configuration": {
              "QuestionDescriptionOption": "UseText"
            },
            "QuestionDescription": " ",
            "ChoiceOrder": [],
            "Validation": {"Settings": {"Type": "None"}},
            "Language": [],
            "NextChoiceId": 1,
            "NextAnswerId": 1,
            "QuestionID": f"QID{starting_qid}",
          }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 1}",
            "SecondaryAttribute": f"{text[:10]}",
            # "SecondaryAttribute": f"{section}: {text}",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": f"<b>{text}</b>",
                # "QuestionText": f"<strong>{section}:</strong>&nbsp;{text}",
                "DataExportTag": f"Q{starting_qid + 1}",
                "QuestionType": "DB",
                "Selector": "TB",
                "DataVisibility": {
                    "Private": False,
                    "Hidden": False
                },
                "Configuration": {
                    "QuestionDescriptionOption": "UseText"
                },
                "QuestionDescription": f"{text[:10]}",
                # "QuestionDescription": f"{section}: {text}",
                "ChoiceOrder": [],
                "Validation": {
                    "Settings": {
                        "Type": "None"
                    }
                },
                "Language": [],
                "NextChoiceId": 1,
                "NextAnswerId": 1,
                "QuestionID": f"QID{starting_qid + 1}"
            }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 2}",
            "SecondaryAttribute": "Please rate this advice from 1 to 5 (mark N/A if it contains no advice)",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": "Please rate this advice from 1 to 5 (mark N/A if it contains no advice)\n",
                "QuestionID": f"QID{starting_qid + 2}",
                "QuestionType": "MC",
                "Selector": "SAHR",
                "SubSelector": "TX",
                "QuestionDescription": "Please rate this advice from 1 to 5 (mark N/A if it contains no advice)",
                "Choices": {
                    "1": {"Display": "1"},
                    "2": {"Display": "2"},
                    "3": {"Display": "3"},
                    "4": {"Display": "4"},
                    "5": {"Display": "5"},
                    "6": {"Display": "N/A"}
                },
                "ChoiceOrder": ["1", "2", "3", "4", "5", "6"],
                "Validation": {
                    "Settings":{
                        "ForceResponse":"ON",
                        "ForceResponseType":"ON",
                        "Type":"None"
                    }
                },
                "Language": [],
                "DataExportTag": f"QID{starting_qid + 2}",
                "DataVisibility": {
                    "Private": False,
                    "Hidden": False
                },
                "Configuration": {
                    "QuestionDescriptionOption": "UseText",
                    "LabelPosition": "SIDE"
                },
                "NextChoiceId": 7,
                "NextAnswerId": 1,
                "GradingData": []
            }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 3}",
            "SecondaryAttribute": "(Optional) Please select all of the following that apply",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": "(Optional) Please select all of the following that apply\n",
                "QuestionID": f"QID{starting_qid + 3}",
                "QuestionType": "MC",
                "Selector": "MAHR",
                "SubSelector": "TX",
                "QuestionDescription": "(Optional) Please select all of the following that apply",
                "Choices": {
                    "1": {"Display": "Overly generic"},
                    "2": {"Display": "Not realistic for patient"},
                    "3": {"Display": "Bad or inappropriate advice"},
                    "4": {"Display": "Factually incorrect"},
                    "5": {"Display": "Not following guidelines"}
                },
                "ChoiceOrder": ["1", "2", "3", "4", "5"],
                "Validation": {"Settings": {"Type": "None"}},
                "Language": [],
                "DataExportTag": f"QID{starting_qid + 3}",
                "DataVisibility": {
                    "Private": False,
                    "Hidden": False
                },
                "Configuration": {
                    "QuestionDescriptionOption": "UseText",
                    "LabelPosition": "SIDE"
                },
                "NextChoiceId": 5,
                "NextAnswerId": 1
            }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 4}",
            "SecondaryAttribute": "(Optional) Comment",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": "(Optional) Comment\n",
                "QuestionID": f"QID{starting_qid + 4}",
                "QuestionType": "TE",
                "Selector": "SL",
                "QuestionDescription": "(Optional) Comment",
                "Choices": {"1": {"Display": "Comments"}},
                "ChoiceOrder": ["1"],
                "Validation": {"Settings": {"Type": "None"}},
                "Language": [],
                "SearchSource": {"AllowFreeResponse": "false"},
                "DataExportTag": f"QID{starting_qid + 4}",
                "DataVisibility": {
                    "Private": False,
                    "Hidden": False
                },
                "Configuration": {"QuestionDescriptionOption": "UseText"},
                "NextChoiceId": 2,
                "NextAnswerId": 1,
                "GradingData": [
                    {
                        "TextEntry": "Enter Text Response For Grading",
                        "Grades": {"SC_cSA5rQ5vwEghWzc": "#"}
                    }
                ]
            }
        }
    ]

    if 'SurveyElements' in data:
        data['SurveyElements'].extend(new_questions)
    else:
        data['SurveyElements'] = new_questions

    # 在SurveyElements[0]中的BlockElements中增加对应的问题编号
    if data['SurveyElements'][0]['Payload']:
        # block_id = "2"  # 假设我们要添加到Payload中的第二个block，如果需要动态选择，请修改此处逻辑
        if block_id in data['SurveyElements'][0]['Payload']:
            block_elements = data['SurveyElements'][0]['Payload'][block_id]['BlockElements']
            for i in range(starting_qid, starting_qid + len(new_questions)):
                block_elements.append({"Type": "Question", "QuestionID": f"QID{i}"})
        else:
            print(f"Block ID {block_id} not found in SurveyElements[0]['Payload']")
    else:
        print("Payload not found in SurveyElements[0]")

    return data

import json

def add_overall_questions(data, survey_id, starting_qid, block_id):
    predefined_elements = [
        {
          "SurveyID": survey_id,
          "Element": "SQ",
          "PrimaryAttribute": f"QID{starting_qid}",
          "SecondaryAttribute": " ",
          "TertiaryAttribute": None,
          "Payload": {
            "QuestionText": "<hr width=\"100%\" color=\"black\" size=\"5\">",
            "DataExportTag": f"QID{starting_qid}",
            "QuestionType": "DB",
            "Selector": "TB",
            "DataVisibility": {
              "Private": False,
              "Hidden": False
            },
            "Configuration": {
              "QuestionDescriptionOption": "UseText"
            },
            "QuestionDescription": " ",
            "ChoiceOrder": [],
            "Validation": {
              "Settings": {
                "Type": "None"
              }
            },
            "Language": [],
            "NextChoiceId": 1,
            "NextAnswerId": 1,
            "QuestionID": f"QID{starting_qid}",
          }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 1}",
            "SecondaryAttribute": "As a whole, please rate this entire response from 1 to 5",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": "As a whole, please rate this entire response from 1 to 5",
                "QuestionID": f"QID{starting_qid + 1}",
                "QuestionType": "MC",
                "Selector": "SAHR",
                "SubSelector": "TX",
                "QuestionDescription": "As a whole, please rate this entire response from 1 to 5",
                "Choices": {
                    "1": {"Display": "1"},
                    "2": {"Display": "2"},
                    "3": {"Display": "3"},
                    "4": {"Display": "4"},
                    "5": {"Display": "5"}
                },
                "ChoiceOrder": ["1", "2", "3", "4", "5"],
                "Validation": {
                    "Settings":{
                        "ForceResponse":"ON",
                        "ForceResponseType":"ON",
                        "Type":"None"
                    }
                },
                "Language": [],
                "DataExportTag": f"QID{starting_qid + 1}",
                "DataVisibility": {"Private": False, "Hidden": False},
                "Configuration": {
                    "QuestionDescriptionOption": "UseText",
                    "LabelPosition": "SIDE"
                },
                "NextChoiceId": 6,
                "NextAnswerId": 1,
                "QuestionJS": False
            }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 2}",
            "SecondaryAttribute": "Does this response miss an important point? If yes, please describe what is missing in the commen...",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": "Does this response miss an important point? If yes, please describe what is missing in the comments section below\n",
                "QuestionID": f"QID{starting_qid + 2}",
                "QuestionType": "MC",
                "Selector": "MAHR",
                "SubSelector": "TX",
                "QuestionDescription": "Does this response miss an important point? If yes, please describe what is missing in the commen...",
                "Choices": {
                    "1": {"Display": "Yes"},
                    "2": {"Display": "No"}
                },
                "ChoiceOrder": ["1", "2"],
                "Validation": {"Settings": {"Type": "None"}},
                "Language": [],
                "DataExportTag": f"QID{starting_qid + 2}",
                "DataVisibility": {"Private": False, "Hidden": False},
                "Configuration": {
                    "QuestionDescriptionOption": "UseText",
                    "LabelPosition": "SIDE"
                },
                "NextChoiceId": 3,
                "NextAnswerId": 1
            }
        },
        {
            "SurveyID": survey_id,
            "Element": "SQ",
            "PrimaryAttribute": f"QID{starting_qid + 3}",
            "SecondaryAttribute": "(Optional) Overall Comment",
            "TertiaryAttribute": None,
            "Payload": {
                "QuestionText": "(Optional) Overall Comment\n",
                "QuestionID": f"QID{starting_qid + 3}",
                "QuestionType": "TE",
                "Selector": "SL",
                "QuestionDescription": "(Optional) Overall Comment",
                "Choices": {
                    "1": {"Display": "Comments"}
                },
                "ChoiceOrder": ["1"],
                "Validation": {"Settings": {"Type": "None"}},
                "Language": [],
                "SearchSource": {"AllowFreeResponse": "false"},
                "DataExportTag": f"QID{starting_qid + 3}",
                "DataVisibility": {"Private": False, "Hidden": False},
                "Configuration": {"QuestionDescriptionOption": "UseText"},
                "NextChoiceId": 2,
                "NextAnswerId": 1
            }
        }
    ]

    if 'SurveyElements' in data:
        data['SurveyElements'].extend(predefined_elements)
    else:
        data['SurveyElements'] = predefined_elements

    if data['SurveyElements'][0]['Payload']:
        if block_id in data['SurveyElements'][0]['Payload']:
            block_elements = data['SurveyElements'][0]['Payload'][block_id]['BlockElements']
            for element in predefined_elements:
                if element['Element'] == "SQ":
                    block_elements.append({"Type": "Question", "QuestionID": element['PrimaryAttribute']})
        else:
            print(f"Block ID {block_id} not found in SurveyElements[0]['Payload']")
    else:
        print("Payload not found in SurveyElements[0]")

    return data





In [2]:
def markdown_to_text(markdown_str):
    # Remove markdown specific syntax (bold, italics, headers, etc.)
    plain_text = re.sub(r'(\*\*|\*|_|`|~~|#)', '', markdown_str)
    # Remove extra newlines
    plain_text = re.sub(r'\n\n+', '\n\n', plain_text)
    return plain_text.strip()

In [3]:
# load questions and answers, format them
question_filename = "../data/human_answer_with_models.csv"
csv_data = pd.read_csv(question_filename, header=0)

column_names = ["Edited human answer", "GPT-4-Turbo", "LLaMa-3.1-Instruct 405B",
                "Gemini-1.5 Pro"]

question_count = len(csv_data[column_names[0]])

for column_name in column_names:
    if column_name in csv_data:
        csv_data[column_name] = [markdown_to_text(ans_str) for ans_str in csv_data[column_name]]

for id in range(question_count):
    csv_data["question"][id] = csv_data["question"][id].replace(r"'", r"\\'").replace(r'"', r'\\"').strip()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  csv_data["question"][id] = csv_data["question"][id].replace(r"'", r"\\'").replace(r'"', r'\\"').strip()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  csv_data["question"][id] = csv_data["question"][id].replace(r"'", r"\\'").replace(r'"', r'\\"').strip()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  csv_data["question"][id] = csv_data["question"][id].replace(r"'", r"\\'").replace(r'"', r'\\"').strip()
A value is trying to be set on a copy of a slice from a DataFram

In [4]:
input_file_path = 'layout_template_new.qsf'
output_file_path = 'full_survey.qsf'
output_json_idx_path = 'full_survey.json'

with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

survey_id = data["SurveyEntry"]["SurveyID"]

# load templates
block_element = json.load(open('block_element.qsf'))
block_in_flow = json.load(open('block_in_flow.qsf'))
instruction_box = json.load(open('instruction_box.qsf'))
qid_to_model = {}

# ###################################################
# tot_questions = 20
# tot_advices = 4
# questions = [f"This is question {i}" for i in range(1, tot_questions + 1)]
# advices = [[("Initial response", "This is initial response text")] + [("Advice", f"This is advice text {j}") for j in range(tot_advices)] + [("Final Paragraph", "This is final paragraph text")] for i in range(1, tot_questions + 1)]
# ###################################################

eval_questions = 25
eval_models = len(column_names)

# Generating pages one by one
count_questions = 0
count_page = 0
for id in range(count_page // 4, eval_questions):
    q = csv_data["question"][id]
#     print(q)
    rand_four_idx = np.random.permutation(range(eval_models))
    qid_to_model[count_page // 4 + 1] = [column_names[mid] for mid in rand_four_idx]
    for mid in rand_four_idx:
        advs = csv_data[column_names[mid]][id].split("\n\n")
    
        # add a new block
        q_number = count_page + 1
        block_id = "BL_{:015d}".format(q_number + 1)
        new_block_element = json.loads(
            json.dumps(block_element).replace("[q_number]", str(q_number)).replace("[block_id]", block_id)
        )
        data['SurveyElements'][0]['Payload'][str(q_number + 1)] = new_block_element
    
        # add this block to flow
        fl_number = count_page + 3
        new_block_in_flow = json.loads(
            json.dumps(block_in_flow).replace("[fl_number]", str(fl_number)).replace("[block_id]", block_id)
        )
        data['SurveyElements'][1]['Payload']['Flow'].append(new_block_in_flow)
        data['SurveyElements'][1]['Payload']['Properties']['Count'] += 1
    
        # add question text (header)
        if count_page == 0:
            embed_data = data['SurveyElements'][1]['Payload']['Flow'][0]
            data['SurveyElements'][1]['Payload']['Flow'][0] = json.loads(
                json.dumps(embed_data).replace("[init_question]", q).replace("[pageid]", str(count_page // 4 + 1))
            )
            
        if count_page % 4 == 0:
            prev_q = csv_data["question"][id - 1] if id != 0 else q
            next_q = q
            prev_page_id = str(count_page // 4)
            next_page_id = str(count_page // 4 + 1)            
        elif count_page % 4 == 3:
            prev_q = q
            next_q = csv_data["question"][id + 1] if id != eval_questions - 1 else q
            prev_page_id = str(count_page // 4 + 1)
            next_page_id = str(count_page // 4 + 2) 
        else:
            prev_q = q
            next_q = q
            prev_page_id = str(count_page // 4 + 1)
            next_page_id = str(count_page // 4 + 1) 
        
        # add the instruction box
        qid_count = count_page * 100 + 1 # start from QID1, then QID101, 201, ..., each page take 100 qid quota
        new_instruction_box = json.loads(
            json.dumps(instruction_box)
            .replace("[prev_page_id]", prev_page_id)
            .replace("[next_page_id]", next_page_id)
            .replace("[qid_count]", str(qid_count))
            .replace("[prev_question]", prev_q)
            .replace("[next_question]", next_q)
        )
        data['SurveyElements'].append(new_instruction_box)
        new_block_element['BlockElements'].append({"Type": "Question", "QuestionID": new_instruction_box['PrimaryAttribute']})
    
        # add survey questions
        starting_qid = qid_count + 1
        for ans_text in advs:
            sec = ""
            block_key = str(q_number + 1)
            add_survey_questions(data, survey_id, starting_qid, block_key, sec, ans_text.strip())
            starting_qid += 5
    
        # add overall comments
        starting_qid = qid_count + 96 # QID97, 98, 99, 100
        add_overall_questions(data, survey_id, starting_qid, block_key)
    
        count_page += 1

# write to file
with open(output_file_path, 'w', encoding='utf-8') as output_file:
    json.dump(data, output_file, indent=4, ensure_ascii=False)

with open(output_json_idx_path, 'w') as output_file:
    json.dump(qid_to_model, output_file, indent=4)
    
print(f"Updated JSON data has been saved to {output_file_path}.")


Updated JSON data has been saved to full_survey.qsf.
