In [1]:
import pandas as pd
import dill as pickle
import numpy as np

In [2]:
# Raw exports: survey pages, HTML-form answers and multiple-choice answers.
survey = pd.read_csv("../OSF/survey.csv")
html_form, multi_choice = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("../OSF/survey-html-form.csv", "../OSF/survey-multi-choice.csv")
)
# Processed per-item scores that this notebook spot-checks; the first CSV
# column becomes the index (looked up by pid further down).
individual_items = pd.read_csv("../processed/quest_main/individual_items.csv", index_col=0)

In [3]:
# Draw a reproducible random sample of (pid, question_id) pairs to spot-check:
# up to PIDS_PER_ITEM distinct participants per item column, restricted to
# participants with a non-missing value in that column.
SPOT_CHECK_SEED = 0
PIDS_PER_ITEM = 2
rng = np.random.default_rng(SPOT_CHECK_SEED)

sampled_pairs = []
for col in individual_items.columns:
    candidate_pids = individual_items[col].dropna().index.values
    # replace=False avoids checking the same (pid, item) pair twice; the min()
    # keeps columns with fewer than PIDS_PER_ITEM answers from raising.
    n_draws = min(PIDS_PER_ITEM, len(candidate_pids))
    random_pids = rng.choice(candidate_pids, size=n_draws, replace=False)
    sampled_pairs.extend([[pid, col] for pid in random_pids])

data_to_check = pd.DataFrame(sampled_pairs, columns=["pid", "question_id"])

In [4]:
# Map each participant id to their parsed multiple-choice answers.
# NOTE(review): eval() executes whatever text is stored in the CSV; this is only
# acceptable for trusted data. Consider ast.literal_eval if the stored values
# are plain Python literals -- confirm against the file before switching.
iq_responses = {}
for pid, raw_response in zip(multi_choice["pid"], multi_choice["response"]):
    iq_responses[pid] = eval(raw_response)

In [5]:
# Map each participant id to their parsed CRT answers. Only HTML-form rows whose
# response text mentions 'crt' are used; curly quotes are stripped before parsing.
# NOTE(review): eval() executes whatever text is stored in the CSV; this is only
# acceptable for trusted data.
crt_responses = {}
for pid, raw_response in zip(html_form["pid"], html_form["response"]):
    if 'crt' not in raw_response:
        continue
    cleaned_response = raw_response.replace("“","").replace("”", "")
    crt_responses[pid] = eval(cleaned_response)

In [6]:
def get_response(row):
    """Return the raw answer a participant gave to one question.

    Parameters
    ----------
    row : mapping with "pid" and "question_id" entries (e.g. a DataFrame row).

    Returns
    -------
    The stored answer. CRT and IQ items are read from ``crt_responses`` and
    ``iq_responses``; every other item is searched for in the participant's
    ``survey`` responses. If no survey response contains the question, an
    empty string is returned. If several survey rows contain it, the last
    one wins.

    Raises
    ------
    KeyError
        If a CRT/IQ answer is missing for the participant.
    ValueError
        If a single survey response contains the question on more than one page.
    """
    pid = row["pid"]
    question_id = row["question_id"]

    if "crt" in question_id:
        return crt_responses[pid][question_id]
    if "iq" in question_id:
        return iq_responses[pid][question_id]

    answer = ""
    for response in survey.loc[survey["pid"] == pid, "response"]:
        try:
            # NOTE(review): eval() executes whatever text is stored in the CSV;
            # only acceptable for trusted data.
            pages = eval(response)
        except Exception:
            # Unparseable or missing responses are skipped (best effort).
            continue
        if not isinstance(pages, dict):
            continue

        responses = [
            page[question_id]
            for page in pages.values()
            if isinstance(page, dict) and question_id in page
        ]
        if not responses:
            # This survey response simply does not cover the question.
            continue
        if len(responses) > 1:
            # Previously an assert hidden by a bare `except: pass`, so duplicate
            # answers were silently ignored instead of being reported.
            raise ValueError(
                f"pid {pid} has {len(responses)} answers to {question_id!r} "
                "within one survey response"
            )
        answer = responses[0]
    return answer

In [7]:
# Scoring tables, keyed by questionnaire name and then by question id.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted source.
solutions_path = "../inputs/questionnaire_files/solutions_OnePart.pkl"
with open(solutions_path, "rb") as solutions_file:
    scoring = pickle.load(solutions_file)

In [8]:
# Flatten the per-questionnaire scoring tables into a single
# question_id -> scoring lookup (later questionnaires win on duplicate ids).
question_scoring = {
    quest_id: quest_scoring
    for questionnaire_scoring in scoring.values()
    for quest_id, quest_scoring in questionnaire_scoring.items()
}

In [9]:

# CRT answer keys: each raw answer string is labelled "intuitive", "correct"
# or "no response".
crt_quiz_solutions = {
    "crt1": {
        ".10": "intuitive",
        "10": "intuitive",
        "005": "correct",
        ".05": "correct",
        "5": "correct",
        "": "no response",
    },
    "crt2": {"100": "intuitive", "5": "correct", "": "no response"},
    "crt3": {"24": "intuitive", "47": "correct", "": "no response"},
    "crt4": {"9": "intuitive", "4": "correct", "": "no response"},
    "crt5": {"30": "intuitive", "29": "correct", "": "no response"},
    "crt6": {"10": "intuitive", "20": "correct", "": "no response"},
    "crt7": {
        "is ahead of where he began": "intuitive",
        "has lost money": "correct",
        "": "no response",
    },
}
# CRT entries override any existing entries with the same question id.
question_scoring = {**question_scoring, **crt_quiz_solutions}

In [10]:
# Sanity check: list the CRT question ids now present in the scoring lookup.
[question_id for question_id in question_scoring if "crt" in question_id]

['crt1', 'crt2', 'crt3', 'crt4', 'crt5', 'crt6', 'crt7']

In [11]:
# Look up the raw answer behind every sampled (pid, question_id) pair.
data_to_check["response"] = data_to_check.apply(get_response, axis=1)

In [12]:
# Attach each question's scoring entry; questions without one get an empty dict.
data_to_check["solutions"] = data_to_check["question_id"].apply(
    lambda question_id: question_scoring.get(question_id, {})
)

In [13]:
import json

# Questionnaire definitions (a JSON document stored with a .txt extension).
questionnaire_path = "../inputs/questionnaire_files/questionnaire_OnePart.txt"
with open(questionnaire_path, "r") as questionnaire_file:
    quest_info = json.load(questionnaire_file)

In [14]:
# question_id -> reverse_coded flag, collected across all questionnaires.
reverse_coded = {
    question["question_id"]: question["reverse_coded"]
    for questionnaire_info in quest_info.values()
    for question in questionnaire_info["questions"]
}

In [15]:
# Questions missing from the reverse_coded lookup are treated as not reversed (0).
data_to_check["reverse_coded"] = data_to_check["question_id"].apply(
    lambda question_id: reverse_coded.get(question_id, 0)
)

In [16]:
# Questionnaires whose items do not all share one identical scoring entry
# (compared via their string representation).
[
    quest_name
    for quest_name, item_scoring in scoring.items()
    if len(np.unique([str(entry) for entry in item_scoring.values()])) != 1
]

['AUDIT', 'EAT', 'IQ', 'UPPS-P']

In [18]:
# For the questionnaires with non-uniform scoring, print how many distinct
# scoring entries occur among the *sampled* items (matched by id substring).
questionnaire_prefixes = [("AUDIT", "alcohol"), ("EAT", "eat"), ("UPPS-P", "uppsp")]
for questionnaire_name, questionnaire_prefix in questionnaire_prefixes:
    sampled_ids = np.unique(
        [question_id for question_id in data_to_check["question_id"] if questionnaire_prefix in question_id]
    )
    subset_scores = {col: scoring[questionnaire_name][col] for col in sampled_ids}
    distinct_entries = np.unique([str(val) for val in subset_scores.values()])

    print(len(distinct_entries))

1
1
1


In [19]:
def score_row(row):
    """Score one sampled answer.

    ``row`` must provide "solutions", "response", "reverse_coded" and
    "question_id".

    * If "solutions" is not a dict, or the response is the empty string, the
      score is 1 when solutions equals the response and 0 otherwise.
    * Reverse-coded items (reverse_coded != 0) are scored with the solution
      values mirrored across the sorted answer keys.
    * CRT items score 1 only when the response is labelled "correct".
    * All other items return the value stored for the response.
    """
    solutions = row["solutions"]
    response = row["response"]

    has_lookup = isinstance(solutions, dict) and response != ""
    if not has_lookup:
        return int(solutions == response)

    if row["reverse_coded"] != 0:
        ascending = sorted(solutions.keys())
        descending = sorted(solutions.keys(), reverse=True)
        # Pair the i-th smallest key with the value of the i-th largest key.
        mirrored = {key: solutions[opposite] for key, opposite in zip(ascending, descending)}
        return mirrored[response]

    if "crt" not in row["question_id"]:
        return solutions[response]

    # Unknown CRT answers and non-"correct" labels both score 0.
    return 1 if solutions.get(response) == "correct" else 0

In [20]:
# Re-score every sampled answer from the raw response.
data_to_check["score"] = data_to_check.apply(score_row, axis=1)

In [21]:
# Display the processed per-item scores that the re-scored sample is compared against.
individual_items

Unnamed: 0_level_0,alcohol.1,anxiety.1,anxiety.10,anxiety.11,anxiety.12,anxiety.13,anxiety.14,anxiety.15,anxiety.16,anxiety.17,...,uppsp.8,uppsp.9,zung.11,zung.12,zung.13,zung.14,zung.16,zung.17,zung.18,zung.20
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
37,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,...,2.0,2.0,2.0,1.0,3.0,2.0,2.0,1.0,1.0,2.0
38,0.0,3.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0
39,1.0,3.0,2.0,2.0,4.0,2.0,3.0,2.0,3.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,2.0
40,1.0,2.0,2.0,1.0,1.0,2.0,3.0,1.0,3.0,2.0,...,2.0,1.0,2.0,2.0,1.0,3.0,3.0,3.0,3.0,2.0
41,1.0,1.0,3.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,...,3.0,3.0,4.0,4.0,1.0,4.0,4.0,4.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
918,3.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,...,4.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
919,0.0,2.0,3.0,2.0,3.0,3.0,3.0,2.0,3.0,3.0,...,2.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0
920,2.0,2.0,2.0,3.0,4.0,2.0,2.0,1.0,2.0,2.0,...,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0
921,4.0,2.0,3.0,1.0,4.0,3.0,3.0,4.0,3.0,2.0,...,1.0,2.0,3.0,4.0,3.0,3.0,3.0,4.0,3.0,4.0


In [None]:
# True only if every re-computed score equals the stored value for that
# (pid, question_id) cell in individual_items.
score_matches = data_to_check.apply(
    lambda row: individual_items.loc[row["pid"], row["question_id"]] == row["score"],
    axis=1,
)
np.all(score_matches)