In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime

from download_tools.plugins.mouselab_mdp import preprocess_mouselab_data, add_click_count_columns
from download_tools.plugins.survey_html_form import process_html_demographics
from download_tools.plugins.survey_multi_choice import score_mouselab_questionnaires, get_mouselab_quiz_name, get_quiz_passer_ids, score_row, score_generic_questionnaires
from download_tools.plugins.survey_text import preprocess_survey_text, get_old_demographics
from download_tools.plugins.utils import get_demo_string

from datetime import datetime
import json
import dill as pickle

In [2]:
# Parameters
simulated = False
sessions = ["c2.1"]  # each entry is a sub-folder of the raw data directory
experiment_setting = "high_decreasing"
COST = None
DEPTH = None
html_survey_names = None
old_experiment = False
manual_age_mapping = None
experiment_specific_mapping = None

# handed to preprocess_mouselab_data together with the ground-truth rewards
trials_per_block = {"training": 35}

# block name -> trial indices; the ranges are kept as strings and are
# evaluated later when they are expanded into a trial -> block lookup
ranges_to_extract = {
    "training": "range(25)",
    "test": "range(25,35)",
}

mouselab_column_identifier = None
mouselab_mapping = None
max_attempts = None
passing_score = None
mouselab_quiz_solutions = None

# raw column name -> column name used after renaming the mouselab-mdp frame
column_mapping = {
    "trial_type": "trial_type",
    "time_elapsed": "time_elapsed",
    "rt": "rt",
    "pid": "pid",
    "state_rewards": "state_rewards",
    "block": "block",
    "trial_index": "trial_index",
    "score": "score",
    "simulation_mode": "simulation_mode",
    "reward": "rewards",
    "path": "path",
    "actions": "actions",
    "action_time": "action_times",
    "queries": "queries",
    "trialTime": "trial_time",
    "run": "run",
}

# name (without extension) of the rewards JSON that is loaded as ground truth
ground_truth_file = "312_24_4_2_extended"

# node groupings handed to add_click_count_columns
node_classification = {
    "early": [1, 5, 9],
    "middle": [2, 6, 10],
    "late": [3, 4, 7, 8, 11, 12],
    "clicks": [1, 5, 9, 2, 6, 10, 3, 4, 7, 8, 11, 12],
}

structure = "312_2_4_24"
analysis_run = "c2.1"  # names the sub-folder that processed output is written to

# root data directory; inputs/, raw/ and processed/ are resolved beneath it
data_path = "/home/vfelso/github/planning-depth-differences/data"


In [3]:
# Resolve every directory used below from the configured data_path
data_root = Path(data_path)
inputs_path = data_root / 'inputs'
raw_data_path = data_root / 'raw'

# processed output is kept in a per-analysis-run folder, created up front
# (including any missing parents) so later writes cannot fail on a missing dir
processed_data_path = data_root / f'processed/{analysis_run}'
processed_data_path.mkdir(parents=True, exist_ok=True)

# "cluster" sits next to the data directory, i.e. one level above data_path
cluster_path = data_root.parents[0] / "cluster"

In [4]:
# Read the rewards JSON named by ground_truth_file from the inputs directory
ground_truth_path = inputs_path / f"exp_inputs/rewards/{ground_truth_file}.json"
with ground_truth_path.open() as json_file:
    ground_truths = json.load(json_file)

In [5]:
# Collect the raw CSVs of every session, grouped by file name (without suffix)
full_data = {}

for run in sessions:
    for file_path in raw_data_path.glob(f"{run}/*.csv"):
        # the bonuses file is identifiable and its information is already
        # present in the other data, so it is never loaded
        if "bonuses" in str(file_path):
            continue

        session_frame = pd.read_csv(file_path)
        # tag each row with the session it was read from
        session_frame["run"] = run
        full_data.setdefault(file_path.stem, []).append(session_frame)

# stack the per-session frames so each file name maps to a single DataFrame
full_data = {
    file_name: pd.concat(session_frames)
    for file_name, session_frames in full_data.items()
}

In [6]:
# Rename the mouselab-mdp columns according to column_mapping; nothing is
# changed when no mapping was supplied
if column_mapping:
    mouselab_raw = full_data["mouselab-mdp"]
    full_data["mouselab-mdp"] = mouselab_raw.rename(columns=column_mapping)

In [7]:
mouselab_datas = preprocess_mouselab_data(
    full_data["mouselab-mdp"],
    trials_per_block,
    ground_truths,
)


def _last_three_path_entries(raw_path):
    """Evaluate the stored path string and keep only its final three entries."""
    # NOTE(review): eval() runs whatever expression the raw CSV cell contains;
    # if the files are not fully trusted, consider ast.literal_eval instead.
    return eval(raw_path)[-3:]


# miscoding can leave a run of 0s at the front of the path, so only the tail
# of each path is kept
mouselab_datas["path"] = mouselab_datas["path"].apply(_last_three_path_entries)

mouselab_datas = add_click_count_columns(mouselab_datas, node_classification)

In [8]:
#TODO would be nice to refactor this out and import it
def expand_range_dictionary(input_dictionary):
    """
    Invert a {block: trial indices} specification into a {trial index: block} lookup.

    Each value of ``input_dictionary`` is handled according to its type:

    * ``str`` -- evaluated as a Python expression that must yield an iterable
      of trial indices (e.g. ``"range(25)"``); every index is mapped to the key.
    * mapping (anything with ``.items()``) -- expanded recursively and stored
      under its own key, giving a nested ``{key: {trial index: block}}`` entry.
    * any other iterable (e.g. a ``list`` or ``range``) -- iterated directly;
      every element is mapped to the key.

    If the same trial index appears under several keys, the key encountered
    last wins.

    :param input_dictionary: mapping from block (or group) name to one of the
        value kinds described above
    :return: dictionary mapping trial indices to block names, with nested
        dictionaries for nested inputs
    """
    trial_to_block = {}
    for block, trial_range in input_dictionary.items():
        if isinstance(trial_range, str):
            # NOTE(review): eval() executes the configured string as code; this
            # is only safe while the range specification comes from a trusted
            # source.
            trial_indices = eval(trial_range)
        elif hasattr(trial_range, "items"):
            # nested specification: expand it on its own and keep it under its key
            trial_to_block[block] = expand_range_dictionary(trial_range)
            continue
        else:
            # already an iterable of trial indices; previously this case failed
            # with AttributeError because it was treated as a nested dictionary
            trial_indices = trial_range

        for trial_index in trial_indices:
            trial_to_block[trial_index] = block
    return trial_to_block

if ranges_to_extract:
    trial_to_block = expand_range_dictionary(ranges_to_extract)

    def _block_for_row(row):
        """Look up the block of one trial, preferring a run-specific mapping."""
        run_name = row["run"]
        if run_name in trial_to_block:
            # the lookup has a nested entry for this run: use its own ranges
            return trial_to_block[run_name][row["trial_index"]]
        # otherwise the lookup is keyed directly by trial index
        return trial_to_block[row["trial_index"]]

    mouselab_datas["block"] = mouselab_datas.apply(_block_for_row, axis=1)

In [9]:
# Write the processed trial data into this analysis run's output folder
mouselab_output_file = processed_data_path / "mouselab-mdp.csv"
mouselab_datas.to_csv(mouselab_output_file)