### In this file, the mouse usage data is processed and prepared for data analysis

#### package imports

In [None]:
import json
import operator
import pickle
from itertools import groupby

import numpy as np
import pandas as pd
import scipy.interpolate
import seaborn as sns
from matplotlib import pyplot as plt

# "Extra package"
import pingouin as pg
# Vallat, R. (2018). Pingouin: statistics in Python. Journal of Open Source Software, 3(31), 1026,
# https://doi.org/10.21105/joss.01026

#### dataset import

In [None]:
# replace your_dataset_name with the name of your raw data json file

# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on the platform default
# encoding (which differs between operating systems)
with open("your_dataset_name.json", encoding="utf-8") as jsonData:
    dataset = json.load(jsonData)

#### import a list of all study ids who completed the study and were not flagged as outliers/bad cases

In [None]:
# load the ids of the participants who completed the study and were not flagged as outliers/bad cases
# NOTE(review): pickle.load can execute arbitrary code while unpickling - only load this file if it was created
# by a trusted source (e.g. your own preprocessing run)
with open("study_ids_without_bad_cases", "rb") as fp:
    valid_ids = pickle.load(fp)

## Mouse data helper functions

### general functions to handle the mouse data

In [None]:
# Removes mouse movement artifacts -> a movement datapoint is an artifact if its x- & y-coordinates are equal to the
# previously kept movement datapoint or if its timestamp is not later than that datapoint's timestamp
def clean_mouse_data(mouse_data):
    """Sort the raw tracker data by time and remove duplicated movement datapoints.

    Args:
        mouse_data: dictionary with a datapoint number as key and the datapoint (a dictionary with at least the
            keys "eventType" and "time", plus "x" and "y" for movement datapoints) as value.

    Returns:
        A tuple ``(clean_list, artifact_percentage, median_time_diff)``:
        clean_list is the time-sorted list of kept movement datapoints and all click datapoints (other event
        types are dropped), artifact_percentage is the share of removed movement datapoints in percent (0.0 if
        there are no movement datapoints at all) and median_time_diff is the median time difference between
        consecutive kept datapoints (nan if fewer than two datapoints were kept).
    """

    move_event, click_event = "MousePositionChanged", "MouseClick"

    # the mouse data is saved as a dictionary with a datapoint number as key and the datapoint as its value;
    # only the datapoints are needed, ordered by their timestamps
    sorted_datapoints = sorted(mouse_data.values(), key=lambda point: point["time"])

    # Save the last kept movement datapoint. None means "nothing kept yet": using [0, 0] and 0 as start values
    # would wrongly remove a first datapoint located at the screen origin or recorded at timestamp 0
    last_coordinates = None
    last_timestamp = None

    # save the cleaned datapoints
    clean_list = []

    # count the number of removed datapoints and total movement datapoints
    artifacts = 0
    total_datapoints = 0

    for datapoint in sorted_datapoints:
        event_type = datapoint["eventType"]
        if event_type == move_event:
            total_datapoints += 1
            coordinates = [datapoint["x"], datapoint["y"]]
            # keep the datapoint only if the position changed AND the timestamp advanced
            position_changed = coordinates != last_coordinates
            if position_changed and (last_timestamp is None or datapoint["time"] > last_timestamp):
                clean_list.append(datapoint)
                last_coordinates = coordinates
                last_timestamp = datapoint["time"]
            else:
                artifacts += 1
        elif event_type == click_event:
            # clicks are always kept
            clean_list.append(datapoint)

    # get the number of artifacts in percent (no movement data -> no artifacts, avoids a division by zero)
    artifact_percentage = (artifacts / total_datapoints) * 100 if total_datapoints else 0.0

    # get the median time difference between consecutive datapoints (if this number is too high, the sampling
    # frequency is too low to record mouse movement adequately); undefined for fewer than two datapoints
    if len(clean_list) > 1:
        median_time_diff = np.median(np.diff([point["time"] for point in clean_list]))
    else:
        median_time_diff = np.nan

    # return the cleaned mouse data list, the percentage of artifacts and the median time difference
    return clean_list, artifact_percentage, median_time_diff

In [None]:
# Interpolate the mouse data into equal time intervals (other researchers interpolate into an equal amount of
# datapoints per trial (see Yamauchi & Xiao, 2017; Freeman et al. 2009)
# Input is the array with the original mouse datapoint objects
def interpolate_mouse_movement(mouse_data, interval=15):
    """Linearly resample the movement datapoints onto an equally spaced timeline.

    Args:
        mouse_data: iterable of datapoint dictionaries; only datapoints whose "eventType" is
            "MousePositionChanged" are used (their "x", "y" and "time" values).
        interval: step of the new timeline, in the unit of the "time" values. Defaults to 15, the value that was
            previously hard-coded.

    Returns:
        A list of ``{"x": ..., "y": ..., "time": ...}`` dictionaries (coordinates rounded to 3 decimals) starting
        at the first movement timestamp and ending before the last one, or -99 if fewer than two movement
        datapoints are available.
    """

    # only the mousePositionChanged datapoints carry movement information
    movement_points = [datapoint for datapoint in mouse_data if datapoint["eventType"] == "MousePositionChanged"]

    # if there is not enough data to interpolate, return -99
    if len(movement_points) < 2:
        print("Not enough data for interpolation. The procedure was stopped")
        return -99

    # separately collect the x- & y-coordinate as well as the timestamp of each datapoint
    x_pos = [datapoint["x"] for datapoint in movement_points]
    y_pos = [datapoint["y"] for datapoint in movement_points]
    time = [datapoint["time"] for datapoint in movement_points]

    # create the (linear) interpolation functions of the x- & y-coordinate over time
    inter_x = scipy.interpolate.interp1d(time, x_pos)
    inter_y = scipy.interpolate.interp1d(time, y_pos)

    # create a new timeline with equal timesteps from the first to (excluding) the last timestamp
    equal_time_intervals = np.arange(time[0], time[-1], interval)

    # evaluate the interpolation functions on the equally spaced timeline
    new_x = np.round(inter_x(equal_time_intervals), decimals=3)
    new_y = np.round(inter_y(equal_time_intervals), decimals=3)

    # bring the separated values together to a list of datapoints with coordinates and timestamp
    return [{"x": i, "y": j, "time": k} for i, j, k in zip(new_x, new_y, equal_time_intervals)]

### functions to calculate the mouse usage features

In [None]:
# Get distance, speed and acceleration parameters of the mouse movement
# This function uses the interpolated mouse data where a datapoint`s structure is
# {x: x-coord., y: y-coord, time: timestamp}
def get_mouse_movement_parameters(mouse_data):
    """Calculate the travelled distance, speed and acceleration features of an interpolated mouse path.

    The datapoints are expected to be equally spaced in time (15 time units apart, as produced by the
    interpolation step). Returns a dictionary with the total distance, the mean/sd of the speed and the mean/sd
    of the non-negative and non-positive accelerations.
    """

    # time constant between two datapoints
    step = 15

    # euclidean distance between each pair of consecutive mouse datapoints
    step_lengths = []
    for previous, current in zip(mouse_data, mouse_data[1:]):
        delta_x = current["x"] - previous["x"]
        delta_y = current["y"] - previous["y"]
        step_lengths.append(np.sqrt(pow(delta_x, 2) + pow(delta_y, 2)))

    # speed (dist / time) in pixels per second
    speed = np.array(step_lengths, dtype="f") / 0.015

    # acceleration as the change in speed over the change in time = (speed2 - speed1) / 15
    # NOTE(review): speed is scaled per second while this divides by 15 (not 0.015) - confirm the intended unit
    accel = np.diff(speed) / step

    # separate the acceleration data into speeding up and slowing down (zero changes are part of both groups)
    speeding_up = [change for change in accel if change >= 0]
    slowing_down = [change for change in accel if change <= 0]

    return {
        "mouse_dist_": np.sum(step_lengths),
        "speed_mean_": np.mean(speed),
        "speed_sd_": np.std(speed),
        "pos_accel_mean": np.mean(speeding_up),
        "pos_accel_sd": np.std(speeding_up),
        "neg_accel_mean": np.mean(slowing_down),
        "neg_accel_sd": np.std(slowing_down),
    }

In [None]:
# Gets the distance to an ideal (straight) line between the start and end of a mouse action
def get_dist_from_ideal_line(mouse_data):
    """Calculate how far the interpolated mouse path deviates from the straight start-to-end line.

    Args:
        mouse_data: list of trials, each trial being a list of datapoint dictionaries with "x", "y", "time" and
            "eventType" keys.

    Returns:
        Dictionary with the total, mean and standard deviation of the point-to-line distances over all
        interpolated datapoints of all trials. Trials that cannot be interpolated (fewer than two movement
        datapoints) are skipped.
    """

    # Formula derived from https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line

    # store the deviations from the ideal line
    deviation = []

    # loop over all trials
    for trial in mouse_data:

        interpol_trial = interpolate_mouse_movement(trial)
        # -99 signals that the trial did not contain enough movement data
        if interpol_trial == -99:
            continue

        # the earliest and latest datapoints of the trial are the start and end markers of the ideal line
        start_point = min(trial, key=lambda point: point["time"])
        end_point = max(trial, key=lambda point: point["time"])

        # the direction and the length of the ideal line are the same for all datapoints of the trial
        delta_x = end_point["x"] - start_point["x"]
        delta_y = end_point["y"] - start_point["y"]
        line_length = np.sqrt(pow(delta_y, 2) + pow(delta_x, 2))

        # loop over all datapoints from a trial
        for datapoint in interpol_trial:
            if line_length == 0:
                # start and end coincide, so there is no line: the point-to-line formula would be 0 / 0 (nan)
                # and would invalidate all aggregated values -> use the distance to that single point instead
                dist = np.sqrt(pow(datapoint["x"] - start_point["x"], 2)
                               + pow(datapoint["y"] - start_point["y"], 2))
            else:
                # calculate the distance of the given datapoint from the ideal line
                num = delta_y * datapoint["x"] - \
                      delta_x * datapoint["y"] + \
                      end_point["x"] * start_point["y"] - end_point["y"] * start_point["x"]
                dist = abs(num) / line_length
            # append the distance to the deviation array
            deviation.append(dist)

    results = {"total_dev_ideal_line_": np.sum(deviation),
               "mean_dev_ideal_line_": np.mean(deviation),
               "sd_dev_ideal_line_": np.std(deviation)}

    return results

In [None]:
# calculates the angle between two consecutive mouse movement vectors
def get_mouse_angles(mouse_data):
    """Return the mean and standard deviation of the turning angles along the mouse path.

    Every three consecutive datapoints form two movement vectors; the absolute angle between them is stored in
    degrees:
        0 degrees = straight forward
        180 degrees = straight backwards
        0 to 90 degrees: forward direction
        90 to 180 degrees: backward direction
    """

    # position vector of every datapoint
    positions = [np.array([datapoint["x"], datapoint["y"]]) for datapoint in mouse_data]

    angle3points = []

    # slide a window of three consecutive positions over the path
    for oldest, middle, newest in zip(positions, positions[1:], positions[2:]):
        # vector between the recent datapoint and the previous datapoint
        vector1 = newest - middle
        # vector between the previous datapoint and the previous previous datapoint
        vector2 = middle - oldest
        # signed angle between the two vectors (range -180 to 180)
        # from https://stackoverflow.com/questions/2827393/angles-between-two-n-dimensional-vectors-in-python
        angle = np.arctan2(np.linalg.det([vector1, vector2]), np.dot(vector1, vector2))
        # only the size of the direction change is kept
        angle3points.append(np.degrees(abs(angle)))

    return {"angle_mean_": np.mean(angle3points), "angle_sd_": np.std(angle3points)}

In [None]:
# calculate the number of directional changes on the x- & y-axis
# (despite the function name, no sample entropy is calculated here)
def get_x_y_flips_and_sample_entropy(mouse_data):
    """Count how often the mouse movement changes its direction on the x-axis and on the y-axis."""

    # counts the flips (changes of direction) in an array of coordinate shifts
    # a simple count of negative products of neighbouring shifts is not enough, because it would ignore cases
    # where there is movement in one direction, then no movement and then movement in the opposite direction!
    def _calc_flips(array):

        flips = 0
        # last non-zero shift seen directly before a pause (shift of 0) on the axis
        shift_before_pause = 0

        # walk over all pairs of neighbouring shifts
        for current, following in zip(array, array[1:]):
            product = following * current
            if product < 0:
                # opposite signs -> direct change of direction
                flips += 1
            elif product == 0:
                if current != 0:
                    # a pause starts: remember the direction of the movement before it
                    shift_before_pause = current
                elif following != 0:
                    # a pause ends: compare the direction after the pause with the one before it
                    if shift_before_pause * following < 0:
                        flips += 1

        return flips

    # shifts (differences between consecutive coordinates) per axis
    x_shifts = np.diff([datapoint["x"] for datapoint in mouse_data])
    y_shifts = np.diff([datapoint["y"] for datapoint in mouse_data])

    return {"x_flips_": _calc_flips(x_shifts), "y_flips_": _calc_flips(y_shifts)}

In [None]:
# get the task times
def get_task_times(mouse_task_data, mouse_data):
    """Return the total duration of a task.

    Args:
        mouse_task_data: dictionary with the "trialStarted" and "taskEnded" timestamps of the task.
        mouse_data: currently unused; kept so existing calls keep working.

    Returns:
        The difference between "taskEnded" and "trialStarted" divided by 1000 (seconds, if the timestamps are
        in milliseconds). The value was previously computed but never returned.
    """

    # get the total time of the task
    task_time = (mouse_task_data["taskEnded"] - mouse_task_data["trialStarted"]) / 1000

    return task_time

### Helper Function to visualize the mouse usage

In [None]:
def visualize_mouse_movement(data, participant, task, sceen_props):
    """Plot the mouse movement and the mouse clicks of a task and save the plot as a png image.

    Args:
        data: iterable of datapoint dictionaries (each with a "practice" key; movement and click datapoints
            additionally carry "eventType", "x" and "y").
        participant: participant id, used in the file name.
        task: task name, used in the file name; tasks containing "Con" are saved to the condition folder.
        sceen_props: dictionary with the "width" and "height" of the participant's screen.

    The image is written before the plot is shown: showing a figure can release it, in which case a later
    savefig call would store an empty image.
    """

    key, value, value2 = "eventType", "MousePositionChanged", "MouseClick"

    # saves the movement data separately to plot x against y (as positions on the screen)
    mouse_move_x_values = []
    mouse_move_y_values = []

    # saves the coordinates of the mouseclicks to plot them as x and y positions on the screen
    mouse_click_x_values = []
    mouse_click_y_values = []

    # loop over all datapoints
    for datapoint in data:
        # this if is a "hack" to only get the task data and remove the mouse data before the actual task in the
        # practice phase: Remove it for a multipurpose solution!
        if datapoint["practice"] == False:
            # if its a movement point save the coordinates to the movement arrays
            if key in datapoint and datapoint[key] == value:
                mouse_move_x_values.append(datapoint["x"])
                mouse_move_y_values.append(datapoint["y"])
            # if its a click save the coordinates to the click array
            elif key in datapoint and datapoint[key] == value2:
                mouse_click_x_values.append(datapoint["x"])
                mouse_click_y_values.append(datapoint["y"])

    # create a scatterplot using the movement and click data

    sceen_width = sceen_props["width"]
    screen_height = sceen_props["height"]

    # assumed dpi of the screen
    mydpi = 96
    # set the fig size relative to the size and dpi of the screen (scaled down by the factor 2.5)
    fig = plt.figure(figsize=((sceen_width / mydpi) / 2.5, (screen_height / mydpi) / 2.5), dpi=mydpi)

    # create a list with numbers in ascending order corresponding to the length of the movement data, this list is
    # used to give each movement datapoint a unique color on the color map of the plot
    colors = np.arange(len(mouse_move_x_values))

    # get the axis of the plot and set its size to the screen size (y-axis inverted: screen origin is top left)
    axes = plt.gca()
    axes.set_ylim([screen_height, 0])
    axes.set_xlim([0, sceen_width])

    # s=(72. / fig.dpi) ** 2 --> size of a dot is equal to one pixel (a higher number is used, because the dots
    # are really small in the small pictures)
    dot_size = ((72. / fig.dpi) ** 2) * 5

    # output a scatterplot of the movement data, each datapoint gets a color corresponding to its number in the
    # color list according to a color map (default color map is used)
    plt.scatter(mouse_move_x_values, mouse_move_y_values, c=colors, marker="o", s=dot_size)

    # plot the click datapoints as well
    plt.scatter(mouse_click_x_values, mouse_click_y_values, color="black", marker="o", s=dot_size)

    # hide the axis
    plt.axis("off")

    # save the figure in the corresponding folder (bbox_inches: tight removes some unnecessary whitespace)
    # NOTE(review): every task without "Con" in its name ends up in the Slider_Task folder - confirm
    if "Con" in task:
        image_path = 'Mouse_Usage_Images/Cond/' + task + "_" + str(participant) + '.png'
    else:
        image_path = 'Mouse_Usage_Images/Prac/Slider_Task/' + task + "_" + str(participant) + '.png'
    fig.savefig(image_path, bbox_inches="tight")

    # show the plot (after saving, see docstring)
    plt.show()

    plt.close(fig)

### Helper Function that calculates all mouse parameters for each mouse task

In [None]:
# helper function that calculates all the mouse features (parameters) depending on the task, its input is the
# cleaned (preprocessed) mouse data and the task name
def get_parameters_pipeline(data, task):
    """Calculate all mouse usage features of one task.

    Args:
        data: cleaned list of mouse datapoint dictionaries of the task.
        task: task name; it must contain "PointClick", "DragDrop", "Slider" or "FollowBox".

    Returns:
        Dictionary with the working time ("Working_time_<task>") and all general and task specific mouse
        features; every feature key is suffixed with "_<task>".

    Raises:
        ValueError: if the task name is unknown, if no datapoint belongs to the actual task or if the task data
            contains too few movement datapoints to be interpolated.
    """

    output = {}

    # helper function to output the variable with the taskname
    def _output_with_taskname(results, task):
        return {dict_key + "_" + task: value for dict_key, value in results.items()}

    # --- task specific calculations helper functions ---

    # Get the task data per task (-99 if the task name is unknown)
    def _get_task_data(mouse_data, mouse_task):

        task_data = -99
        # get the mouse data for the point and click task
        if "PointClick" in mouse_task:

            # get all datapoints that are not in the practice condition and that are after the first and before
            # the last clicked circle ("real task data")
            task_data = [datapoint for datapoint in mouse_data
                         if 0 < datapoint["circlesClicked"] < 16 and datapoint["practice"] == False]

        elif "DragDrop" in mouse_task:

            # get the drag and drop data starting from the "touch of the first circle to drag"
            task_data = [datapoint for datapoint in mouse_data if
                         (datapoint["dragging"] or 0 < datapoint["circleNumber"]) and datapoint["practice"] == False
                         and datapoint["circlesDragged"] < 12]

        elif "Slider" in mouse_task:

            # get the slider data starting from end of the first slide to the end of the last slide (the first slide
            # is thrown away, because the data doesnt allow to check when the first slide event started)
            task_data = [datapoint for datapoint in mouse_data if
                         0 < datapoint["sliderNum"] < 11 and datapoint["practice"] == False]

        elif "FollowBox" in mouse_task:

            # get the follow box data starting as soon as the circle starts moving (the mouse was moved inside the
            # circle)
            task_data = [datapoint for datapoint in mouse_data if
                         datapoint["taskStarted"] and datapoint["practice"] == False]

        return task_data

    # calculate task specific mouse parameters (works on the data it is given, not on outer variables)
    def _get_task_specific_params(mouse_data, mouse_task):

        params = {}

        if "PointClick" in mouse_task:

            # group the data by trial (consecutive datapoints with the same number of clicked circles)
            trial_data = [list(g) for k, g in groupby(mouse_data, operator.itemgetter("circlesClicked"))]

            dist_from_ideal_line = get_dist_from_ideal_line(trial_data)
            params.update(dist_from_ideal_line)

        elif "DragDrop" in mouse_task:

            # get the successful drop trials and calculate the distance to an ideal line from the start to the end of
            # the drag

            # split the data in trials by the circles dragged number
            trial_data = [list(g) for k, g in groupby(mouse_data, operator.itemgetter("circlesDragged"))]

            # get the "successful drag and drop trial" (when the circle was dropped in the target): the last
            # group of datapoints with the same circle number within a trial
            successfull_trials = [[list(g) for k, g in groupby(trial, operator.itemgetter("circleNumber"))][-1]
                                  for trial in trial_data]

            # only get the data when the circle is dragged to the target
            dragging_data = [[datapoint for datapoint in trial if datapoint["dragging"]]
                             for trial in successfull_trials]

            dis_from_ideal_line = get_dist_from_ideal_line(dragging_data)
            params.update(dis_from_ideal_line)

        elif "FollowBox" in mouse_task:

            # calculate the share of datapoints for which the mouse was inside the moving circle
            in_target_ratio = sum([1 for i in mouse_data if i["inBox"]]) / len(mouse_data)
            params["In_target_ratio_"] = in_target_ratio

        # note: there are no task specific parameters for the slider task

        return params

    # --- generate the dataset with the mouse parameters ---

    # get the relevant task data
    task_data = _get_task_data(data, task)

    # fail with a clear message instead of an obscure TypeError/IndexError further down
    if isinstance(task_data, int):
        raise ValueError("Unknown mouse task: " + str(task))
    if not task_data:
        raise ValueError("No task datapoints found for task: " + str(task))

    # get the time working on the task (in seconds)
    on_task_duration = (task_data[-1]["time"] - task_data[0]["time"]) / 1000
    output["Working_time_" + task] = on_task_duration

    # get the interpolated task data to calculate the task parameters
    interpol_mouse = interpolate_mouse_movement(task_data)

    # the interpolation returns -99 if there are fewer than two movement datapoints
    if isinstance(interpol_mouse, int):
        raise ValueError("Not enough movement datapoints to interpolate task: " + str(task))

    # get the standard mouse parameters
    movement_param = get_mouse_movement_parameters(interpol_mouse)
    output.update(_output_with_taskname(movement_param, task))

    angles = get_mouse_angles(interpol_mouse)
    output.update(_output_with_taskname(angles, task))

    flips = get_x_y_flips_and_sample_entropy(interpol_mouse)
    output.update(_output_with_taskname(flips, task))

    # get task specific mouse parameters
    task_specific_params = _get_task_specific_params(task_data, task)
    output.update(_output_with_taskname(task_specific_params, task))

    return output

## Inspection of the mouse usage data

In [None]:
# Inspect the mouse data for artifacts, data recording errors and non-task-compliance
# In my view of the data, almost all artifacts were caused by multiple recordings of the same data points and are
# easily filterable without losing information about the mouse movement. In a few cases, consecutive different mouse
# movement datapoints are assigned the same timestamp value and cause information loss if deleted. If the median time
# difference between consecutive mouse datapoints is greater than 50ms, it is likely that movement information was
# lost and that the tracking from the remaining data is not reliable enough. Those cases should therefore be removed

def inspect_mouse_data(task, median_task_duration):
    """Summarise the recording quality of one task for every valid participant.

    A participant is flagged (and the mouse movement is visualized) if the median time difference between
    consecutive cleaned datapoints is above 50 or, otherwise, if the task took more than three times the median
    task duration. Returns a DataFrame with one row per participant.
    """

    print("Median task length:", median_task_duration)

    summary = {}

    for participant in dataset:

        # only participants on the list of valid study ids are inspected
        if participant not in valid_ids:
            continue

        task_entry = dataset[participant][task]
        recording = task_entry["data"]

        # total task duration of the task (in seconds)
        total_task_duration = (recording["taskEnded"] - recording["trialStarted"]) / 1000

        # raw mouse data and screen properties of the task
        mouse_data = recording["TrackerData"]
        screen_props = task_entry["MetaData"]["screenProps"]

        # clean the mouse data, get the artifacts and the median time diff between consecutive data points
        cleaned_mouse_data, artifacts, median_time_diff = clean_mouse_data(mouse_data)

        # a median time difference above 50 might indicate problems with the recording (too sparse data to
        # measure mouse movement precisely); a very long task time might indicate non-compliance
        if median_time_diff > 50:
            print(participant + " flagged for time diff")
            flagged = True
        elif total_task_duration > 3 * median_task_duration:
            print(participant + " flagged for task length", total_task_duration)
            flagged = True
        else:
            flagged = False

        # save the mouse data info
        summary[participant] = {
            "total_task_duration": total_task_duration,
            "total_datapoints": len(mouse_data),
            "cleaned_datapoints": len(cleaned_mouse_data),
            "artifacts": artifacts,
            "time_diff": median_time_diff,
            "flagged": flagged,
        }

        # flagged cases are plotted for a visual inspection
        if flagged:
            visualize_mouse_movement(cleaned_mouse_data, participant, task, screen_props)

    return pd.DataFrame(summary).T

In [None]:
# collect the duration of every mouse task per valid participant to inspect potentially problematic cases
# (participant was not able to work on the task as intended)

all_tasks = ["Con_PointClick", "Con_FollowBox", "Con_DragDrop", "Con_Slider", "Pr_DragDrop", "Pr_FollowBox",
             "Pr_PointClick", "Pr_Slider"]


task_times = {}

for participant in dataset:

    # participants who are not on the list of valid study ids are ignored
    if participant not in valid_ids:
        continue

    task_times[participant] = {}

    for task in all_tasks:
        # total task duration of the task (in seconds): end of the task minus start of the trial
        total_task_duration = (dataset[participant][task]["data"]["taskEnded"] -
                               dataset[participant][task]["data"]["trialStarted"]) / 1000

        task_times[participant][task] = total_task_duration

# one row per participant, one column per task
task_times_df = pd.DataFrame(task_times).T

#### Point-and-click task

In [None]:
# practice: inspect the point-and-click data and keep only the flagged participants
bad_point_pointclick_pr = inspect_mouse_data("Pr_PointClick", np.median(task_times_df["Pr_PointClick"]))
bad_point_pointclick_pr = bad_point_pointclick_pr.loc[bad_point_pointclick_pr["flagged"]]

# 3 cases flagged for task duration (2 of them might have moved to a different tab or away from the task):
# yCPGZjqEddQjRipelOCElNn1H5f2, qqV2fQsapTNzwcL1RJu9lUI11Ck1

In [None]:
# condition: inspect the point-and-click data and keep only the flagged participants
bad_point_pointclick_con = inspect_mouse_data("Con_PointClick", np.median(task_times_df["Con_PointClick"]))
bad_point_pointclick_con = bad_point_pointclick_con.loc[bad_point_pointclick_con["flagged"]]

# 4 cases flagged for task duration:
# case zjtqowMpRGdjB88qWwbSolpD2dh2 might have done something else in between?
# case OwNTQbaCM9SELk6VKts0dO0Qylq1 has conspicuous mouse behavior

#### Drag-and-drop task

In [None]:
# practice: inspect the drag-and-drop data and keep only the flagged participants
bad_point_dragdrop_pr = inspect_mouse_data("Pr_DragDrop", np.median(task_times_df["Pr_DragDrop"]))
bad_point_dragdrop_pr = bad_point_dragdrop_pr.loc[bad_point_dragdrop_pr["flagged"]]

# suspicious amount of mouse movement (although as intended) for cases:
# 06OMljv8cMRWXdQzI0PcuHVLTnQ2, wLy4JC91JdVds9KqgfxWbCBwvmf2, Gg1FgyYHw2UmOMvAgdlMUjE9sZs1
# Gg1FgyYHw2UmOMvAgdlMUjE9sZs1 might have moved out of the task window

In [None]:
# condition: inspect the drag-and-drop data and keep only the flagged participants
bad_point_dragdrop_con = inspect_mouse_data("Con_DragDrop", np.median(task_times_df["Con_DragDrop"]))
bad_point_dragdrop_con = bad_point_dragdrop_con.loc[bad_point_dragdrop_con["flagged"]]

# suspicious amount of mouse movement in some cases (in some cases with a large amount of task time, it looks like
# the mouse was moved out of the window (and something different was done?)):
# mouse moved out: S9mOBlikykOORuC8pgJ6c3mW9Kf2, yCPGZjqEddQjRipelOCElNn1H5f2, 5ScJnvKYFge4YTnDZnuVfLkAQ8q2
# much movement: 9G86uiBnATSN2ppi7AgxeKYOHLh1

#### Follow-the-circle task

In [None]:
# practice: inspect the follow-the-circle data and keep only the flagged participants
bad_point_followbox_pr = inspect_mouse_data("Pr_FollowBox", np.median(task_times_df["Pr_FollowBox"]))
bad_point_followbox_pr = bad_point_followbox_pr.loc[bad_point_followbox_pr["flagged"]]

# because the task starts after the mouse was moved inside the box, the cases flagged for task length moved the
# mouse late into the box (task movement looks okay in both cases)

In [None]:
# condition: inspect the follow-the-circle data and keep only the flagged participants
bad_point_followbox_con = inspect_mouse_data("Con_FollowBox", np.median(task_times_df["Con_FollowBox"]))
bad_point_followbox_con = bad_point_followbox_con.loc[bad_point_followbox_con["flagged"]]

# movement looks okay, participants most likely took a break before starting the task (mouse moves
# away from the window in some cases)

#### Slider task

In [None]:
# practice: inspect the slider data and keep only the flagged participants
bad_point_slider_pr = inspect_mouse_data("Pr_Slider", np.median(task_times_df["Pr_Slider"]))
bad_point_slider_pr = bad_point_slider_pr.loc[bad_point_slider_pr["flagged"]]

# lots of data in cases: a5YdxOGLaTgm9yLTnlWoxv6rzvZ2, EH3QFZl1YyNA5luTBfjIREX4Y853, Cwns86ssz5WRRSfalFbYwjFAHC63

In [None]:
# condition: inspect the slider data and keep only the flagged participants
bad_point_slider_con = inspect_mouse_data("Con_Slider", np.median(task_times_df["Con_Slider"]))
bad_point_slider_con = bad_point_slider_con.loc[bad_point_slider_con["flagged"]]

# cases 4J3MqvwhpoOk44S1bzvoLdM87yj1, zgr9oYA1JBWGt2UrOK3sHfDo0qJ2 and VDp9nabg5mXtjfaVwzULz5fcJij1 most likely
# moved away from the task?
# many datapoints for wLy4JC91JdVds9KqgfxWbCBwvmf2

#### conclusion:
 <ul>
  <li>mouse tracking seemed to have worked</li>
  <li>some participants might have had difficulties doing the task</li>
  <li>some participants might have taken a break during the task</li>
</ul> 

### After inspecting the mouse usage data and getting a feeling for it, calculate the mouse usage features

In [None]:
# Calculate the mouse paramaters per task (both the data from the practice and the data from the actual
# condition per task are saved in the same dataframe for further analysis

def create_mouse_analysis_dataset(mousetask):
    """Build the dataframe with the mouse features and self-reports of one mouse task.

    For every participant of the global ``dataset`` that is listed in ``valid_ids``, the practice run
    ("Pr_" + mousetask) and the condition run ("Con_" + mousetask) of the task are processed:

    * the task duration is stored,
    * if the mouse data passes the quality checks, the mouse movement is visualized (and saved by
      ``visualize_mouse_movement``) and the mouse parameters of ``get_parameters_pipeline`` are stored,
    * the SAM valence/arousal ratings, the MDBF scale values and the stress rating are stored.

    The function relies on the notebook globals ``dataset``, ``valid_ids`` and ``task_times_df`` and on
    the helper functions ``clean_mouse_data``, ``visualize_mouse_movement`` and
    ``get_parameters_pipeline``.

    :param mousetask: name of the task without its "Pr_"/"Con_" prefix, e.g. "PointClick"
    :return: dataframe with one row per participant; participants with any missing value (e.g. because
             a task run failed the quality checks and has no mouse parameters) are dropped
    """

    # process the data of the practice task and the actual task
    tasks = ["Pr_" + mousetask, "Con_" + mousetask]

    # a task run is only analysed if it took at most 3 times the median task duration; the limits do
    # not depend on the participant, so they are computed once per task
    max_task_durations = {task: 3 * np.median(task_times_df[task]) for task in tasks}

    # MDBF items that are recoded (4 - value) before the scale values are calculated
    # NOTE(review): "MDBF_ermattet" is not recoded although it looks like a negatively worded item
    # -- confirm against the questionnaire
    items_to_recode = {"MDBF_angespannt", "MDBF_nervoes", "MDBF_schlaefrig", "MDBF_ungluecklich",
                       "MDBF_unzufrieden"}

    # dictionary for the mouse parameters and self-reports: {participant: {column: value}}
    results = {}

    for participant in dataset:

        # only participants who finished the study and were not flagged as bad cases are processed
        if participant not in valid_ids:
            continue

        print("Processing participant " + participant)

        participant_data = dataset[participant]
        participant_results = {}
        results[participant] = participant_results

        for task in tasks:

            # "Pr_" or "Con_": prefix of the questionnaire pages that belong to the task
            page_prefix = task[:len(task) - len(mousetask)]
            # "Pr" or "Co": prefix of the self-report columns in the results
            column_prefix = task[:2]

            task_data = participant_data[task]["data"]

            # get the total task duration of the task (in seconds)
            total_task_duration = (task_data["taskEnded"] - task_data["trialStarted"]) / 1000
            participant_results["Task_duration_" + task] = total_task_duration

            # clean the mouse data, get the artifacts and the median time difference
            cleaned_mouse_data, _artifact_number, median_time_diff = clean_mouse_data(task_data["TrackerData"])

            # get the arousal and valence rating of the participant
            sam_data = participant_data[page_prefix + "Sam_" + mousetask]["data"]
            arousal_rating = sam_data["samArousal"]
            valence_rating = sam_data["samValence"]

            # check if the mouse data was properly recorded (median time difference of at most 50,
            # presumably milliseconds -- confirm in clean_mouse_data) and if the task time was in the
            # acceptable range
            max_duration = max_task_durations[task]
            if median_time_diff <= 50 and total_task_duration <= max_duration:

                # Visualize the mouse usage data and save the image for further image analysis
                # comment out if not necessary!

                # get the screen props of the mouse task
                screen_props = participant_data[task]["MetaData"]["screenProps"]
                # get the condition (high-stress or low-stress): for classification
                cond = str(participant_data["ExperimentMetaData"]["condition"])
                # the condition and the ratings are part of the image name
                visualize_mouse_movement(cleaned_mouse_data,
                                         participant + "_" + cond + "_+" + str(arousal_rating) + "_+"
                                         + str(valence_rating), task, screen_props)

                # calculate the mouse parameters!
                participant_results.update(get_parameters_pipeline(cleaned_mouse_data, task))

            elif total_task_duration > max_duration:
                print("Participant " + participant + " took too long for the " + task)
            else:
                # the task time was fine, so the mouse data check must have failed
                print("Participant " + participant + " has not enough valid mouse data in the " + task)

            # get the Arousal and Valence as continous dependent variables
            participant_results[column_prefix + "_valence"] = valence_rating
            participant_results[column_prefix + "_arousal"] = arousal_rating

            # get the values of the mdbf items and recode the negatively worded ones
            mdbf_data = participant_data[page_prefix + "Mdbf"]["data"]
            mdbf = {item: (4 - value) if item in items_to_recode else value
                    for item, value in mdbf_data.items()}

            # get the MDBF scale values
            # NOTE(review): "MDBF_gut" is part of two scales, "MDBF_ruhig" is part of the GS scale and
            # "MDBF_unzufrieden" is recoded but not used in any scale -- confirm the item-to-scale
            # assignment against the questionnaire
            participant_results[column_prefix + "_MDBF_GS"] = (mdbf["MDBF_wohl"] + mdbf["MDBF_gut"] +
                                                               mdbf["MDBF_ungluecklich"] + mdbf["MDBF_ruhig"]) / 4
            participant_results[column_prefix + "_MDBF_RU"] = (mdbf["MDBF_ausgeglichen"] + mdbf["MDBF_gut"] +
                                                               mdbf["MDBF_angespannt"] + mdbf["MDBF_nervoes"]) / 4
            participant_results[column_prefix + "_MDBF_WM"] = (mdbf["MDBF_frisch"] + mdbf["MDBF_wach"] +
                                                               mdbf["MDBF_schlaefrig"] + mdbf["MDBF_ermattet"]) / 4

            # get the self-reported stress level
            participant_results[column_prefix + "_stress"] = mdbf_data["stress"]

        # get the condition
        participant_results["condition"] = participant_data["ExperimentMetaData"]["condition"]

    # create the dataframe (one row per participant)
    df = pd.DataFrame(results).T

    # drop na cases
    return df.dropna()

In [None]:
# helper function to save the final mouse feature dataframe as a csv file
def save_dataframe_as_csv(dataframe, task):
    """Write a mouse feature dataframe to a tab-separated csv file.

    The file is named "mouse_data_<task>_<today's date in ISO format>.csv" and is written relative
    to the current working directory; the index column is labelled "ID".

    :param dataframe: the dataframe to save (anything with a pandas-style ``to_csv`` method)
    :param task: name of the mouse task, becomes part of the file name
    """
    # imported locally so that the helper does not depend on a datetime import in another cell
    import datetime

    file_name = "mouse_data_" + task + "_" + datetime.date.today().isoformat() + ".csv"
    dataframe.to_csv(file_name, sep="\t", encoding="utf-8", index_label="ID")
    print("Data successfully saved")

### save the calculated mouse usage features per task

In [None]:
# create the mouse feature dataframe for the Point and Click Task (practice and condition data)
# and save it as a tab-separated csv file
point_Click_frame = create_mouse_analysis_dataset("PointClick")
save_dataframe_as_csv(point_Click_frame, "PointClick")

In [None]:
# create the mouse feature dataframe for the Drag and Drop Task (practice and condition data)
# and save it as a tab-separated csv file
dragdrop_frame = create_mouse_analysis_dataset("DragDrop")
save_dataframe_as_csv(dragdrop_frame, "DragDrop")

In [None]:
# create the mouse feature dataframe for the Follow Circle Task (dataset key "FollowBox", practice
# and condition data) and save it as a tab-separated csv file
followbox_frame = create_mouse_analysis_dataset("FollowBox")
save_dataframe_as_csv(followbox_frame, "FollowBox")

In [None]:
# create the mouse feature dataframe for the Slider Task (practice and condition data)
# and save it as a tab-separated csv file
slider_frame = create_mouse_analysis_dataset("Slider")
save_dataframe_as_csv(slider_frame, "Slider")