In [1]:
%matplotlib inline

import ast
import json
import os
import sys
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import lines
from matplotlib import patches

import warnings
warnings.filterwarnings("ignore")


# Participant IDs (PIDs), grouped into one list per study condition.
# The cells below read each participant's CSV files from
# os.path.join(condition, pid), so every PID doubles as a directory name.

# PIDs registered under the "CTRL" condition key in ALL_PIDS below.
AYS_PIDS = [
      "0Odg8Ya9cQZM",
      "3Bl1l9qPo03v",
      "9BJDMrSg5GU2",
      "AtYKOM9xQoxo",
      "bvxCS2xslVD1",
      "bzdxtTFRW7bn",
      "Ck5zEa9jS8kr",
      "EPcW0vyMy901",
      "gzgUnyhhYvag",
      "kK7uC15Oxl7j",
      "WAiLZBXejqs6",
      "WBoN8OgUq9kM",
      "xrQxT9kE7trL",
]

# PIDs registered under the "SUM" condition key.
SUM_PIDS = [
      "7YPaDt3k6zUJ",
      "9oAsM1Zsd2ZL",
      "BDsZoXY2KvCq",
      "cBx0ECEdnCXE",
      "DMMPwdh9JyIJ",
      "F8aaNjmqipm9",
      "iLi4QFTFUW6W",
      "KB4R9vQpgvDE",
      "n8dCqYoZqOtZ",
      "oa0dujnJJIQK",
      "Q3GODcr8rSU2",
      "Xb1da3tmq51s",
      "XK6R4nlN8DL9",
      "Xm4gLrMYjXLp",
]

# PIDs registered under the "RT" condition key.
RT_PIDS = [
      "3jCj03ExwbiR",
      "3LjPd26Bv20B",
      "7zspHOjpyZg6",
      "qBEgmP5F3go8",
      "RJozqLvmzUql",
      "SM3fpzR19vuY",
      "tqnTUO7mgIPw",
      "VSG8tghjg06Z",
      "wYKNw3brGUCM",
      "yJrWi90sw5Fv",
      "ZDOTBhGZ9oS9",
]

# PIDs registered under the "RTSUM" condition key.
RTSUM_PIDS = [
      "1V7m8wY9JNak",
      "606gnDVdNwnt",
      "7UfOha66klmh",
      "DAn39rXtnCAS",
      "ggYz115dy0a9",
      "joO2OX6p9rr6",
      "rJwpqBcwhEar",
      "UPGk3s3Qldql",
      "vEiIrVSiezFn",
      "w6ptnTrqQi3D",
      "wJm4COpLVvz5",
      "zV0yHZRI3MH9"
]

# Lookup from condition label to its PID list. The label is also the top-level
# directory name used when building each participant's basepath.
# Note that the "CTRL" label is backed by AYS_PIDS.
ALL_PIDS = {
    'CTRL': AYS_PIDS,
    'SUM': SUM_PIDS,
    'RT': RT_PIDS,
    'RTSUM': RTSUM_PIDS,
}

In [None]:
# Participant and task to summarise: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # per-participant data directory

# timestamps.csv maps activity labels to epoch timestamps; the task window is
# bounded by the "task-<task>" and "live-<task>" entries.
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
ts_by_activity = df_ts.set_index("activities")
task_start_ts = ts_by_activity.at[f"task-{task}", "epoch_timestamp"]
task_end_ts = ts_by_activity.at[f"live-{task}", "epoch_timestamp"]

# Keep only the rows logged for this task; appMode is constant afterwards, so drop it.
df = pd.read_csv(os.path.join(basepath, "interaction.csv"))
df = df.loc[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])

# Shift the logged epoch-millisecond timestamps back from GMT: 4 hours (EDT) for
# everyone except one participant whose log was recorded at GMT-07:00.
hours_behind_gmt = 7 if pid == "EPcW0vyMy901" else 4
df["interactionAt"] -= hours_behind_gmt * 3600000

### HIGH-LEVEL STATS ###

total_minutes = (task_end_ts - task_start_ts) / 1000 / 60
n_interactions = len(df.index)
print(f"{pid}")
print(f"Total Time (min): {total_minutes}")
print(f"Number of Interactions: {n_interactions}")

In [None]:
# Participant and task to plot: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # per-participant data directory

# Task window boundaries, looked up by activity label in timestamps.csv.
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
ts_by_activity = df_ts.set_index("activities")
task_start_ts = ts_by_activity.at[f"task-{task}", "epoch_timestamp"]
task_end_ts = ts_by_activity.at[f"live-{task}", "epoch_timestamp"]

# Bias-metric log restricted to this task; appMode is constant afterwards, so drop it.
df = pd.read_csv(os.path.join(basepath, "bias_interaction.csv"))
df = df.loc[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])

# Shift the logged epoch-millisecond timestamps back from GMT: 4 hours (EDT) for
# everyone except one participant whose log was recorded at GMT-07:00.
hours_behind_gmt = 7 if pid == "EPcW0vyMy901" else 4
df["interactionAt"] -= hours_behind_gmt * 3600000

### DPC/DPD METRIC ###

# Re-index by minutes since task start so the x-axis reads as elapsed time.
minutes_elapsed = (df["interactionAt"].rename("Minutes Elapsed") - task_start_ts) / 1000 / 60
df_metric = df.set_index(minutes_elapsed)
df_metric[["dpc_metric", "dpd_metric"]].plot(drawstyle="steps-post", style=".-")

In [None]:
# Participant and task to plot: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # basepath for PID

# Task window boundaries, looked up by activity label in timestamps.csv.
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
ts_by_activity = df_ts.set_index("activities")
task_start_ts = ts_by_activity.at[f"task-{task}", "epoch_timestamp"]
task_end_ts = ts_by_activity.at[f"live-{task}", "epoch_timestamp"]

# Bias-metric log restricted to this task.
df = pd.read_csv(os.path.join(basepath, "bias_interaction.csv"))
df = df[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
if pid == "EPcW0vyMy901":
    df["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
else:
    df["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

### AD METRIC ###

# Keys of the parsed ad_metric dict that are dropped before plotting, per task.
drop_cols = {"politics": ["id", "first_name", "last_name"], "movies": ["id", "Title"]}

# ad_metric indexed by minutes since task start; gaps are filled forward, then
# backward so leading missing values take the first observed value.
minutes_elapsed = (df["interactionAt"].rename("Minutes Elapsed") - task_start_ts) / 1000 / 60
ad_raw = df.set_index(minutes_elapsed)["ad_metric"].ffill().bfill()

# Pad the series so the step plot spans the whole task: repeat the first value at
# minute 0 and the last value at the task-end minute.
ad_at_start = pd.Series(
    [ad_raw.iloc[0]],
    index=pd.Index([0], name="Minutes Elapsed"),
    name=ad_raw.name,
)
ad_at_end = pd.Series(
    [ad_raw.iloc[-1]],
    index=pd.Index([(task_end_ts - task_start_ts) / 1000 / 60], name="Minutes Elapsed"),
    name=ad_raw.name,
)

df_attr_ad = (
    # Series.append was removed in pandas 2.0; a single concat is equivalent.
    pd.concat([ad_at_start, ad_raw, ad_at_end])
    # Each cell holds a Python-literal dict as text; parse it and expand the
    # keys into one column per attribute.
    .map(lambda x: ast.literal_eval(str(x)))
    .apply(pd.Series)
    .drop(columns=drop_cols[task])
)
df_attr_ad.plot(drawstyle="steps-post", subplots=True, layout=(3,3), sharex=True, sharey=True, style='.-')

In [None]:
def _rolling_attr_counts(events, interaction_type, start_ts):
    """Build per-attribute event markers for one interaction type.

    Parameters
    ----------
    events : pd.DataFrame
        Log with columns ``interactionType``, ``interactionAt``, ``x`` and ``y``.
        ``x`` and ``y`` hold Python-literal dicts as text with (at least) the
        keys ``name`` and ``value``.
    interaction_type : str
        Value of ``interactionType`` to select.
    start_ts : number
        Task start timestamp, in the same unit as ``interactionAt``
        (converted to minutes via ``/ 1000 / 60``).

    Returns
    -------
    pd.DataFrame
        One row per matching event, indexed by "Minutes Elapsed", with one
        column per attribute name seen on either axis. A cell is the number of
        axes (0, 1 or 2) showing that attribute for that event. When no event
        matches, an empty frame with an empty "Minutes Elapsed" index.
    """
    subset = events[events["interactionType"] == interaction_type]
    if subset.empty:
        return pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))

    def _axis_counts(axis):
        # Event-by-attribute frame: 1.0 where the event's `axis` dict names
        # that attribute, 0.0 elsewhere.
        attr_col = f"{axis}_attr"
        return (
            subset[axis]
            .map(lambda cell: ast.literal_eval(str(cell)))
            .apply(pd.Series)
            .rename(columns={"name": attr_col, "value": f"{axis}_value"})
            .groupby(attr_col)[attr_col]
            .rolling(1)
            .count()
            .unstack()
            .fillna(0.0)
            .T
        )

    minutes_elapsed = (subset["interactionAt"].rename("Minutes Elapsed") - start_ts) / 1000 / 60
    return (
        _axis_counts("x")
        .add(_axis_counts("y"), fill_value=0)
        .fillna(0.0)
        # Rows are in original log order, matching `minutes_elapsed` positionally.
        .set_index(minutes_elapsed)
    )


# Participant and task to inspect: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # basepath for PID

# Task window boundaries, looked up by activity label in timestamps.csv.
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
ts_by_activity = df_ts.set_index("activities")
task_start_ts = ts_by_activity.at[f"task-{task}", "epoch_timestamp"]
task_end_ts = ts_by_activity.at[f"live-{task}", "epoch_timestamp"]

# Bias-metric log restricted to this task.
df = pd.read_csv(os.path.join(basepath, "bias_interaction.csv"))
df = df[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
if pid == "EPcW0vyMy901":
    df["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
else:
    df["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

### Vis/List/Card click/hover rolling counts ###

# VIS HOVER
rolling_vis_hover_count = _rolling_attr_counts(df, 'mouseout', task_start_ts)
print(rolling_vis_hover_count)

# VIS CLICK ADD
rolling_vis_click_add_count = _rolling_attr_counts(df, 'add_to_list_via_scatterplot_click', task_start_ts)
print(rolling_vis_click_add_count)

# LIST HOVER
rolling_list_hover_count = _rolling_attr_counts(df, 'mouseout_from_list', task_start_ts)
print(rolling_list_hover_count)

# LIST CLICK REMOVE
rolling_list_click_remove_count = _rolling_attr_counts(df, 'remove_from_list_via_list_item_click', task_start_ts)
print(rolling_list_click_remove_count)

# CARD CLICK ADD
rolling_card_click_add_count = _rolling_attr_counts(df, 'add_to_list_via_card_click', task_start_ts)
print(rolling_card_click_add_count)

# CARD CLICK REMOVE
rolling_card_click_remove_count = _rolling_attr_counts(df, 'remove_from_list_via_card_click', task_start_ts)
print(rolling_card_click_remove_count)

In [None]:
# Participant and task to inspect: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # basepath for PID

# Task window boundaries, looked up by activity label in timestamps.csv.
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
ts_by_activity = df_ts.set_index("activities")
task_start_ts = ts_by_activity.at[f"task-{task}", "epoch_timestamp"]
task_end_ts = ts_by_activity.at[f"live-{task}", "epoch_timestamp"]

# Interaction log restricted to this task.
df = pd.read_csv(os.path.join(basepath, "interaction.csv"))
df = df[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
if pid == "EPcW0vyMy901":
    df["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
else:
    df["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

### Filter Changed ###

# Produces `df_filter_changed` in long format
# (attribute, measurement, Minutes Elapsed, value): one row with value 1.0 per
# 'filter_changed' event, labelled with the attribute whose filter changed.
mask = df['interactionType'] == 'filter_changed'
df_filter_changed = df[mask]
if len(df_filter_changed.index) > 0:
    df_filter_changed = df_filter_changed[['attribute']]
    # Wide frame: one row per event (original log index), one column per
    # attribute, 1.0 where the event touched that attribute and NaN elsewhere.
    rolling_filter_changed_count = (
        df_filter_changed.groupby(['attribute'])['attribute']
            .rolling(1)
            .count()
            .unstack()
            .reset_index()
            .set_index('attribute')
            .T
            # Recover each event's timestamp by joining on the original log index.
            .join(df[['interactionAt']], how='inner')
            .assign(**{'Minutes Elapsed': lambda x: (x['interactionAt'] - task_start_ts) / 1000 / 60})
            .drop(columns=['interactionAt'])
    )
    df_filter_changed = (
        # value_vars is left at its default so every non-id column (i.e. every
        # attribute) is unpivoted. The previous `list(...).remove(...)` argument
        # always evaluated to None, so this is the behaviour the cell already had.
        pd.melt(
            rolling_filter_changed_count,
            id_vars=['Minutes Elapsed'],
            var_name='attribute'
        )
        # Drop the NaN cells: attributes that were not touched by that event.
        .dropna()
        .assign(measurement='filter_changed')[['attribute', 'measurement', 'Minutes Elapsed', 'value']]
    )
else:
    df_filter_changed = pd.DataFrame(None, columns=["attribute", "measurement", "Minutes Elapsed", "value"])
print(df_filter_changed)

In [None]:
# Participant and task to inspect: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # basepath for PID

# Task window boundaries, looked up by activity label in timestamps.csv.
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
ts_by_activity = df_ts.set_index("activities")
task_start_ts = ts_by_activity.at[f"task-{task}", "epoch_timestamp"]
task_end_ts = ts_by_activity.at[f"live-{task}", "epoch_timestamp"]

# Interaction log restricted to this task.
df = pd.read_csv(os.path.join(basepath, "interaction.csv"))
df = df[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
if pid == "EPcW0vyMy901":
    df["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
else:
    df["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

### SUBMISSION MARKERS ###

# Attribute names per task; every submit click is replicated once per attribute
# so the marker appears on each attribute's row.
ATTRIBUTES = {
    "politics": [
        "party",
        "gender",
        "occupation",
        "age",
        "political_experience",
        "policy_strength_ban_abortion_after_6_weeks",
        "policy_strength_legalize_medical_marijuana",
        "policy_strength_increase_medicare_funding",
        "policy_strength_ban_alcohol_sales_sundays",
    ],
    "movies": [
        "Running Time",
        "Production Budget",
        "Worldwide Gross",
        "Rotten Tomatoes Rating",
        "IMDB Rating",
        "Genre",
        "Creative Type",
        "Content Rating",
        "Release Year",
    ],
}
mask = df['interactionType'].isin(["click_submit_button_initial", "click_submit_button_revise", "click_submit_button_final"])
click_submit = df.loc[mask, ['interactionAt', 'interactionType']]

# One record per (submit click, attribute), in click order then attribute order.
# Building the frame from a single record list also covers the case of no submit
# clicks: the result is an empty frame with the expected columns, where
# pd.concat([]) would raise ValueError.
submit_records = [
    [attr, interaction_type, (interaction_at - task_start_ts) / 1000 / 60, 1.0]
    for interaction_at, interaction_type in zip(click_submit["interactionAt"], click_submit["interactionType"])
    for attr in ATTRIBUTES[task]
]
df_submit = pd.DataFrame(submit_records, columns=["attribute", "measurement", "Minutes Elapsed", "value"])
print(df_submit)

In [None]:
# Participant and task to inspect: the first PID listed for the chosen condition.
condition = "CTRL"
task = "politics"
pid = ALL_PIDS[condition][0]
basepath = os.path.join(condition, pid)  # basepath for PID
df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
# Task window boundaries, looked up by activity label ("task-<task>" / "live-<task>").
task_start_ts = df_ts.set_index("activities").at[f"task-{task}", "epoch_timestamp"]
task_end_ts = df_ts.set_index("activities").at[f"live-{task}", "epoch_timestamp"]
# Interaction log restricted to this task; appMode is dropped once it is constant.
df = pd.read_csv(os.path.join(basepath, f"interaction.csv"))
df = df[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
if pid == "EPcW0vyMy901":
    df["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
else:
    df["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

### Attribute Set ###

# Builds `axis_set` in long format (attribute, measurement, Minutes Elapsed, value).
# Each time the x (or y) axis attribute changes, two rows are emitted for the new
# attribute: value=True at the change time and value=False at the time of the
# next change on that axis (or at task end for the last one).
mask = df["interactionType"] == "axes_attribute_changed"
attr_changed = df[mask]
if len(attr_changed.index) > 0:
    attr_changed = attr_changed[["interactionAt", "interactionType", "x", "y"]]
    # Flag rows whose x differs from the previous row; the first row is always
    # flagged because shift() yields NaN there.
    x_changed = pd.concat(
        [
            attr_changed[["interactionAt", "interactionType", "x"]],
            attr_changed["x"].ne(attr_changed["x"].shift()).astype(int).rename("changed"),
        ],
        axis=1,
    )
    # Keep only the actual x changes and relabel x as the generic "attribute".
    x_changed = (
        x_changed[x_changed["changed"] != 0]
        .drop(columns=["changed"])
        .assign(measurement=lambda x: "x_axis_set")
        .rename(columns={"x": "attribute"})
    )
    # Duplicate every change row so each change gets a start row and an end row.
    # Going through .values resets the index to 0..2n-1, which the parity test
    # below relies on.
    x_changed_dup = pd.DataFrame(np.repeat(x_changed.values, 2, axis=0))
    x_changed_dup.columns = x_changed.columns
    # shift(-1) leaves even rows with their own change time and gives odd rows
    # the next change's time; the final row has no successor and gets task_end_ts.
    df_x_changed = pd.concat(
        [
            x_changed_dup[["attribute", "measurement"]],
            x_changed_dup["interactionAt"].shift(-1).fillna(task_end_ts),
        ],
        axis=1,
    ).dropna(subset=["attribute"])
    # Even index = start of the interval (True), odd index = end (False).
    df_x_changed["value"] = df_x_changed.index % 2 == 0
    # Same procedure for the y axis.
    y_changed = pd.concat(
        [
            attr_changed[["interactionAt", "interactionType", "y"]],
            attr_changed["y"].ne(attr_changed["y"].shift()).astype(int).rename("changed"),
        ],
        axis=1,
    )
    y_changed = (
        y_changed[y_changed["changed"] != 0]
        .drop(columns=["changed"])
        .assign(measurement=lambda x: "y_axis_set")
        .rename(columns={"y": "attribute"})
    )
    y_changed_dup = pd.DataFrame(np.repeat(y_changed.values, 2, axis=0))
    y_changed_dup.columns = y_changed.columns
    df_y_changed = pd.concat(
        [
            y_changed_dup[["attribute", "measurement"]],
            y_changed_dup["interactionAt"].shift(-1).fillna(task_end_ts),
        ],
        axis=1,
    ).dropna(subset=["attribute"])
    df_y_changed["value"] = df_y_changed.index % 2 == 0
    # Stack both axes, order by attribute / axis / time, and convert the
    # timestamps to minutes since task start.
    axis_set = (
        pd.concat([df_x_changed, df_y_changed])
            .sort_values(by=["attribute", "measurement", "interactionAt"])
            .reset_index(drop=True)
            .assign(**{'Minutes Elapsed': lambda x: (x['interactionAt'] - task_start_ts) / 1000 / 60})
            .drop(columns=['interactionAt'])
    )
    axis_set = axis_set[['attribute', 'measurement', 'Minutes Elapsed', 'value']]
else:
    # No axis changes logged: empty frame with the same columns.
    axis_set = pd.DataFrame(None, columns=["attribute", "measurement", "Minutes Elapsed", "value"])
print(axis_set)

In [9]:
condition = "RTSUM"  # CTRL, SUM, RT, RTSUM
task = "movies"  # politics, movies
print(f"{condition} - {task}")

PIDS = ALL_PIDS[condition]
for i in range(len(PIDS)):
    pid = PIDS[i]  # get pid
    print(f'  ({i+1}/{len(PIDS)}) building chart for {pid}')
    basepath = os.path.join(condition, pid)  # basepath for PID
    df_ts = pd.read_csv(os.path.join(basepath, "timestamps.csv"))
    task_start_ts = df_ts.set_index("activities").at[f"task-{task}", "epoch_timestamp"]
    task_end_ts = df_ts.set_index("activities").at[f"live-{task}", "epoch_timestamp"]

    ### ALL LIVE INTERACTIONS ###

    df = pd.read_csv(os.path.join(basepath, f"interaction.csv"))
    df = df[df["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
    if pid == "EPcW0vyMy901":
        df["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
    else:
        df["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

    # Filter Changed
    # Produces `df_filter_changed` in long format
    # (attribute, measurement, Minutes Elapsed, value): one row with value 1.0
    # per 'filter_changed' event, labelled with the attribute whose filter changed.
    mask = df['interactionType'] == 'filter_changed'
    df_filter_changed = df[mask]
    if len(df_filter_changed.index) > 0:
        df_filter_changed = df_filter_changed[['attribute']]
        # Wide frame: one row per event (original log index), one column per
        # attribute, 1.0 where the event touched that attribute and NaN elsewhere.
        rolling_filter_changed_count = (
            df_filter_changed.groupby(['attribute'])['attribute']
                .rolling(1)
                .count()
                .unstack()
                .reset_index()
                .set_index('attribute')
                .T
                # Recover each event's timestamp by joining on the original log index.
                .join(df[['interactionAt']], how='inner')
                .assign(**{'Minutes Elapsed': lambda x: (x['interactionAt'] - task_start_ts) / 1000 / 60})
                .drop(columns=['interactionAt'])
        )
        df_filter_changed = (
            # value_vars is left at its default so every non-id column (i.e.
            # every attribute) is unpivoted. The previous `list(...).remove(...)`
            # argument always evaluated to None, so behaviour is unchanged.
            pd.melt(
                rolling_filter_changed_count,
                id_vars=['Minutes Elapsed'],
                var_name='attribute'
            )
            # Drop the NaN cells: attributes that were not touched by that event.
            .dropna()
            .assign(measurement='filter_changed')[['attribute', 'measurement', 'Minutes Elapsed', 'value']]
        )
    else:
        df_filter_changed = pd.DataFrame(None, columns=["attribute", "measurement", "Minutes Elapsed", "value"])
        
    # Attribute Set
    mask = df["interactionType"] == "axes_attribute_changed"
    attr_changed = df[mask]
    if len(attr_changed.index) > 0:
        attr_changed = attr_changed[["interactionAt", "interactionType", "x", "y"]]
        x_changed = pd.concat(
            [
                attr_changed[["interactionAt", "interactionType", "x"]],
                attr_changed["x"].ne(attr_changed["x"].shift()).astype(int).rename("changed"),
            ],
            axis=1,
        )
        x_changed = (
            x_changed[x_changed["changed"] != 0]
            .drop(columns=["changed"])
            .assign(measurement=lambda x: "x_axis_set")
            .rename(columns={"x": "attribute"})
        )
        x_changed_dup = pd.DataFrame(np.repeat(x_changed.values, 2, axis=0))
        x_changed_dup.columns = x_changed.columns
        df_x_changed = pd.concat(
            [
                x_changed_dup[["attribute", "measurement"]],
                x_changed_dup["interactionAt"].shift(-1).fillna(task_end_ts),
            ],
            axis=1,
        ).dropna(subset=["attribute"])
        df_x_changed["value"] = df_x_changed.index % 2 == 0
        y_changed = pd.concat(
            [
                attr_changed[["interactionAt", "interactionType", "y"]],
                attr_changed["y"].ne(attr_changed["y"].shift()).astype(int).rename("changed"),
            ],
            axis=1,
        )
        y_changed = (
            y_changed[y_changed["changed"] != 0]
            .drop(columns=["changed"])
            .assign(measurement=lambda x: "y_axis_set")
            .rename(columns={"y": "attribute"})
        )
        y_changed_dup = pd.DataFrame(np.repeat(y_changed.values, 2, axis=0))
        y_changed_dup.columns = y_changed.columns
        df_y_changed = pd.concat(
            [
                y_changed_dup[["attribute", "measurement"]],
                y_changed_dup["interactionAt"].shift(-1).fillna(task_end_ts),
            ],
            axis=1,
        ).dropna(subset=["attribute"])
        df_y_changed["value"] = df_y_changed.index % 2 == 0
        axis_set = (
            pd.concat([df_x_changed, df_y_changed])
                .sort_values(by=["attribute", "measurement", "interactionAt"])
                .reset_index(drop=True)
                .assign(**{'Minutes Elapsed': lambda x: (x['interactionAt'] - task_start_ts) / 1000 / 60})
                .drop(columns=['interactionAt'])
        )
        axis_set = axis_set[['attribute', 'measurement', 'Minutes Elapsed', 'value']]
    else:
        axis_set = pd.DataFrame(None, columns=["attribute", "measurement", "Minutes Elapsed", "value"])
        
    # SUBMIT BUTTON CLICKS
    ATTRIBUTES = {
        "politics": [
            "party",
            "gender",
            "occupation",
            "age",
            "political_experience",
            "policy_strength_ban_abortion_after_6_weeks",
            "policy_strength_legalize_medical_marijuana",
            "policy_strength_increase_medicare_funding",
            "policy_strength_ban_alcohol_sales_sundays",
        ],
        "movies": [
            "Running Time",
            "Production Budget",
            "Worldwide Gross",
            "Rotten Tomatoes Rating",
            "IMDB Rating",
            "Genre",
            "Creative Type",
            "Content Rating",
            "Release Year",
        ],
    }
    mask = df['interactionType'].isin(["click_submit_button_initial", "click_submit_button_revise", "click_submit_button_final"])
    click_submit = df.loc[mask, ['interactionAt', 'interactionType']]
    # One record per (submit click, attribute), in click order then attribute
    # order. Building the frame from a single record list also covers
    # participants with no submit clicks: the result is an empty frame with the
    # expected columns, where pd.concat([]) would raise ValueError and abort the
    # whole batch.
    submit_records = [
        [attr, interaction_type, (interaction_at - task_start_ts) / 1000 / 60, 1.0]
        for interaction_at, interaction_type in zip(click_submit["interactionAt"], click_submit["interactionType"])
        for attr in ATTRIBUTES[task]
    ]
    df_submit = pd.DataFrame(submit_records, columns=["attribute", "measurement", "Minutes Elapsed", "value"])
    
    ### BIAS INTERACTIONS ###

    # Bias-metric log restricted to this task, with the same timezone shift that
    # is applied to the interaction log.
    df_bias = pd.read_csv(os.path.join(basepath, "bias_interaction.csv"))
    df_bias = df_bias[df_bias["appMode"] == task].reset_index(drop=True).drop(columns=["appMode"])
    if pid == "EPcW0vyMy901":
        df_bias["interactionAt"] -= 7 * 3600000  # summary.json is GMT-07:00 for some reason
    else:
        df_bias["interactionAt"] -= 4 * 3600000  # convert EPOCH timestamp from GMT to EDT (GMT-04:00)

    # AD METRIC
    # Keys of the parsed ad_metric dict that are dropped before plotting, per task.
    drop_cols = {"politics": ["id", "first_name", "last_name"], "movies": ["id", "Title"]}
    # Read ad_metric from the bias log (`df_bias`), as the standalone AD METRIC
    # cell does; this code previously indexed `df` (interaction.csv) and left
    # `df_bias` unused.
    # NOTE(review): the VIS/LIST/CARD hover and click sections that follow still
    # filter `df`; the standalone cell computes them from bias_interaction.csv,
    # so they probably need `df_bias` too - confirm against the data files.
    minutes_elapsed = (df_bias["interactionAt"].rename("Minutes Elapsed") - task_start_ts) / 1000 / 60
    # Gaps are filled forward, then backward so leading missing values take the
    # first observed value.
    ad_raw = df_bias.set_index(minutes_elapsed)["ad_metric"].ffill().bfill()
    # Pad the series so the step plot spans the whole task: repeat the first
    # value at minute 0 and the last value at the task-end minute.
    ad_at_start = pd.Series(
        [ad_raw.iloc[0]],
        index=pd.Index([0], name="Minutes Elapsed"),
        name=ad_raw.name,
    )
    ad_at_end = pd.Series(
        [ad_raw.iloc[-1]],
        index=pd.Index([(task_end_ts - task_start_ts) / 1000 / 60], name="Minutes Elapsed"),
        name=ad_raw.name,
    )
    df_attr_ad = (
        # Series.append was removed in pandas 2.0; a single concat is equivalent.
        pd.concat([ad_at_start, ad_raw, ad_at_end])
        # Each cell holds a Python-literal dict as text; parse it and expand the
        # keys into one column per attribute.
        .map(lambda x: ast.literal_eval(str(x)))
        .apply(pd.Series)
        .drop(columns=drop_cols[task])
    )
    
    # VIS HOVER
    df_vis_hover = df[df['interactionType'] == 'mouseout']
    if not df_vis_hover.empty:
        count_x = (
            df_vis_hover['x'].map(lambda x: ast.literal_eval(str(x)))
                .apply(pd.Series)
                .rename(columns={'name':'x_attr', 'value': 'x_value'})
                .groupby('x_attr')['x_attr']
                .rolling(1)
                .count()
                .unstack()
                .fillna(0.0).T
        )
        count_y = (
            df_vis_hover['y'].map(lambda x: ast.literal_eval(str(x)))
                .apply(pd.Series)
                .rename(columns={'name':'y_attr', 'value': 'y_value'})
                .groupby('y_attr')['y_attr']
                .rolling(1)
                .count()
                .unstack()
                .fillna(0.0).T
        )
        rolling_vis_hover_count = (
            count_x.add(count_y, fill_value=0)
                .fillna(0.0)
                .set_index((df_vis_hover['interactionAt'].rename('Minutes Elapsed') - task_start_ts) / 1000 / 60)
        )
    else:
        rolling_vis_hover_count = pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))

    # VIS CLICK ADD
    df_vis_click_add = df[df['interactionType'] == 'add_to_list_via_scatterplot_click']
    if not df_vis_click_add.empty:
        count_x = (
            df_vis_click_add['x'].map(lambda x: ast.literal_eval(str(x)))
                .apply(pd.Series)
                .rename(columns={'name':'x_attr', 'value': 'x_value'})
                .groupby('x_attr')['x_attr']
                .rolling(1)
                .count()
                .unstack()
                .fillna(0.0).T
        )
        count_y = (
            df_vis_click_add['y'].map(lambda x: ast.literal_eval(str(x)))
                .apply(pd.Series)
                .rename(columns={'name':'y_attr', 'value': 'y_value'})
                .groupby('y_attr')['y_attr']
                .rolling(1)
                .count()
                .unstack()
                .fillna(0.0).T
        )
        rolling_vis_click_add_count = (
            count_x.add(count_y, fill_value=0)
                .fillna(0.0)
                .set_index((df_vis_click_add['interactionAt'].rename('Minutes Elapsed') - task_start_ts) / 1000 / 60)
        )
    else:
        rolling_vis_click_add_count = pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))

    # LIST HOVER
    df_list_hover = df[df['interactionType'] == 'mouseout_from_list']
    if not df_list_hover.empty:
        count_x = (
            df_list_hover['x'].map(lambda x: ast.literal_eval(str(x)))
                .apply(pd.Series)
                .rename(columns={'name':'x_attr', 'value': 'x_value'})
                .groupby('x_attr')['x_attr']
                .rolling(1)
                .count()
                .unstack()
                .fillna(0.0).T
        )
        count_y = (
            df_list_hover['y'].map(lambda x: ast.literal_eval(str(x)))
                .apply(pd.Series)
                .rename(columns={'name':'y_attr', 'value': 'y_value'})
                .groupby('y_attr')['y_attr']
                .rolling(1)
                .count()
                .unstack()
                .fillna(0.0).T
        )
        rolling_list_hover_count = (
            count_x.add(count_y, fill_value=0)
                .fillna(0.0)
                .set_index((df_list_hover['interactionAt'].rename('Minutes Elapsed') - task_start_ts) / 1000 / 60)
        )
    else:
        rolling_list_hover_count = pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))

    # LIST CLICK REMOVE
    # Rows logged with interactionType 'remove_from_list_via_list_item_click'.
    df_list_click_remove = df[df['interactionType'] == 'remove_from_list_via_list_item_click']
    if df_list_click_remove.empty:
        # Nothing logged: keep an empty frame whose index still carries the shared name.
        rolling_list_click_remove_count = pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))
    else:
        # Parse every 'x' cell as a Python literal and spread its entries over columns.
        x_encoding = (
            df_list_click_remove['x']
                .map(lambda raw: ast.literal_eval(str(raw)))
                .apply(pd.Series)
                .rename(columns={'name':'x_attr', 'value': 'x_value'})
        )
        # NOTE(review): rolling(1).count() per attribute looks like a per-row indicator
        # (rows = interactions, columns = attribute names after the transpose) - confirm.
        count_x = x_encoding.groupby('x_attr')['x_attr'].rolling(1).count().unstack().fillna(0.0).T

        # Same treatment for the 'y' column.
        y_encoding = (
            df_list_click_remove['y']
                .map(lambda raw: ast.literal_eval(str(raw)))
                .apply(pd.Series)
                .rename(columns={'name':'y_attr', 'value': 'y_value'})
        )
        count_y = y_encoding.groupby('y_attr')['y_attr'].rolling(1).count().unstack().fillna(0.0).T

        # Offset from task start, scaled by 1/1000 and 1/60 (named 'Minutes Elapsed').
        minutes_elapsed = (
            (df_list_click_remove['interactionAt'].rename('Minutes Elapsed') - task_start_ts) / 1000 / 60
        )
        # Add both tables (a cell missing on one side counts as 0) and index rows by elapsed time.
        rolling_list_click_remove_count = (
            count_x.add(count_y, fill_value=0).fillna(0.0).set_index(minutes_elapsed)
        )

    # CARD CLICK ADD
    # Rows logged with interactionType 'add_to_list_via_card_click'.
    df_card_click_add = df[df['interactionType'] == 'add_to_list_via_card_click']
    if df_card_click_add.empty:
        # Nothing logged: keep an empty frame whose index still carries the shared name.
        rolling_card_click_add_count = pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))
    else:
        # Parse every 'x' cell as a Python literal and spread its entries over columns.
        x_encoding = (
            df_card_click_add['x']
                .map(lambda raw: ast.literal_eval(str(raw)))
                .apply(pd.Series)
                .rename(columns={'name':'x_attr', 'value': 'x_value'})
        )
        # NOTE(review): rolling(1).count() per attribute looks like a per-row indicator
        # (rows = interactions, columns = attribute names after the transpose) - confirm.
        count_x = x_encoding.groupby('x_attr')['x_attr'].rolling(1).count().unstack().fillna(0.0).T

        # Same treatment for the 'y' column.
        y_encoding = (
            df_card_click_add['y']
                .map(lambda raw: ast.literal_eval(str(raw)))
                .apply(pd.Series)
                .rename(columns={'name':'y_attr', 'value': 'y_value'})
        )
        count_y = y_encoding.groupby('y_attr')['y_attr'].rolling(1).count().unstack().fillna(0.0).T

        # Offset from task start, scaled by 1/1000 and 1/60 (named 'Minutes Elapsed').
        minutes_elapsed = (
            (df_card_click_add['interactionAt'].rename('Minutes Elapsed') - task_start_ts) / 1000 / 60
        )
        # Add both tables (a cell missing on one side counts as 0) and index rows by elapsed time.
        rolling_card_click_add_count = (
            count_x.add(count_y, fill_value=0).fillna(0.0).set_index(minutes_elapsed)
        )

    # CARD CLICK REMOVE
    # Rows logged with interactionType 'remove_from_list_via_card_click'.
    df_card_click_remove = df[df['interactionType'] == 'remove_from_list_via_card_click']
    if df_card_click_remove.empty:
        # Nothing logged: keep an empty frame whose index still carries the shared name.
        rolling_card_click_remove_count = pd.DataFrame(None, index=pd.Index([], name="Minutes Elapsed"))
    else:
        # Parse every 'x' cell as a Python literal and spread its entries over columns.
        x_encoding = (
            df_card_click_remove['x']
                .map(lambda raw: ast.literal_eval(str(raw)))
                .apply(pd.Series)
                .rename(columns={'name':'x_attr', 'value': 'x_value'})
        )
        # NOTE(review): rolling(1).count() per attribute looks like a per-row indicator
        # (rows = interactions, columns = attribute names after the transpose) - confirm.
        count_x = x_encoding.groupby('x_attr')['x_attr'].rolling(1).count().unstack().fillna(0.0).T

        # Same treatment for the 'y' column.
        y_encoding = (
            df_card_click_remove['y']
                .map(lambda raw: ast.literal_eval(str(raw)))
                .apply(pd.Series)
                .rename(columns={'name':'y_attr', 'value': 'y_value'})
        )
        count_y = y_encoding.groupby('y_attr')['y_attr'].rolling(1).count().unstack().fillna(0.0).T

        # Offset from task start, scaled by 1/1000 and 1/60 (named 'Minutes Elapsed').
        minutes_elapsed = (
            (df_card_click_remove['interactionAt'].rename('Minutes Elapsed') - task_start_ts) / 1000 / 60
        )
        # Add both tables (a cell missing on one side counts as 0) and index rows by elapsed time.
        rolling_card_click_remove_count = (
            count_x.add(count_y, fill_value=0).fillna(0.0).set_index(minutes_elapsed)
        )

    # Combine bias interaction measurements into single dataframe.
    # Each key below ends up as a label in the 'measurement' column of the result.
    d = dict(
        ad_metric=df_attr_ad,
        vis_hover=rolling_vis_hover_count,
        vis_click_add=rolling_vis_click_add_count,
        list_hover=rolling_list_hover_count,
        list_click_remove=rolling_list_click_remove_count,
        card_click_add=rolling_card_click_add_count,
        card_click_remove=rolling_card_click_remove_count,
    )

    # Side-by-side concat: outer column level = dict key, inner level = each frame's own columns.
    wide_measurements = pd.concat(d.values(), axis=1, keys=d.keys())

    # Flatten to one value per (key, column, index entry) and move the frame-column level first.
    stacked_measurements = wide_measurements.unstack().swaplevel(0, 1)

    # After reset_index the former index levels are plain columns. An unnamed level shows up as
    # "level_N", and the value column is named 0, hence the several aliases mapped below.
    long_column_names = {
        0:"value",
        "x_attr": "attribute",
        "y_attr": "attribute",
        "level_0":"attribute",
        "level_1":"measurement"
    }
    df_bias_interactions = stacked_measurements.reset_index().rename(columns=long_column_names)

    ### COMBINE BIAS INTERACTIONS AND ALL LIVE INTERACTIONS DATAFRAMES ###

    # Stack the four frames row-wise, then renumber the rows from 0.
    interaction_frames = [df_bias_interactions, df_filter_changed, axis_set, df_submit]
    stacked_interactions = pd.concat(interaction_frames)
    df_interactions = stacked_interactions.reset_index(drop=True)
    
    ### VISUALIZE RESULTING DATAFRAME ###

    # Panel titles per task, looked up by attribute name when each subplot is drawn.
    # The movie attributes are displayed under their own names.
    movie_attributes = [
        "Running Time",
        "Production Budget",
        "Worldwide Gross",
        "Rotten Tomatoes Rating",
        "IMDB Rating",
        "Genre",
        "Creative Type",
        "Content Rating",
        "Release Year",
    ]
    # The politics attributes get shortened, human-readable titles.
    politics_titles = dict(
        age='Age',
        gender='Gender',
        occupation='Occupation',
        party='Party',
        political_experience='Experience',
        policy_strength_ban_abortion_after_6_weeks='Ban Abortion',
        policy_strength_legalize_medical_marijuana='Legalize Medical Marijuana',
        policy_strength_increase_medicare_funding='Increase Medicare Funding',
        policy_strength_ban_alcohol_sales_sundays='Ban Alcohol on Sundays',
    )
    plot_titles = {
        'politics': politics_titles,
        'movies': {attribute: attribute for attribute in movie_attributes},
    }

    # One panel per attribute on a shared-axes grid. Attributes beyond the grid's
    # capacity (total_rows * total_cols) are not drawn.
    total_rows = 3
    total_cols = 3
    fig, ax = plt.subplots(nrows=total_rows, ncols=total_cols, sharex=True, sharey=True, figsize=(12,12))
    # rcParams is global state: this line only influences figures created after it.
    plt.rcParams['figure.dpi'] = 200 # 200 e.g. is really fine, but slower

    # Offset between task start and end (scaled by 1/1000 and 1/60); right edge of every panel.
    task_minutes = (task_end_ts - task_start_ts) / 1000 / 60

    # Vertical marker drawn at the first occurrence of each submit measurement.
    submit_styles = [
        ('click_submit_button_initial', {'linestyle': ":", 'color': "orange"}),
        ('click_submit_button_revise', {'color': "red"}),
        ('click_submit_button_final', {'linestyle': ":", 'color': "black"}),
    ]

    # Stem plots, drawn in this order:
    # (measurement, line format, marker format, marker size or None, height used for a 1.0 or None).
    # Rows with a height also blank out 0.0 so that only actual clicks get a marker.
    stem_styles = [
        ('filter_changed', 'grey', ' ', None, None),
        ('vis_hover', 'lightblue', ' ', None, None),
        ('vis_click_add', 'lightblue', 'or', 3, 1.015),
        ('list_hover', 'pink', ' ', None, None),
        ('list_click_remove', 'pink', 'Xr', 4, 0.985),
        ('card_click_add', 'green', 'or', 3, 1.015),
        ('card_click_remove', 'green', 'Xr', 4, 0.985),
    ]

    # Group by the column name itself, not by a one-element list: with a list, pandas >= 2.0
    # yields 1-tuples as keys, which breaks the title lookup. zip() stops once the grid is full.
    for ax_curr, (key, grp) in zip(ax.flat, df_interactions.groupby('attribute')):
        ax_curr.set_xlim([0, task_minutes])
        ax_curr.set_ylim([-0.05, 1.05])
        ax_curr.set_ylabel('AD Metric [0, 1]')
        grp_pivot = grp.pivot(index='Minutes Elapsed', columns='measurement', values='value')
        grp_pivot['ad_metric'].plot(
            ax=ax_curr,
            drawstyle="steps-post",
            color="blue",
            style="-",
            # Fall back to the raw attribute name if no display title is configured.
            title=plot_titles[task].get(key, key),
            legend=None
        )

        # Submit markers.
        for measurement, line_kwargs in submit_styles:
            if measurement not in grp_pivot.columns:
                continue
            submit_times = grp_pivot[measurement].dropna().index
            if len(submit_times) > 0:  # column may hold only NaN for this attribute
                ax_curr.axvline(submit_times[0], **line_kwargs)

        # Bands marking when the attribute was encoded on an axis.
        for measurement in ('x_axis_set', 'y_axis_set'):
            if measurement not in grp_pivot.columns:
                continue
            # Reset per axis and per attribute so a stale value can never be reused.
            start = None
            for index, value in grp_pivot[measurement].dropna().items():
                if value:
                    start = index  # value == True => encoding set
                elif start is not None:
                    # value == False => encoding removed; close the open band.
                    ax_curr.axvspan(start, index, 0.825, 0.925, color="lightgreen", alpha=0.5)
                    start = None
            if start is not None:
                # Still encoded when the data ends: extend the band to the end of the task.
                ax_curr.axvspan(start, task_minutes, 0.825, 0.925, color="lightgreen", alpha=0.5)

        # Hover / click / filter stems.
        for measurement, line_fmt, marker_fmt, marker_size, click_height in stem_styles:
            if measurement not in grp_pivot.columns:
                continue
            stem_values = grp_pivot[measurement].dropna()  # only keep non-nan values
            if stem_values.empty:
                continue
            if click_height is not None:
                stem_values = stem_values.replace(0.0, np.nan).replace(1.0, click_height)
            markers, stems, base = ax_curr.stem(
                stem_values.index, stem_values, linefmt=line_fmt, markerfmt=marker_fmt, basefmt=" "
            )
            plt.setp(stems, 'linewidth', 0.5)
            if marker_size is not None:
                plt.setp(markers, markersize=marker_size)

    # Proxy artists for the figure-level legend, paired with their labels in display order.
    tick_style = dict(marker='|', linestyle='None', markersize=10, markeredgewidth=1.5)
    legend_entries = [
        ('AD Metric', lines.Line2D([], [], color='blue')),
        ('Encoding Active', patches.Patch(color='lightgreen')),
        ('Filter Changed', lines.Line2D([], [], color='grey', **tick_style)),
        ('Initial Submission (Phase 1)', lines.Line2D([], [], color='orange', **tick_style)),
        ('Return for Revisions', lines.Line2D([], [], color='red', **tick_style)),
        ('Final Submissions (Phase 2)', lines.Line2D([], [], color='black', **tick_style)),
        ('Visualization (Click/Hover)', lines.Line2D([], [], color='lightblue', **tick_style)),
        ('Selected List (Click)', lines.Line2D([], [], color='pink', **tick_style)),
        ('Info Card (Click)', lines.Line2D([], [], color='green', **tick_style)),
        (
            'Add To Selection',
            lines.Line2D(range(1), range(1), color="white", marker='o', markerfacecolor="red"),
        ),
        (
            'Remove From Selection',
            lines.Line2D(range(1), range(1), color="white", marker='X', markerfacecolor="red"),
        ),
    ]
    handles = [handle for label, handle in legend_entries]
    labels = [label for label, handle in legend_entries]
    # Anchor the legend's upper center at (0.5, 0.08) in figure coordinates, in four columns.
    fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 0.08), fancybox=True, shadow=True, ncol=4)
    
    ### SAVE FIGURE TO FILE ###

    # Write the chart into the <condition>/<pid> folder, then close the figure to free its memory.
    output_path = f'{condition}/{pid}/{task}_interaction_history.png'
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

RTSUM - movies
  (1/12) building chart for 1V7m8wY9JNak
  (2/12) building chart for 606gnDVdNwnt
  (3/12) building chart for 7UfOha66klmh
  (4/12) building chart for DAn39rXtnCAS
  (5/12) building chart for ggYz115dy0a9
  (6/12) building chart for joO2OX6p9rr6
  (7/12) building chart for rJwpqBcwhEar
  (8/12) building chart for UPGk3s3Qldql
  (9/12) building chart for vEiIrVSiezFn
  (10/12) building chart for w6ptnTrqQi3D
  (11/12) building chart for wJm4COpLVvz5
  (12/12) building chart for zV0yHZRI3MH9
