# Visual Attention Movement

In [1]:
import pandas as pd
import regex as re
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np

plt.rcParams['savefig.facecolor'] = 'white'

## Load the data

In [2]:
def click_data_to_list(click_data_string):
    """Parse a whitespace-separated click string into ``(x, y)`` integer tuples.

    Every token is split on ``-``. The first run of digits in the first part
    becomes ``x`` and the first run of digits in the second part becomes ``y``;
    any non-digit characters around the numbers are ignored.

    Args:
        click_data_string: Click tokens separated by whitespace, each of the
            form ``<x part>-<y part>``.

    Returns:
        A list with one ``(x, y)`` tuple of ints per token, in input order.
        An empty or whitespace-only string yields an empty list.

    Raises:
        ValueError: If a token does not consist of two ``-``-separated parts
            that each contain at least one digit.
    """
    click_data_list = []
    # split() without an argument ignores leading, trailing and repeated
    # whitespace, so an empty string or a stray double space no longer
    # produces an empty token that breaks the parser.
    for token in click_data_string.split():
        parts = token.split("-")
        # extract the x and y coordinates from the token via regex
        x_match = re.search(r'\d+', parts[0])
        y_match = re.search(r'\d+', parts[1]) if len(parts) > 1 else None
        if x_match is None or y_match is None:
            raise ValueError(f"Malformed click token: {token!r}")
        click_data_list.append((int(x_match[0]), int(y_match[0])))
    return click_data_list


def time_data_to_list(time_data_string):
    """Parse a whitespace-separated string of numbers, dividing each by 1000.

    Args:
        time_data_string: Numeric tokens separated by whitespace.
            NOTE(review): the division by 1000 suggests milliseconds being
            converted to seconds - confirm against the data export.

    Returns:
        A list of floats, one per token, in input order. An empty or
        whitespace-only string yields an empty list.

    Raises:
        ValueError: If a token cannot be converted with ``float``.
    """
    # split() without an argument skips empty tokens caused by padding or
    # repeated spaces, which float() would otherwise reject.
    return [float(token) / 1000 for token in time_data_string.split()]

In [3]:
# Read the preprocessed experiment export, keep only the rows whose "Outlier"
# flag is False, and discard the flag column afterwards.
df = pd.read_csv('./data/preprocessed_experiment_data.csv')
df = df.loc[df["Outlier"] == False].drop(columns=["Outlier"])

# Replace the raw strings with parsed lists and record how many clicks each
# row contains.
df["ClickData"] = df["ClickData"].apply(click_data_to_list)
df["TimeData"] = df["TimeData"].apply(time_data_to_list)
df["NumberOfClicks"] = df["ClickData"].apply(len)

## Calculate the distance and direction between clicks

In [4]:
# Turn every pair of consecutive clicks of a trial into one movement record:
# the Euclidean distance between the two clicks (in click-coordinate units)
# and the direction of travel as an angle in degrees within [0, 360).
#
# Records are collected in a plain list and converted to a DataFrame once;
# appending to a DataFrame row by row copies the frame on every insert.
movement_records = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    click_data = row["ClickData"]
    participant = row["ID"]
    # zip pairs each click with its successor, giving len(click_data) - 1 jumps
    for i, ((x_from, y_from), (x_to, y_to)) in enumerate(zip(click_data, click_data[1:])):
        dx = x_to - x_from
        dy = y_to - y_from
        distance = np.sqrt(dx ** 2 + dy ** 2)
        # arctan2 returns radians in [-pi, pi]; a zero-length jump gives 0
        direction = np.arctan2(dy, dx)
        angle = direction / np.pi * 180
        if angle < 0:
            # shift negative angles so that all directions lie in [0, 360)
            angle = 360 + angle
        movement_records.append(
            (row["Algorithm"], row["ProgrammingStyle"], participant, i, distance, angle))

df_movement_metrics = pd.DataFrame(
    movement_records,
    columns=["Algorithm", "ProgrammingStyle", "Participant", "Number", "Distance", "Direction"])

df_movement_metrics

  0%|          | 0/278 [00:00<?, ?it/s]

Unnamed: 0,Algorithm,ProgrammingStyle,Participant,Number,Distance,Direction
0,apply,iterative,212,0,168.315181,225.962864
1,apply,iterative,212,1,0.000000,0.000000
2,apply,iterative,212,2,19.646883,104.743563
3,apply,iterative,212,3,77.781746,261.869898
4,apply,iterative,212,4,53.000000,90.000000
...,...,...,...,...,...,...
6319,LinkedList,list-comprehension,315,19,24.738634,75.963757
6320,LinkedList,list-comprehension,315,20,27.892651,75.465545
6321,LinkedList,list-comprehension,315,21,24.000000,90.000000
6322,LinkedList,list-comprehension,315,22,26.019224,92.202598


In [5]:
def map_to_bin(value, bins):
    """Return the position of the first half-open interval that holds ``value``.

    Args:
        value: Number to classify.
        bins: Iterable of ``(low, high)`` pairs; an interval matches when
            ``low <= value < high``.

    Returns:
        int: Index of the first matching interval, or ``0`` when no interval
        matches (including when ``bins`` is empty).
    """
    matching_positions = (
        position
        for position, (lower, upper) in enumerate(bins)
        if lower <= value < upper
    )
    # next() with a default reproduces the "nothing matched -> 0" fallback
    return int(next(matching_positions, 0))


def polar_saccadic_plot(theta, value, err, number_of_bins, half_angle):
    """Draw a polar bar chart with error bars on the current figure.

    Args:
        theta: Angular bar positions in radians; must support ``theta + scalar``
            (e.g. a NumPy array).
        value: Bar heights, one per entry of ``theta``.
        err: Error values forwarded unchanged as ``yerr`` to ``Axes.bar``.
        number_of_bins: Number of bins; each bar is ``2 * pi / number_of_bins``
            radians wide.
        half_angle: Angle passed to ``set_rlabel_position`` to place the
            radial tick labels.

    Returns:
        The polar axes the bars were drawn on.
    """
    # angular width of a single bin
    bar_width = 2 * np.pi / number_of_bins

    polar_ax = plt.subplot(111, projection='polar')
    polar_ax.set_rlabel_position(half_angle)

    # put the angular ticks half a bin beyond each bar position and hide
    # their labels
    polar_ax.set_xticks(theta + bar_width / 2)
    polar_ax.set_xticklabels([])

    # one tab10 colour per bar
    bar_colors = plt.cm.tab10(np.linspace(0, 1, len(theta)))
    error_style = {'ecolor': 'black', 'lw': 2, 'capsize': 0, 'capthick': 10}

    polar_ax.bar(theta, height=value, yerr=err, width=bar_width, alpha=1.0,
                 color=bar_colors, error_kw=error_style)

    return polar_ax

# Binning configuration for the direction histogram.
number_of_bins = 8
# height of one line in pixels; used to express distances in line heights
line_height_in_pixels = 26
# angular width of one bin in degrees, and half of it
angle = 360 / number_of_bins
half_angle = angle / 2

# Bin centres in degrees (0, angle, 2 * angle, ...) and the interval around
# each centre. The first interval starts at -half_angle and the last one ends
# at 360 - half_angle, so directions in [360 - half_angle, 360) match no
# interval and are assigned bin 0 by map_to_bin's fallback.
marks = np.linspace(0.0, 360.0 - angle, number_of_bins)
edges = [(mark - half_angle, mark + half_angle) for mark in marks]
# Bin centres in radians, used as bar positions in the polar plots.
theta = np.linspace(0.0, 2 * np.pi, number_of_bins, endpoint=False)

df_movement_metrics["Bin"] = df_movement_metrics["Direction"].apply(lambda x: map_to_bin(x, edges))
# Look the bin centre up in `theta` instead of recomputing it from the bin
# index: any later exact comparison of this column against `theta` then holds
# for every number_of_bins, not only for those where both formulas happen to
# round identically.
df_movement_metrics["theta"] = df_movement_metrics["Bin"].apply(lambda bin_index: theta[bin_index])
df_movement_metrics["LineDistance"] = df_movement_metrics["Distance"] / line_height_in_pixels

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import os

def _align_to_theta(per_theta, theta):
    """Return the values of ``per_theta`` ordered like ``theta``, using 0 for absent bins."""
    return per_theta.reindex(theta, fill_value=0).to_numpy()


def _asymmetric_error(center, spread, floor=0.0):
    """Build a ``(2, n)`` error array (row 0 = lower, row 1 = upper) for ``yerr``.

    The upper error is ``spread``. The lower error is ``spread`` capped at
    ``center - floor`` and never negative, because matplotlib rejects negative
    error values.
    """
    lower = [max(0.0, min(c - floor, s)) for c, s in zip(center, spread)]
    return np.array([lower, list(spread)])


def _save_polar_plot(values, errors, title, y_max, path):
    """Render one polar bar chart with a fixed radial range and save it to ``path``."""
    fig = plt.figure(figsize=(5, 5))
    ax = polar_saccadic_plot(theta, values, errors, number_of_bins, half_angle)
    ax.set_title(title)
    # the limits must be set before saving, otherwise they do not reach the file
    ax.set_ylim(0, y_max)
    fig.savefig(path)
    plt.close(fig)


# make sure the output folder exists
output_dir = './results/jumping_dart_maps'
os.makedirs(output_dir, exist_ok=True)

max_line_distance = df_movement_metrics["LineDistance"].max()
# Group by the column name, not a one-element list: with a list, pandas >= 2.0
# yields 1-tuples as keys, which would end up in the titles and file names.
for programming_style, df_group in tqdm(df_movement_metrics.groupby("ProgrammingStyle")):
    number_of_participants = df_group["Participant"].nunique()

    # --- jump distance per direction bin (median, with std as error) ---
    line_distance_by_theta = df_group.groupby("theta")["LineDistance"]
    distance_median = _align_to_theta(line_distance_by_theta.median(), theta)
    distance_std = _align_to_theta(line_distance_by_theta.std(), theta)
    # lower error bar is capped so that it ends no lower than 0.5
    distance_err = _asymmetric_error(distance_median, distance_std, floor=0.5)

    # --- probability of jumping into each direction bin ---
    # For every participant: share of their jumps that fall into each bin.
    jump_counts = df_group.groupby(["Participant", "theta"]).size()
    jumps_per_participant = jump_counts.groupby(level="Participant").transform("sum")
    jump_probability = jump_counts / jumps_per_participant

    probability_by_theta = jump_probability.groupby(level="theta")
    # Dividing the sum by the number of participants counts a participant who
    # never jumped into a bin as probability 0 for that bin.
    probability_average = _align_to_theta(probability_by_theta.sum() / number_of_participants, theta)
    # NOTE(review): the std only covers participants that have at least one
    # jump in the bin, unlike the average above - confirm this is intended.
    probability_std = _align_to_theta(probability_by_theta.std(), theta)
    # lower error bar is capped so that it does not extend below 0
    probability_error = _asymmetric_error(probability_average, probability_std)

    # plot distance
    # NOTE(review): the title says "Average" while the bars show the median.
    _save_polar_plot(distance_median, distance_err,
                     f"{programming_style}: Average Jump Distance in Line Height Units",
                     15, f"{output_dir}/{programming_style}_distance.png")

    # plot probability of jumping
    _save_polar_plot(probability_average, probability_error,
                     f"{programming_style}: Probability of Jumping",
                     0.5, f"{output_dir}/{programming_style}_probability.png")

  0%|          | 0/4 [00:00<?, ?it/s]