# Analyse the Visual Attention Data

In [None]:
import pandas as pd
import regex as re
import matplotlib.pyplot as plt
import seaborn as sns
import json
import modules.GenSnippetsLib as snippets
import scipy.ndimage
from pygments.lexers import PythonLexer
from tqdm.notebook import tqdm
import matplotlib.patches as patches
import PIL
import PIL.Image
import numpy as np
import chord

In [None]:
# Read the preprocessed experiment data, keep only the rows whose "Outlier"
# flag is False and drop the flag column afterwards.
df = pd.read_csv('./data/preprocessed_experiment_data.csv')
df = df.loc[df["Outlier"] == False].drop(columns=["Outlier"])
df

In [None]:
def click_data_to_list(click_data_string):
    """Parse a space-separated click log into a list of ``(x, y)`` int tuples.

    Every space-separated token is split at ``-``. The first run of digits in
    the first part becomes x, the first run of digits in the second part
    becomes y.
    """
    def first_number(fragment):
        # First run of digits found in the fragment, as an int.
        return int(re.search(r'\d+', fragment)[0])

    coordinates = []
    for token in click_data_string.split(" "):
        parts = token.split("-")
        coordinates.append((first_number(parts[0]), first_number(parts[1])))
    return coordinates

In [None]:
def time_data_to_list(time_data_string):
    """Convert space-separated numeric tokens into floats divided by 1000."""
    return [float(token) / 1000 for token in time_data_string.split(" ")]

In [None]:
# Parse the raw string columns into Python lists, then derive the number of
# clicks per row and the response time (divided by 1000) per click.
df["ClickData"] = df["ClickData"].apply(click_data_to_list)
df["TimeData"] = df["TimeData"].apply(time_data_to_list)
df["NumberOfClicks"] = df["ClickData"].apply(len)
df["AverageTimeOfClicks"] = (df["ResponseTime"] / 1000) / df["NumberOfClicks"]

In [None]:
# Display the data frame with the parsed click/time columns and derived metrics.
df

## Descriptive: Get Number of Clicks per Algorithm

In [None]:
# Duplicate every row under the pseudo-algorithm "total" so the summary also
# contains statistics aggregated over all algorithms.
df_tmp = df.copy()
df_tmp["Algorithm"] = "total"
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df_visual = pd.concat([df, df_tmp])
df_visual[["Algorithm", "ProgrammingStyle", "NumberOfClicks"]].groupby(["Algorithm", "ProgrammingStyle"]).describe()

In [None]:
# Bar chart of NumberOfClicks per algorithm and programming style, including a
# synthetic "total" category that contains every row once more.
df_tmp = df.copy()
df_tmp["Algorithm"] = "total"
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df_visual = pd.concat([df, df_tmp])
ax = sns.catplot(x="Algorithm", y="NumberOfClicks", hue="ProgrammingStyle", data=df_visual, kind="bar", height=5,
                 aspect=5)
ax.tight_layout()
ax.set(ylabel='Number Of Clicks per Algorithm');

In [None]:
# Violin plot of NumberOfClicks per programming style; the synthetic "total"
# rows are excluded so every observation is used once.
df_tmp = df_visual.loc[df_visual["Algorithm"] != "total", ["ProgrammingStyle", "Algorithm", "NumberOfClicks"]]
ax = sns.violinplot(data=df_tmp, x="ProgrammingStyle", y="NumberOfClicks")
ax.set(ylim=(0.0, 100.0));

In [None]:
# Box plot of NumberOfClicks per programming style; the synthetic "total"
# rows are excluded so every observation is used once.
df_tmp = df_visual.loc[df_visual["Algorithm"] != "total", ["ProgrammingStyle", "Algorithm", "NumberOfClicks"]]
ax = sns.boxplot(data=df_tmp, x="ProgrammingStyle", y="NumberOfClicks")
ax.set(ylim=(0.0, 100.0));

## Descriptive: Get Average Time of Clicks per Algorithm

In [None]:
# Duplicate every row under the pseudo-algorithm "total" so the summary also
# contains statistics aggregated over all algorithms.
df_tmp = df.copy()
df_tmp["Algorithm"] = "total"
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df_visual = pd.concat([df, df_tmp])
df_visual[["Algorithm", "ProgrammingStyle", "AverageTimeOfClicks"]].groupby(
    ["Algorithm", "ProgrammingStyle"]).describe()

In [None]:
# Bar chart of AverageTimeOfClicks per algorithm and programming style,
# including a synthetic "total" category that contains every row once more.
df_tmp = df.copy()
df_tmp["Algorithm"] = "total"
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df_visual = pd.concat([df, df_tmp])
ax = sns.catplot(x="Algorithm", y="AverageTimeOfClicks", hue="ProgrammingStyle", data=df_visual, kind="bar", height=5,
                 aspect=5)
ax.tight_layout()
ax.set(ylabel='Average Time per Clicks per Algorithm');

In [None]:
# Violin plot of AverageTimeOfClicks per programming style; the synthetic
# "total" rows are excluded so every observation is used once.
df_tmp = df_visual.loc[df_visual["Algorithm"] != "total", ["ProgrammingStyle", "Algorithm", "AverageTimeOfClicks"]]
ax = sns.violinplot(data=df_tmp, x="ProgrammingStyle", y="AverageTimeOfClicks")
ax.set(ylim=(0.0, 20.0));

In [None]:
# Box plot of AverageTimeOfClicks per programming style (synthetic "total"
# rows excluded). The original cell repeated the violin plot of the previous
# cell verbatim; the NumberOfClicks section pairs its violin plot with a box
# plot, so the same pairing is used here.
# NOTE(review): confirm that a box plot was the intended second figure.
df_tmp = df_visual[["ProgrammingStyle", "Algorithm", "AverageTimeOfClicks"]]
df_tmp = df_tmp[df_tmp["Algorithm"] != "total"]
ax = sns.boxplot(x="ProgrammingStyle", y="AverageTimeOfClicks", data=df_tmp)
ax.set(ylim=(0.0, 20.0));


# Descriptive: Areas Of Interest

In [None]:
# Sorted unique condition labels. Programming styles get their hyphens
# replaced by underscores, which is the form used when building file names below.
algorithms = list(df["Algorithm"].sort_values().unique())
programming_styles = [
    style.replace("-", "_")
    for style in df["ProgrammingStyle"].sort_values().unique()
]

In [None]:
# AOI type names in a fixed order: the position of a name selects its palette
# colour and is also its numeric AOI index.
aoi_type_order = (
    "Method",
    "MethodDeclaration",
    "MethodBody",
    "Helper",
    "HelperDeclaration",
    "HelperBody",
    "Iteration",
    "IterationHead",
    "IterationBody",
    "RecursiveCall",
    "HigherOrder",
    "ListComprehension",
    "InputPreparation",
    "StartOfAlgorithm",
    "Class",
)
color_palette = sns.color_palette("hls", 15)
color_palette_mapping = {
    name: color_palette[position] for position, name in enumerate(aoi_type_order)
}

# Name -> index lookup and the index -> name list, both in mapping order.
aoi_numbers = {name: position for position, name in enumerate(color_palette_mapping)}
aoi_names = list(color_palette_mapping)

#### Create bounding boxes for AOIs

In [None]:
# Setup for the bounding-box generation: an empty result frame, the output
# directory for the overlay images, the font directory handed to the snippet
# renderer, and the padding (in pixels) added around every letter box.
df_bounding_box = pd.DataFrame(
    columns=[
        "Algorithm",
        "ProgrammingStyle",
        "BoundingBoxes",
        "ImageDimension",
    ]
)
image_path = "./results/aois/"
font_path = "\\data\\fonts\\ttf\\"
offset = 3

def from_canvas(fig):
    """Return the RGB content of ``fig``'s canvas as a ``PIL.Image``.

    The pixel data is taken from ``fig.canvas.tostring_rgb()``, so the canvas
    has to provide that method.
    """
    # get_width_height() yields (width, height), but the raw buffer is stored
    # row by row, so the array must be shaped (height, width, 3). The previous
    # (width, height, 3) reshape scrambled every non-square figure.
    width, height = fig.canvas.get_width_height()
    pixels = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    return PIL.Image.fromarray(pixels.reshape((height, width, 3)))

# For every algorithm/style snippet: render the code image, build one padded
# bounding box per contiguous run of letters that belong to an AOI, remember
# the boxes, and save an overlay image that shows them. The collected boxes are
# finally merged into df on (Algorithm, ProgrammingStyle).
bounding_box_rows = []
for algorithm in tqdm(algorithms):
    for programming_style in programming_styles:
        # load the image and generate bb code info
        file_name = './data/CodeSnippets/Generators/' + algorithm + '_' + programming_style + '.json'
        try:
            image, result = snippets.create_image(file_name, font_path=font_path, lexer=PythonLexer)
            width, height = image.size
            dimension = (width, height)
        except Exception as e:
            # Best effort: report the snippet that could not be rendered and
            # carry on with the remaining combinations.
            print(file_name + ":" + str(e))
            continue

        # extract the areas of interest in the image
        areas_of_interest = {aoi for letter in result for aoi in letter["AOI"]}

        # create the bounding boxes
        disjoint_areas_of_interest = []
        for aoi in areas_of_interest:
            current_box = None  # box of the run currently being extended, if any
            for letter in result:
                letter_box = letter["BoundingBox"]
                # Letters with an all-zero box are skipped entirely: they
                # neither extend nor terminate the current run.
                if all(letter_box[position] == 0 for position in range(4)):
                    continue

                if aoi in letter["AOI"]:
                    padded_box = [
                        letter_box[0] - offset,
                        letter_box[1] - offset,
                        letter_box[2] + offset,
                        letter_box[3] + offset,
                    ]
                    if current_box is None:
                        current_box = padded_box
                    else:
                        current_box = [
                            min(current_box[0], padded_box[0]),
                            min(current_box[1], padded_box[1]),
                            max(current_box[2], padded_box[2]),
                            max(current_box[3], padded_box[3]),
                        ]
                elif current_box is not None:
                    # A letter outside the AOI ends the run.
                    disjoint_areas_of_interest.append((aoi, current_box))
                    current_box = None

            if current_box is not None:
                disjoint_areas_of_interest.append((aoi, current_box))

        # create the bounding boxes without None aoi
        disjoint_areas_of_interest = [area for area in disjoint_areas_of_interest if area[0] != "None"]
        # Rows are collected in a list and turned into a frame once at the end:
        # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
        bounding_box_rows.append(
            [algorithm, programming_style.replace("_", "-"), disjoint_areas_of_interest, dimension]
        )

        # create the image with the bounding boxes
        # (the original built an additional 10x10 inch figure first that was
        # never saved or shown; only this figure ends up on disk)
        fig, ax = plt.subplots(1, 1)
        ax.imshow(image)

        for aoi, bounding_box in disjoint_areas_of_interest:
            # The palette entry has three components; a fourth value of 0.2 is
            # appended so the rectangles are drawn semi-transparent.
            red, green, blue = color_palette_mapping[aoi]
            color = (red, green, blue, 0.2)
            rect = patches.Rectangle((bounding_box[0], bounding_box[1]), bounding_box[2] - bounding_box[0],
                                     bounding_box[3] - bounding_box[1], linewidth=1, edgecolor=color, facecolor=color)
            ax.add_patch(rect)

        plt.tight_layout()
        plt.axis('off')
        plt.savefig(image_path + "aoi_" + algorithm + '_' + programming_style + '.png', bbox_inches='tight', transparent=True, pad_inches=0)
        plt.close('all')

df_bounding_box = pd.DataFrame(bounding_box_rows, columns=df_bounding_box.columns)
df = pd.merge(df, df_bounding_box, on=["Algorithm", "ProgrammingStyle"])

#### AOI coverage

In [None]:
from skimage.draw import ellipse, rectangle
def get_ellipse_mask(center, x_radius, y_radius, dimension, std_value = 1.0):
    """
    Returns a mask of an ellipse with the given center, x-radius, and y-radius.

    center is an (x, y) pair and dimension a (width, height) pair. The result
    is a float32 array of shape (height, width) holding std_value inside the
    ellipse and 0 elsewhere. Parts of the ellipse outside the image are cut off.
    """
    mask = np.zeros((dimension[1], dimension[0]), dtype=np.float32)
    # Passing shape= clips the returned coordinates to the image. Without it,
    # an ellipse reaching past the left/top border yields negative indices,
    # which numpy interprets as counting from the opposite edge, so the
    # clipped part was painted on the far side of the mask.
    rr, cc = ellipse(center[1], center[0], y_radius, x_radius, shape=mask.shape)
    mask[rr, cc] = std_value
    return mask

def get_rectangular_mask(points, dimension, std_value = 1.0):
    """
    Returns a mask of a rectangular with the given points.

    points is [left, top, right, bottom] with inclusive right/bottom edges and
    dimension a (width, height) pair. The result is a float32 array of shape
    (height, width) holding std_value inside the rectangle and 0 elsewhere.
    Parts of the rectangle outside the image are cut off.
    """
    mask = np.zeros((dimension[1], dimension[0]), dtype=np.float32)
    # Clamp to 0 before slicing: a negative bound would be read by numpy as an
    # index counted from the end, which made rectangles that start left of or
    # above the image (e.g. a box at the border minus its padding) come out empty.
    left = max(points[0], 0)
    top = max(points[1], 0)
    right = max(points[2] + 1, 0)
    bottom = max(points[3] + 1, 0)
    # Bounds beyond the image are clipped by the slice itself.
    mask[top:bottom, left:right] = std_value
    return mask

In [None]:
# Read the experiment settings. The context manager closes the file handle,
# which the previous json.load(open(...)) call left open.
with open("./data/settings.json") as settings_file:
    settings = json.load(settings_file)
# Radii used for the click ellipses: base radius plus half of "grad_radius".
x_radius = settings["radius_x"]+settings["grad_radius"]//2
y_radius = settings["radius_y"]+settings["grad_radius"]//2

In [None]:
def heatmap_helper(image, heat_array, grad_radius, treshhold=0.2):
    """Blend a smoothed, normalised heat map into a copy of ``image``.

    ``heat_array`` is blurred with a Gaussian filter (sigma ``grad_radius``)
    and divided by its maximum. Pixels whose normalised heat exceeds
    ``treshhold`` are replaced by a 50/50 mix of the image and the 'jet'
    colour of that heat value.

    Returns a tuple ``(hmap, heat_normalized, max_value)``: the blended image
    array, the normalised heat array, and the maximum of the blurred heat
    before normalisation.
    """
    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap is
    # available in old and new releases alike.
    cmap = plt.get_cmap('jet')

    # gaussian_filter returns a new array, so the caller's data is not modified.
    heat_array = scipy.ndimage.gaussian_filter(heat_array, sigma=grad_radius)
    max_value = np.max(heat_array)
    if max_value != 0:
        heat_array = heat_array / max_value
    # else: leave the array unscaled instead of dividing by zero, which would
    # fill the normalised heat with NaN and poison any average built from it.
    key_out_mask = heat_array > treshhold
    heat_normalized = heat_array.copy()

    # Map the heat to colour-map entries (uint8 lookup indices) and scale the
    # resulting colour components to 0..255.
    heat_colors = cmap((heat_array*255).astype(np.uint8))*255

    hmap = np.array(image)
    hmap[key_out_mask] = 0.5*hmap[key_out_mask] + 0.5*heat_colors[key_out_mask]
    return hmap, heat_normalized, max_value

In [None]:
# Render one averaged click heat map per (Algorithm, ProgrammingStyle) group
# and save it under ./results/average_heatmaps/.
# plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap works in old
# and new releases alike.
cmap = plt.get_cmap('jet')
plt.rcParams['savefig.facecolor']='white'

for group_name, df_group in tqdm(df.groupby(["Algorithm", "ProgrammingStyle"])):
    snippet_name = group_name[0] + "_" + group_name[1].replace("-", "_")
    image = PIL.Image.open("./data/images/" + snippet_name + ".png")
    dimension = image.size
    average_heat = np.zeros((dimension[1], dimension[0]))
    for idx, row in tqdm(df_group.iterrows(), total=len(df_group)):
        clicks = row["ClickData"]
        # Per-click weight: the first TimeData value as is, afterwards the
        # difference between consecutive TimeData values.
        timestamps = row["TimeData"]
        duration_per_click = [timestamps[0]] + [
            later - earlier for earlier, later in zip(timestamps, timestamps[1:])
        ]

        # Accumulate one duration-weighted ellipse per click.
        example = np.zeros((dimension[1], dimension[0]))
        for click, duration in zip(clicks, duration_per_click):
            elliptical_mask = get_ellipse_mask(click, x_radius, y_radius, dimension)
            example += (elliptical_mask*duration)
        _hmap, normalized_heat, _max_value = heatmap_helper(image, example, settings["grad_radius"]//2)
        average_heat += normalized_heat

    image = np.array(image)

    average_heat = scipy.ndimage.gaussian_filter(average_heat, sigma=4)
    average_heat = average_heat / len(df_group)
    # NOTE(review): this threshold of 0 selects every non-negative pixel, so
    # the colour map is blended over the whole image — confirm this is intended.
    mask = average_heat >= 0.00

    average_heat = average_heat*255
    average_heat = average_heat.astype(np.uint8)
    average_heat = cmap(average_heat)
    average_heat = average_heat*255

    image[mask] = 0.5*image[mask] + 0.5*average_heat[mask]

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    fig.set_size_inches(dimension[0]/100 * 1.1, dimension[1]/100)
    sm = plt.cm.ScalarMappable(cmap=cmap)
    plt.axis('off')
    plt.tight_layout()
    ax.imshow(image, aspect='auto')
    plt.colorbar(sm, fraction=0.046, pad=0.04)
    plt.savefig("./results/average_heatmaps/" + snippet_name + ".png", bbox_inches='tight' , pad_inches=0)
    plt.close('all')

In [None]:
# Per participant row: count clicks and accumulate time per AOI type, and
# count transitions between the AOIs hit by consecutive clicks.
df["AOI_TransitionMatrix"] = None
df["AOI_TransitionMatrix"] = df["AOI_TransitionMatrix"].apply(lambda x: np.zeros((len(aoi_numbers), len(aoi_numbers))))
# Rows for df_aoi are collected in a list and converted once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
aoi_rows = []

for idx, row in tqdm(df.iterrows(), total=df.shape[0]):
    algorithm = row["Algorithm"]
    programming_style = row["ProgrammingStyle"]
    counting_array = [0] * len(aoi_numbers)
    timing_array = [0] * len(aoi_numbers)

    dimension = row["ImageDimension"]
    clicks = row["ClickData"]

    # Per-click weight: the first TimeData value as is, afterwards the
    # difference between consecutive TimeData values.
    timestamps = row["TimeData"]
    duration_per_click = [timestamps[0]] + [
        later - earlier for earlier, later in zip(timestamps, timestamps[1:])
    ]

    bounding_boxes = row["BoundingBoxes"]
    mask_bounding_boxes = [get_rectangular_mask(bounding_box, dimension) for _, bounding_box in bounding_boxes]
    idx_bounding_boxes = [aoi_numbers[aoi_name] for aoi_name, _ in bounding_boxes]
    transition_matrix = np.zeros((len(aoi_numbers), len(aoi_numbers)))
    current_aoi = []

    for click, duration in zip(clicks, duration_per_click):
        elliptical_mask = get_ellipse_mask(click, x_radius, y_radius, dimension)
        # An AOI box is hit when it overlaps the click ellipse: both masks
        # hold 1 inside their shape, so an overlapping pixel sums to 2.
        next_aoi = []
        for idx_bounding_box, mask_bounding_box in zip(idx_bounding_boxes, mask_bounding_boxes):
            if np.max(mask_bounding_box+elliptical_mask) >= 2:
                next_aoi.append(idx_bounding_box)
        for aoi_idx in next_aoi:
            counting_array[aoi_idx] += 1
            timing_array[aoi_idx] += duration

        # Every AOI hit by the previous click transitions to every AOI hit by this one.
        for current_aoi_idx in current_aoi:
            for next_aoi_idx in next_aoi:
                transition_matrix[current_aoi_idx, next_aoi_idx] += 1

        current_aoi = list(next_aoi)

    # The loop variable must not be called idx: the original reused that name
    # here, so the df.at assignment below always targeted the last AOI index
    # instead of the current row.
    for aoi_position, name in enumerate(aoi_names):
        aoi_rows.append([algorithm, programming_style, name, counting_array[aoi_position], timing_array[aoi_position]])

    df.at[idx, "AOI_TransitionMatrix"] = transition_matrix

# Building the frame here also yields a fresh 0..n-1 index; the original
# reset_index call discarded its result and had no effect.
df_aoi = pd.DataFrame(aoi_rows, columns=["Algorithm", "ProgrammingStyle", "AOI", "Number", "Duration"])

In [None]:
# Bar chart of the "Number" column per AOI, one bar colour per programming style.
df_tmp = df_aoi.copy()
ax = sns.catplot(
    data=df_tmp,
    kind="bar",
    x="AOI",
    y="Number",
    hue="ProgrammingStyle",
    height=5,
    aspect=5,
)
ax.tight_layout()
ax.set(ylabel='Number Of Clicks in AOI per Condition');

In [None]:
# Bar chart of the "Duration" column per AOI, one bar colour per programming style.
df_tmp = df_aoi.copy()
ax = sns.catplot(
    data=df_tmp,
    kind="bar",
    x="AOI",
    y="Duration",
    hue="ProgrammingStyle",
    height=5,
    aspect=5,
)
ax.tight_layout()
ax.set(ylabel='Time in AOI per Condition');

In [None]:
def sum_up_transition(df):
    """Return the element-wise sum of all matrices in ``df["AOI_TransitionMatrix"]``.

    The accumulator is a square zero matrix with one row/column per entry of
    ``aoi_names``, so an empty frame yields that zero matrix.
    """
    side = len(aoi_names)
    total = np.zeros((side, side))
    for matrix in df["AOI_TransitionMatrix"]:
        total += matrix
    return total

# One summed transition matrix per (Algorithm, ProgrammingStyle) group; the
# unnamed result column (0) is renamed after resetting the index.
transition_columns = ["Algorithm", "ProgrammingStyle", "AOI_TransitionMatrix"]
df_aoi_transition = (
    df[transition_columns]
    .groupby(["Algorithm", "ProgrammingStyle"])
    .apply(sum_up_transition)
    .reset_index()
    .rename(columns={0: "AOI_TransitionMatrix"})
)
df_aoi_transition

In [None]:
# Sum the per-algorithm transition matrices into one "total" matrix per
# programming style.
# The ProgrammingStyle column is compared in underscore form: elsewhere in this
# file its values are converted with replace("-", "_") (and back) to build file
# names, i.e. the column itself holds hyphenated labels. Comparing the raw
# column against "higher_order" / "list_comprehension" therefore matched no
# rows and produced all-zero totals.
normalized_styles = df_aoi_transition["ProgrammingStyle"].str.replace("-", "_", regex=False)

iterative_transition = sum_up_transition(df_aoi_transition[normalized_styles == "iterative"])
iterative_algorithm = "total"
iterative_style = "iterative"

recursive_transition = sum_up_transition(df_aoi_transition[normalized_styles == "recursive"])
recursive_algorithm = "total"
recursive_style = "recursive"

higher_order_transition = sum_up_transition(df_aoi_transition[normalized_styles == "higher_order"])
higher_order_algorithm = "total"
higher_order_style = "higher_order"

list_comprehension_transition = sum_up_transition(df_aoi_transition[normalized_styles == "list_comprehension"])
list_comprehension_algorithm = "total"
list_comprehension_style = "list_comprehension"