<h1> Data Analysis for REYeker</h1>

In [1]:
# lib for dataframes
# lib for crerating paths
from pathlib import Path

# lib for plotting
%matplotlib inline
import matplotlib.pyplot as plt

# REYeker lib
import modules.rEYEkerAnalysis as rEYEker

# lib for numerical computations
import numpy as np
import pandas as pd
import sonaion_analysis as son

# lib for saving np images
from PIL import Image

<h2>1. Configuration</h2>

<h5>Database configuration </h5>

In [2]:
# path to the datafile
config_datasheet_path = r"./results/preprocessed.xlsx"

# columns with visual stimulus data
config_visual_stimulus_variable = "ClickData"

# colums of response time
config_visual_time_variable = "TimeData"

# columns with programming style
config_programming_style_variable = "ProgrammingStyle"

# columns with names of the algo
config_algo_name_variable = "Algorithm"

# columns with correctness value
config_corectness_variable = "Correctness"

# colums of response time
config_response_time_variable = "ResponseTime"

In [3]:
click_settings = rEYEker.load_settings_from_json("./data/settings.json")

<h5>Import the preprocessed dataframe</h5>

In [4]:
needed_columns = [
    "ID",
    config_programming_style_variable,
    config_algo_name_variable,
    config_corectness_variable,
    config_response_time_variable,
    config_visual_stimulus_variable,
    config_visual_time_variable,
]

raw = pd.read_excel(config_datasheet_path)
df = pd.DataFrame(raw, columns=needed_columns)
algo_name_array = [name for name in df[config_algo_name_variable].unique()]

df_tensor = []

for algo_name in algo_name_array:
    algo_df = df.loc[df[config_algo_name_variable] == algo_name]
    df_array = [
        algo_df.loc[algo_df[config_programming_style_variable] == "iterative"],
        algo_df.loc[algo_df[config_programming_style_variable] == "recursive"],
        algo_df.loc[algo_df[config_programming_style_variable] == "higher-order"],
        algo_df.loc[algo_df[config_programming_style_variable] == "list-comprehension"],
    ]
    df_tensor.append(df_array)

In [5]:
# data for loading the images
image_path_tensor = []

for algo_name in algo_name_array:
    image_path_array = [
        "./data/images/" + algo_name + "_iterative.png",
        "./data/images/" + algo_name + "_recursive.png",
        "./data/images/" + algo_name + "_higher_order.png",
        "./data/images/" + algo_name + "_list_comprehension.png",
    ]
    image_path_tensor.append(image_path_array)

# where to save to heatmaps and sequence diagrams
config_folder_prefix_array = ["Iterative/", "Recursive/", "HigherOrder/", "ListComprehension/"]

# used for saving the heatmaps and sequence diagrams
config_image_prefix_tensor = []
for algo_name in algo_name_array:
    image_prefix_array = [
        "Iteraitive_" + algo_name + "_",
        "Recursive_" + algo_name + "_",
        "Higher-Order_" + algo_name + "_",
        "List-Comprhension_" + algo_name + "_",
    ]
    config_image_prefix_tensor.append(image_prefix_array)

<h4>Import Images Settings</h4>

In [6]:
image_tensor = []

# read in every image
for image_path_array in image_path_tensor:
    image_array = []
    for image_path in image_path_array:
        img = rEYEker.load_image(image_path)
        image_array.append(img)
    image_tensor.append(image_array)

<h4> Cast Data to Valid format</h4>

Import the visual stimulus measured Data

In [7]:
visual_stimulus_data_tensor = []
time_data_tensor = []

# iter over every dataframe
for df_array in df_tensor:

    visual_stimulus_data_matrix = []
    time_data_matrix = []
    for idx, dataframe in enumerate(df_array):
        visual_stimulus_array = []
        time_data_array = []

        # iter over every row
        for _idx, row in dataframe.iterrows():
            data_str = row[config_visual_stimulus_variable]
            data_str = str(data_str).strip()
            coordinates_str = data_str.split(" ")
            coordinates = []

            times_str = row[config_visual_time_variable]
            times_str = str(times_str).strip()
            times_str = times_str.split(" ")
            times = []

            # iter over every coordinate pair x-y
            for coordinate_str in coordinates_str:
                try:
                    coordinate = coordinate_str.split("-")
                    coordinate = (int(coordinate[0]), int(coordinate[1]))
                    coordinates.append(coordinate)
                except:
                    coordinates.append(None)

            for time_str in times_str:
                try:
                    time_str = int(time_str)
                    times.append(time_str)
                except:
                    times.append(None)

            visual_stimulus_array.append(coordinates)
            time_data_array.append(times)

        visual_stimulus_data_matrix.append(visual_stimulus_array)
        time_data_matrix.append(time_data_array)

    visual_stimulus_data_tensor.append(visual_stimulus_data_matrix)
    time_data_tensor.append(time_data_matrix)

<h4>Helper Functions</h4>

In [8]:
def save_images(image_array, folder, image_name):
    """
    :brief saves an array of images to a certain location incrementing the postfix by a number
    :param image_array:        array of images (np.ndarray)
    :param folder:     prefix of image/ folder location
    :param image_name: prefix for the image
    """

    Path(folder).mkdir(parents=True, exist_ok=True)

    prefix = folder + image_name

    # TODO create folders if there are none present
    for idx, data in enumerate(image_array):
        data = data * 255
        data = np.uint8(data)
        im = Image.fromarray(data)
        im.save(prefix + str(idx) + ".png")


def compare_for_h0(arr_1, arr_2, alpha):
    t, p = stats.ttest_ind(arr_1, arr_2)
    if p > alpha:
        return True, t, p
    else:
        return False, t, p


def is_in(value, tup):
    return tup[0] <= value <= tup[1]


def get_0_offset(number):
    i = 0
    number = int(number)
    while number != 0:
        number = int(number / 10)
        i = i + 1
    return i

<h2>2. Create Single Heatmaps</h2>

create heatmaps

In [19]:
heatmap_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx in range(len(visual_stimulus_data_tensor)):
    heatmaps_matrix = []
    print("\tGoing to process algorithm #" + str(algo_idx) + ":")

    # iterate over all the datasets
    for implementation_idx in range(len(visual_stimulus_data_tensor[algo_idx])):

        print("\t\tImplementation Variant #" + str(implementation_idx) + ":")
        heatmap_array = []

        print("\t\t\t", end="")
        # iterate over all the measurements of the dataset
        for dataset_idx in range(len(visual_stimulus_data_tensor[algo_idx][implementation_idx])):
            click_data = visual_stimulus_data_tensor[algo_idx][implementation_idx][dataset_idx]
            time_data = time_data_tensor[algo_idx][implementation_idx][dataset_idx]
            image = image_tensor[algo_idx][implementation_idx]

            print("-", end="")
            try:
                im = rEYEker.draw_shape_heat_map(image, click_data, click_settings, should_copy=True, time_stamps=time_data)
                heatmap_array.append(im)
            except:
                heatmap_array.append(image)

        print()
        heatmaps_matrix.append(heatmap_array)

    heatmap_tensor.append(heatmaps_matrix)

Going to process 12 datatables: 
	Going to process algorithm #0:
		Implementation Variant #0:
			-------
		Implementation Variant #1:
			--------
		Implementation Variant #2:
			--------
		Implementation Variant #3:
			------
	Going to process algorithm #1:
		Implementation Variant #0:
			-------
		Implementation Variant #1:
			-------
		Implementation Variant #2:
			------
		Implementation Variant #3:
			-------
	Going to process algorithm #2:
		Implementation Variant #0:
			-------
		Implementation Variant #1:
			------
		Implementation Variant #2:
			------
		Implementation Variant #3:
			-------
	Going to process algorithm #3:
		Implementation Variant #0:
			---------
		Implementation Variant #1:
			-------
		Implementation Variant #2:
			-------
		Implementation Variant #3:
			-----
	Going to process algorithm #4:
		Implementation Variant #0:
			------
		Implementation Variant #1:
			-------
		Implementation Variant #2:
			--------
		Implementation Variant #3:
			-------
	Going to


save Heatmaps

In [29]:
for algo_idx, heatmaps_matrix in enumerate(heatmap_tensor):
    for idx, heatmap_array in enumerate(heatmaps_matrix):
        path = "./results/" + config_folder_prefix_array[idx] + str(algo_name_array[algo_idx]) + "/heatmaps/"
        print("Writing to:" + path)
        save_images(heatmap_array, path, config_image_prefix_tensor[algo_idx][idx])

Writing to:./results/Iterative/apply/heatmaps/
Writing to:./results/Recursive/apply/heatmaps/
Writing to:./results/HigherOrder/apply/heatmaps/
Writing to:./results/ListComprehension/apply/heatmaps/
Writing to:./results/Iterative/condition_sum/heatmaps/
Writing to:./results/Recursive/condition_sum/heatmaps/
Writing to:./results/HigherOrder/condition_sum/heatmaps/
Writing to:./results/ListComprehension/condition_sum/heatmaps/
Writing to:./results/Iterative/find/heatmaps/
Writing to:./results/Recursive/find/heatmaps/
Writing to:./results/HigherOrder/find/heatmaps/
Writing to:./results/ListComprehension/find/heatmaps/
Writing to:./results/Iterative/is_prime/heatmaps/
Writing to:./results/Recursive/is_prime/heatmaps/
Writing to:./results/HigherOrder/is_prime/heatmaps/
Writing to:./results/ListComprehension/is_prime/heatmaps/
Writing to:./results/Iterative/max/heatmaps/
Writing to:./results/Recursive/max/heatmaps/
Writing to:./results/HigherOrder/max/heatmaps/
Writing to:./results/ListCompre

<h2>3. Create Average Heatmaps</h2>

create heatmaps

In [None]:
heatmap_tensor = []
mask_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx in range(len(visual_stimulus_data_tensor)):
    if algo_idx == 2:
        break
    average_heatmap_array = []
    mask_array = []
    print("\tGoing to process algorithm #" + str(algo_idx) + ":")

    # iterate over all the datasets
    for implementation_idx in range(len(visual_stimulus_data_tensor[algo_idx])):
        print("\t\tImplementation Variant #" + str(implementation_idx) + ":")

        image = image_tensor[algo_idx][implementation_idx]
        click = visual_stimulus_data_tensor[algo_idx][implementation_idx]
        time = time_data_tensor[algo_idx][implementation_idx]

        click_tmp = []
        time_tmp = []

        for i in range(len(click)):

            if click[i] is not None or None not in click[i]:
                click_tmp.append(click[i])
                time_tmp.append(time[i])

        try:
            im, mask = rEYEker.draw_average_shape_heat_map_rel(image, click_tmp, click_settings, 1.0, 0.0, time_tmp, should_copy=True)
        except:
            print(click_tmp)

        average_heatmap_array.append(im)
        mask_array.append(mask)

    print()
    heatmap_tensor.append(average_heatmap_array)
    mask_tensor.append(mask_array)

Going to process 12 datatables: 
	Going to process algorithm #0:
		Implementation Variant #0:
		Implementation Variant #1:
		Implementation Variant #2:
		Implementation Variant #3:

	Going to process algorithm #1:
		Implementation Variant #0:
		Implementation Variant #1:
[[None], [(67, 17), (81, 40), (100, 62), (103, 94), (192, 120), (156, 171), (103, 248), (73, 35), (100, 62), (119, 91), (240, 118), (190, 89), (77, 37), (133, 92)], [(117, 246), (88, 43), (101, 66), (121, 90), (124, 114), (121, 152), (132, 174), (153, 93), (152, 114), (148, 142), (146, 178), (120, 244), (106, 16), (109, 54), (116, 89), (119, 106), (121, 116), (129, 174), (154, 95), (165, 169), (167, 86), (147, 167), (159, 116), (160, 92), (165, 163), (163, 97), (133, 123), (122, 248)], [(75, 15), (94, 49), (100, 238), (127, 85), (140, 169), (186, 95)], [(176, 131), (177, 119), (159, 168), (28, 130), (64, 144), (86, 254), (128, 252), (130, 127), (123, 184), (114, 287), (117, 249)], [(88, 6), (95, 42), (83, 67), (133, 94

save heatmaps

In [34]:
for algo_idx, heatmaps_matrix in enumerate(heatmap_tensor):
    for idx, heatmap in enumerate(heatmaps_matrix):
        # path = "./results/" + str(algo_name_array[algo_idx]) + "/heatmaps/average_heatmap/"
        path = "./results/averageHeatMaps_of_wrong/"
        print("Writing to:" + path)
        save_images([heatmap], path, config_image_prefix_tensor[algo_idx][idx])

Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./results/averageHeatMaps_of_wrong/
Writing to:./

<h2>4. Create Sequence diagramms</h2>

create sequence diagrams

In [30]:
sequence_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx in range(len(visual_stimulus_data_tensor)):
    sequence_matrix = []
    print("\tGoing to process algorithm #" + str(algo_idx) + ":")

    # iterate over all the datasets
    for implementation_idx in range(len(visual_stimulus_data_tensor[algo_idx])):

        print("\t\tImplementation Variant #" + str(implementation_idx) + ":")
        sequence_array = []

        print("\t\t\t", end="")
        # iterate over all the measurements of the dataset
        for dataset_idx in range(len(visual_stimulus_data_tensor[algo_idx][implementation_idx])):
            click_data = visual_stimulus_data_tensor[algo_idx][implementation_idx][dataset_idx]
            time_data = time_data_tensor[algo_idx][implementation_idx][dataset_idx]
            image = image_tensor[algo_idx][implementation_idx]

            print("-", end="")
            try:
                im = rEYEker.draw_vertical_line_diagram(image, click_data, should_copy=True, time_stamps=time_data)
                sequence_array.append(im)
            except:
                sequence_array.append(image)

        print()
        sequence_matrix.append(sequence_array)

    sequence_tensor.append(sequence_matrix)

Going to process 12 datatables: 
	Going to process algorithm #0:
		Implementation Variant #0:
			-------
		Implementation Variant #1:
			--------
		Implementation Variant #2:
			--------
		Implementation Variant #3:
			------
	Going to process algorithm #1:
		Implementation Variant #0:
			-------
		Implementation Variant #1:
			-------
		Implementation Variant #2:
			------
		Implementation Variant #3:
			-------
	Going to process algorithm #2:
		Implementation Variant #0:
			-------
		Implementation Variant #1:
			------
		Implementation Variant #2:
			------
		Implementation Variant #3:
			-------
	Going to process algorithm #3:
		Implementation Variant #0:
			---------
		Implementation Variant #1:
			-------
		Implementation Variant #2:
			-------
		Implementation Variant #3:
			-----
	Going to process algorithm #4:
		Implementation Variant #0:
			------
		Implementation Variant #1:
			-------
		Implementation Variant #2:
			--------
		Implementation Variant #3:
			-------
	Going to

save sequence diagrams

In [31]:
for algo_idx, sequence_matrix in enumerate(sequence_tensor):
    for idx, sequence_array in enumerate(sequence_matrix):
        path = "./results/" + config_folder_prefix_array[idx] + str(algo_name_array[algo_idx]) + "/sequence/"
        print("Writing to:" + path)
        save_images(sequence_array, path, config_image_prefix_tensor[algo_idx][idx])

Writing to:./results/Iterative/apply/sequence/
Writing to:./results/Recursive/apply/sequence/
Writing to:./results/HigherOrder/apply/sequence/
Writing to:./results/ListComprehension/apply/sequence/
Writing to:./results/Iterative/condition_sum/sequence/
Writing to:./results/Recursive/condition_sum/sequence/
Writing to:./results/HigherOrder/condition_sum/sequence/
Writing to:./results/ListComprehension/condition_sum/sequence/
Writing to:./results/Iterative/find/sequence/
Writing to:./results/Recursive/find/sequence/
Writing to:./results/HigherOrder/find/sequence/
Writing to:./results/ListComprehension/find/sequence/
Writing to:./results/Iterative/is_prime/sequence/
Writing to:./results/Recursive/is_prime/sequence/
Writing to:./results/HigherOrder/is_prime/sequence/
Writing to:./results/ListComprehension/is_prime/sequence/
Writing to:./results/Iterative/max/sequence/
Writing to:./results/Recursive/max/sequence/
Writing to:./results/HigherOrder/max/sequence/
Writing to:./results/ListCompre