<h1>Data Analysis for rEYEker</h1>

In [None]:
# lib for dataframes
# lib for creating paths
from pathlib import Path

# lib for plotting
%matplotlib inline
import matplotlib.pyplot as plt

# REYeker lib
import modules.rEYEkerAnalysis as rEYEker

# lib for numerical computations
import numpy as np
import pandas as pd
import sonaion_analysis as son

# lib for saving np images
from PIL import Image
import scipy.stats as stats

## 1. Configuration

### Database configuration

In [None]:
# path to the preprocessed datasheet (an Excel file)
config_datasheet_path = r"./results/preprocessed.xlsx"

# column with the visual stimulus (click) data:
# a string of space-separated "x-y" coordinate pairs
config_visual_stimulus_variable = "ClickData"

# column with the time data belonging to the clicks:
# a string of space-separated integers (presumably one time stamp per click - TODO confirm)
config_visual_time_variable = "TimeData"

# column with the programming style of the snippet
# (the analysis selects "iterative", "recursive", "higher-order" and "list-comprehension")
config_programming_style_variable = "ProgrammingStyle"

# column with the name of the algorithm
config_algo_name_variable = "Algorithm"

# column with the correctness value
config_corectness_variable = "Correctness"

# column with the response time
config_response_time_variable = "ResponseTime"

In [None]:
# rEYEker settings read from a JSON file; handed to the heatmap drawing functions further down
click_settings = rEYEker.load_settings_from_json("./data/settings.json")

### Import the preprocessed dataframe

In [None]:
# columns of the datasheet that are used by this analysis
needed_columns = [
    "ID",
    config_programming_style_variable,
    config_algo_name_variable,
    config_corectness_variable,
    config_response_time_variable,
    config_visual_stimulus_variable,
    config_visual_time_variable,
]

# read the sheet and restrict it to the needed columns
raw = pd.read_excel(config_datasheet_path)
df = pd.DataFrame(raw, columns=needed_columns)

# every distinct algorithm name found in the data
algo_name_array = list(df[config_algo_name_variable].unique())

# df_tensor[algorithm][style] -> rows of one algorithm written in one programming style
# style order: iterative, recursive, higher-order, list-comprehension
df_tensor = []

for algo_name in algo_name_array:
    algo_df = df.loc[df[config_algo_name_variable] == algo_name]
    style_column = algo_df[config_programming_style_variable]
    df_array = [
        algo_df.loc[style_column == style_name]
        for style_name in ("iterative", "recursive", "higher-order", "list-comprehension")
    ]
    df_tensor.append(df_array)

In [None]:
# data for loading the images:
# image_path_tensor[algorithm][style] -> path of the stimulus image
image_path_tensor = [
    [
        "./data/images/" + algo_name + file_suffix
        for file_suffix in (
            "_iterative.png",
            "_recursive.png",
            "_higher_order.png",
            "_list_comprehension.png",
        )
    ]
    for algo_name in algo_name_array
]

# where to save to heatmaps and sequence diagrams (one folder per programming style)
config_folder_prefix_array = ["Iterative/", "Recursive/", "HigherOrder/", "ListComprehension/"]

# used for saving the heatmaps and sequence diagrams:
# config_image_prefix_tensor[algorithm][style] -> file name prefix
# NOTE(review): the spelling of the prefixes is kept unchanged on purpose,
# because it becomes part of the written file names
config_image_prefix_tensor = [
    [
        style_label + algo_name + "_"
        for style_label in (
            "Iteraitive_",
            "Recursive_",
            "Higher-Order_",
            "List-Comprhension_",
        )
    ]
    for algo_name in algo_name_array
]

## Import Images Settings

In [None]:
# read in every image:
# image_tensor[algorithm][style] -> image loaded from the path at the same position of image_path_tensor
image_tensor = [
    [rEYEker.load_image(image_path) for image_path in image_path_array]
    for image_path_array in image_path_tensor
]

## Cast Data to Valid format

## Import the visual stimulus measured Data

In [None]:
def _parse_click_coordinates(raw_value):
    """
    :brief parses one click data cell into a list of (x, y) integer tuples
    :param raw_value: cell content, expected as space-separated "x-y" pairs
    :return: list of (x, y) tuples, or [None] if any pair cannot be parsed
    """
    coordinates = []
    for pair_str in str(raw_value).strip().split(" "):
        try:
            parts = pair_str.split("-")
            coordinates.append((int(parts[0]), int(parts[1])))
        except (ValueError, IndexError):
            # a non-numeric part or a missing "-" invalidates the whole measurement
            return [None]
    return coordinates


def _parse_time_stamps(raw_value):
    """
    :brief parses one time data cell into a list of integers
    :param raw_value: cell content, expected as space-separated integers
    :return: list of integers, or [None] if any entry cannot be parsed
    """
    times = []
    for time_str in str(raw_value).strip().split(" "):
        try:
            times.append(int(time_str))
        except ValueError:
            # a single unparsable entry invalidates the whole measurement
            return [None]
    return times


# both tensors are indexed as [algorithm][style][measurement];
# a measurement that could not be parsed is stored as [None]
visual_stimulus_data_tensor = []
time_data_tensor = []

# iter over every dataframe
for df_array in df_tensor:

    visual_stimulus_data_matrix = []
    time_data_matrix = []
    for dataframe in df_array:
        visual_stimulus_array = []
        time_data_array = []

        # iter over every row
        for _idx, row in dataframe.iterrows():
            visual_stimulus_array.append(_parse_click_coordinates(row[config_visual_stimulus_variable]))
            time_data_array.append(_parse_time_stamps(row[config_visual_time_variable]))

        visual_stimulus_data_matrix.append(visual_stimulus_array)
        time_data_matrix.append(time_data_array)

    visual_stimulus_data_tensor.append(visual_stimulus_data_matrix)
    time_data_tensor.append(time_data_matrix)

### Helper Functions

In [None]:
def save_images(image_array, folder, image_name):
    """
    :brief writes every image of an array to "<folder><image_name><index>.png"
    :param image_array: array of images (np.ndarray); each image is multiplied by 255 and cast to uint8
    :param folder:      prefix of image / folder location, created together with its parents if missing
    :param image_name:  prefix for the image file name
    """

    # make sure the target folder exists before the first image is written
    Path(folder).mkdir(parents=True, exist_ok=True)

    for number, picture in enumerate(image_array):
        pixels = np.uint8(picture * 255)
        target = folder + image_name + str(number) + ".png"
        Image.fromarray(pixels).save(target)


def compare_for_h0(arr_1, arr_2, alpha):
    """
    :brief runs scipy's independent two-sample t-test on both arrays
    :param arr_1: first sample
    :param arr_2: second sample
    :param alpha: threshold the p-value is compared against
    :return: tuple (True if p > alpha else False, t statistic, p-value)
    """
    statistic, p_value = stats.ttest_ind(arr_1, arr_2)
    above_alpha = True if p_value > alpha else False
    return above_alpha, statistic, p_value


def is_in(value, tup):
    """
    :brief checks whether a value lies within a closed interval
    :param value: value to check
    :param tup:   interval as (lower bound, upper bound), both bounds inclusive
    :return: result of lower bound <= value <= upper bound
    """
    lower_ok = tup[0] <= value
    return lower_ok and value <= tup[1]


def get_0_offset(number):
    """
    :brief counts the decimal digits of the integer part of a number
    :param number: value to inspect; converted with int(), so floats are truncated towards zero
    :return: number of decimal digits (sign ignored); 0 if the integer part is 0
    """
    # work on the magnitude so that floor division also terminates for negative input
    remaining = abs(int(number))
    digits = 0
    while remaining:
        # integer division is exact for arbitrarily large ints; int(x / 10) goes through
        # a float and miscounts (or overflows) for very large values
        remaining //= 10
        digits += 1
    return digits

## 2. Create Single Heatmaps

### create heatmaps

In [None]:
# heatmap_tensor[algorithm][style][measurement] -> heatmap of a single measurement
heatmap_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx, click_matrix in enumerate(visual_stimulus_data_tensor):
    heatmaps_matrix = []
    print("\tGoing to process algorithm #" + str(algo_idx) + ":")

    # iterate over all the datasets
    for implementation_idx, click_rows in enumerate(click_matrix):

        print("\t\tImplementation Variant #" + str(implementation_idx) + ":")
        heatmap_array = []

        # stimulus image and time data belonging to this algorithm / style combination
        image = image_tensor[algo_idx][implementation_idx]
        time_rows = time_data_tensor[algo_idx][implementation_idx]

        print("\t\t\t", end="")
        # iterate over all the measurements of the dataset
        for click_data, time_data in zip(click_rows, time_rows):
            print("-", end="")
            try:
                im = rEYEker.draw_shape_heat_map(image, click_data, click_settings, should_copy=True,
                                                 time_stamps=time_data)
            except Exception:
                # best effort: a measurement that cannot be drawn (e.g. unparsable click data
                # stored as [None]) falls back to the plain stimulus image.
                # Exception instead of a bare except keeps Ctrl+C / SystemExit working.
                im = image
            heatmap_array.append(im)

        print()
        heatmaps_matrix.append(heatmap_array)

    heatmap_tensor.append(heatmaps_matrix)


### save heatmaps

In [None]:
# write the single heatmaps to ./results/<style folder>/<algorithm>/heatmaps/
for algo_idx, heatmaps_matrix in enumerate(heatmap_tensor):
    for idx, heatmap_array in enumerate(heatmaps_matrix):
        style_folder = config_folder_prefix_array[idx]
        algo_folder = str(algo_name_array[algo_idx])
        path = "./results/" + style_folder + algo_folder + "/heatmaps/"
        print("Writing to:" + path)
        file_prefix = config_image_prefix_tensor[algo_idx][idx]
        save_images(heatmap_array, path, file_prefix)

## 3. Create Average Heatmaps

### create heatmaps

In [None]:
# heatmap_tensor[algorithm][style] -> average heatmap over all valid measurements
# mask_tensor[algorithm][style]    -> mask returned together with the average heatmap
heatmap_tensor = []
mask_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx, click_matrix in enumerate(visual_stimulus_data_tensor):
    average_heatmap_array = []
    mask_array = []
    print("\tGoing to process algorithm #" + str(algo_idx) + ":")

    # iterate over all the datasets
    for implementation_idx, click in enumerate(click_matrix):
        print("\t\tImplementation Variant #" + str(implementation_idx) + ":")

        image = image_tensor[algo_idx][implementation_idx]
        time = time_data_tensor[algo_idx][implementation_idx]

        # positions of the measurements whose click data is not the [None] placeholder
        valid_rows = [row for row, clicks in enumerate(click) if clicks != [None]]
        click_tmp = [click[row] for row in valid_rows]
        time_tmp = [time[row] for row in valid_rows]

        im, mask = rEYEker.draw_average_shape_heat_map_rel(
            image, click_tmp, click_settings, 1.0, 0.0, time_tmp, should_copy=True
        )

        average_heatmap_array.append(im)
        mask_array.append(mask)

    print()
    heatmap_tensor.append(average_heatmap_array)
    mask_tensor.append(mask_array)

### save heatmaps

In [None]:
# write one average heatmap per algorithm / style to ./results/<style folder>/<algorithm>/
for algo_idx, heatmaps_matrix in enumerate(heatmap_tensor):
    for idx, heatmap in enumerate(heatmaps_matrix):
        style_folder = config_folder_prefix_array[idx]
        algo_folder = str(algo_name_array[algo_idx])
        path = "./results/" + style_folder + algo_folder + "/"
        print("Writing to:" + path)
        file_prefix = config_image_prefix_tensor[algo_idx][idx]
        # save_images expects a list, so the single heatmap is wrapped
        save_images([heatmap], path, file_prefix)

## 4. Create Sequence diagrams

### create sequence diagrams

In [None]:
# sequence_tensor[algorithm][style][measurement] -> sequence diagram of a single measurement
sequence_tensor = []

print("Going to process " + str(len(visual_stimulus_data_tensor)) + " datatables: ")
for algo_idx, click_matrix in enumerate(visual_stimulus_data_tensor):
    sequence_matrix = []
    print("\tGoing to process algorithm #" + str(algo_idx) + ":")

    # iterate over all the datasets
    for implementation_idx, click_rows in enumerate(click_matrix):

        print("\t\tImplementation Variant #" + str(implementation_idx) + ":")
        sequence_array = []

        # stimulus image belonging to this algorithm / style combination
        image = image_tensor[algo_idx][implementation_idx]

        print("\t\t\t", end="")
        # iterate over all the measurements of the dataset
        for click_data in click_rows:
            # The diagram only needs the click coordinates. The time data is not looked up
            # here: indexing it per click raised an IndexError for measurements with valid
            # clicks but unparsable time data (stored as the one-element list [None]).
            click_tmp = list(click_data)

            print("-", end="")
            try:
                im = rEYEker.draw_vertical_line_diagram(image, click_tmp, should_copy=True)
            except Exception:
                # best effort: fall back to the plain stimulus image if drawing fails
                # NOTE(review): unparsable click data ([None]) is passed through unchanged and
                # presumably ends up here - confirm against draw_vertical_line_diagram.
                # Exception instead of a bare except keeps Ctrl+C / SystemExit working.
                im = image
            sequence_array.append(im)

        print()
        sequence_matrix.append(sequence_array)

    sequence_tensor.append(sequence_matrix)

### save sequence diagrams

In [None]:
# write the sequence diagrams to ./results/<style folder>/<algorithm>/sequence/
for algo_idx, sequence_matrix in enumerate(sequence_tensor):
    for idx, sequence_array in enumerate(sequence_matrix):
        style_folder = config_folder_prefix_array[idx]
        algo_folder = str(algo_name_array[algo_idx])
        path = "./results/" + style_folder + algo_folder + "/sequence/"
        print("Writing to:" + path)
        file_prefix = config_image_prefix_tensor[algo_idx][idx]
        save_images(sequence_array, path, file_prefix)

In [None]:
# create the folder including missing parents; no error if it already exists
# NOTE(review): save_images creates its target folder itself, so this call may be redundant - confirm
Path("./results/Iterative/apply/sequence/").mkdir(parents=True, exist_ok=True)