<h1>Merge DataFrames Into a Useful Data Format</h1>

<h2>Imports and Macros</h2>

In [1]:
import os

import mne

mne.set_log_level("WARNING")

import re
from multiprocessing import Pool

import dask.dataframe as dd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter
from psychopy.misc import fromFile

%matplotlib widget

<h2>Potential Function Arguments</h2>

In [2]:
def find_single_file(folder, extension):
    """Return the path of the single file below ``folder`` with the given extension.

    ``folder`` is searched recursively with ``os.walk`` and every hit is
    joined with the directory it was found in, so files in sub-folders get a
    valid path.

    Parameters:
        folder: directory to search (the configuration below passes paths
            ending in "/").
        extension: file extension including the leading dot, e.g. ".csv".

    Returns:
        The path of the matching file, or ``folder`` unchanged when nothing
        matches (so a missing file surfaces when the path is opened).

    Raises:
        ValueError: if more than one file matches. Picking one silently, or
            concatenating the names, would hide a data-layout problem.
    """
    matches = [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(folder)
        for filename in filenames
        if os.path.splitext(filename)[1] == extension
    ]
    if not matches:
        return folder
    if len(matches) > 1:
        raise ValueError(f"Expected exactly one '{extension}' file in {folder}, found {len(matches)}: {matches}")
    return matches[0]


# Everything a reader might want to tune for another participant or machine.
participant_number = 1
cores = 12
questionaire_path = "./rawData/Questionnaire.xlsx"
participant_folder = "./rawData/Participant" + str(participant_number).zfill(2) + "/"
eyetracking_path = participant_folder + "experiment_data.csv"

# The PsychoPy .csv/.log and the EEG .fif have run-specific names, so they
# are discovered by extension instead of being spelled out here.
psychopy_csv_path = find_single_file(participant_folder + "data/", ".csv")
psychopy_log_path = find_single_file(participant_folder + "data/", ".log")
eeg_path = find_single_file(participant_folder, ".fif")

<h2>Get Data From participant 1</h2>

In [3]:
# The sheet is transposed so that each row is one participant; the first
# transposed row then supplies the column labels and is dropped from the data.
questionnaire_table = pd.read_excel(questionaire_path).transpose()
questionnaire_table.columns = questionnaire_table.iloc[0]
questionnaire_table = questionnaire_table.iloc[1:]

# Keep only the answers of the participant configured above (label lookup).
df_questionaire = questionnaire_table.loc[participant_number]

<h2>Read Eyetracking Data</h2>

In [4]:
# Semicolon-separated export without a header row: columns are addressed by
# their integer position (0..10) until they are given names further down.
df_eyetracking = pd.read_csv(
    eyetracking_path,
    sep=";",
    header=None,
)

In [5]:
# Raw strings: "\(" is not a valid escape in a normal string literal and
# triggers a DeprecationWarning (SyntaxWarning on newer Python versions).
three_extractor_compiled = re.compile(r"\((.*), (.*), (.*)\)")
two_extractor_compiled = re.compile(r"\((.*), (.*)\)")


def three_extractor(value):
    """Parse a string of the form "(a, b, c)" into a tuple of three floats.

    Raises:
        ValueError: if ``value`` does not start with a parenthesised triple,
            or if one of the components is not a valid float.
    """
    match = three_extractor_compiled.match(value)
    if match is None:
        raise ValueError(f"Expected '(x, y, z)', got {value!r}")
    return float(match.group(1)), float(match.group(2)), float(match.group(3))


def two_extractor(value):
    """Parse a string of the form "(a, b)" into a tuple of two floats.

    Raises:
        ValueError: if ``value`` does not start with a parenthesised pair,
            or if one of the components is not a valid float.
    """
    match = two_extractor_compiled.match(value)
    if match is None:
        raise ValueError(f"Expected '(x, y)', got {value!r}")
    return float(match.group(1)), float(match.group(2))


def _xyz(prefix):
    """Return the three column names ``<prefix>_x``, ``<prefix>_y``, ``<prefix>_z``."""
    return [f"{prefix}_{axis}" for axis in ("x", "y", "z")]


def _parsed_frame(column, extractor, names):
    """Apply ``extractor`` to one raw column via dask and expand the tuples into columns ``names``."""
    parsed = ddf_eyetracking.apply(lambda row: extractor(row[column]), meta=meta_type, axis=1)
    return pd.DataFrame(parsed.compute().transpose().tolist(), columns=names)


def _plain_frame(column, name):
    """Copy one raw column unchanged into a single-column frame called ``name``."""
    return pd.DataFrame(ddf_eyetracking[column].compute().transpose().tolist(), columns=[name])


# The row-wise parsing is spread over `cores` dask partitions.
meta_type = dd.utils.make_meta(0.0)
ddf_eyetracking = dd.from_pandas(df_eyetracking, npartitions=cores)

# Raw columns 0 and 3-7 hold "(…)" strings that are split into numeric
# columns; columns 1, 2 and 8-10 are taken over as they are.
df_0 = _parsed_frame(0, three_extractor, _xyz("l_gaze_point_in_user_coordinate_system"))
df_1 = _plain_frame(1, "l_valid")
df_2 = _plain_frame(2, "r_valid")
df_3 = _parsed_frame(3, three_extractor, _xyz("r_gaze_point_in_user_coordinate_system"))
df_4 = _parsed_frame(4, three_extractor, _xyz("l_gaze_origin_in_user_coordinate_system"))
df_5 = _parsed_frame(5, three_extractor, _xyz("r_gaze_origin_in_user_coordinate_system"))
df_6 = _parsed_frame(6, two_extractor, ["l_display_x", "l_display_y"])
df_7 = _parsed_frame(7, two_extractor, ["r_display_x", "r_display_y"])
df_8 = _plain_frame(8, "time")
df_9 = _plain_frame(9, "l_pupil_diameter")
df_10 = _plain_frame(10, "r_pupil_diameter")

# Side-by-side concatenation replaces the raw frame with the named columns.
df_eyetracking = pd.concat(
    [df_0, df_1, df_2, df_3, df_4, df_5, df_6, df_7, df_8, df_9, df_10],
    axis=1,
)

In [6]:
# Make the eye-tracker clock start at zero and divide by 1e6.
# NOTE(review): the divisor suggests microseconds -> seconds; confirm against
# the tracker export. The column is overwritten in place, so running this
# cell a second time divides the values by 1e6 again.
t_0 = df_eyetracking["time"][0]
time_as_float = df_eyetracking["time"].astype(float)
df_eyetracking["time"] = (time_as_float - t_0) / (1000000.0)

<h2>Read EEG Data</h2>

In [7]:
def rescale(data, scaling_factor=7e-9):
    """Multiply ``data`` by ``scaling_factor`` and return the result.

    Parameters:
        data: number or array of raw amplifier values.
        scaling_factor: multiplier used to obtain values in [V]; it depends
            on the device and its settings. Defaults to 7e-9, the value this
            notebook has always used.

    Returns:
        ``scaling_factor * data`` (same shape as ``data``).
    """
    return scaling_factor * data


def get_events_from_info(inst):
    """Read events directly from ``inst.info["events"]``; specific to some of our recordings.

    Every entry that carries a "list" value contributes one row made of the
    first three values of that list in reversed order
    (``[list[2], list[1], list[0]]``). Entries with neither "list" nor
    "channels" are reported on stdout and skipped.

    Parameters:
        inst: object exposing ``info["events"]`` as a sequence of dict-like
            entries (for example the Raw object loaded from the .fif file).

    Returns:
        numpy.ndarray with one row per usable entry (empty if there is none).

    Raises:
        NotImplementedError: if an entry only provides "channels"; that
            layout is not supported. (Subclass of RuntimeError, which the
            former bare ``raise`` produced.)
    """
    events_mne = []
    for position, entry in enumerate(inst.info["events"], start=1):
        content = entry.get("list")
        if content is not None:
            values = content.tolist()
            events_mne.append([values[2], values[1], values[0]])
        elif entry.get("channels") is not None:
            raise NotImplementedError("fiftools: event entries of type 'channels' are not supported.")
        else:
            print("fiftools: Type of entry #" + str(position) + " unknown.")
    return np.array(events_mne)


# Read the participant's recording and apply `rescale` to the EEG channels.
raw = mne.io.read_raw_fif(fname=eeg_path, preload=True)
raw.apply_function(rescale, picks=["eeg"])

# Preprocessing: band-pass filter (0.5 to 80 Hz), then notch filter at the
# power-line frequency and its first harmonic.
band_low_hz, band_high_hz = 0.5, 80
notch_frequencies_hz = [50, 100]
raw.filter(band_low_hz, band_high_hz)
raw.notch_filter(notch_frequencies_hz);

  raw = mne.io.read_raw_fif(fname=eeg_path, preload=True)


In [8]:
# Restrict the recording to its EEG channels (the call modifies `raw`).
picks = raw.pick_types(eeg=True)
data = raw.get_data(picks=["eeg"])

channel_names = raw.info["ch_names"]
sampling_rate = raw.info["sfreq"]

events = get_events_from_info(raw)
# NOTE(review): column 2 is divided by the sampling rate below, so it is used
# as a sample position, while column 0 is compared against 100 like a marker
# code. The variable names suggest the opposite; confirm against the
# recording format before renaming.
event_ids = events[:, 2]

indices_events = events[:, 0]
t_events = event_ids / sampling_rate

columns = [
    "Snippet",
    "SnippetStart",
    "SnippetStop",
    "InputStart",
    "InputStop",
    "OutputStart",
    "OutputStop",
]

# One row per event whose marker value is <= 100. The three events that
# follow it delimit the phases: snippet = [i+1, i+2), input = [i+2, i+3),
# output starts at i+3. "Snippet" and "OutputStop" are filled in later.
# An IndexError here means fewer than three events follow such a marker.
# The rows are collected first and the frame is built once, because
# DataFrame.append no longer exists in pandas >= 2.0.
rows = [
    [None, t_events[i + 1], t_events[i + 2], t_events[i + 2], t_events[i + 3], t_events[i + 3], None]
    for i in range(len(t_events))
    if indices_events[i] <= 100
]
df_time = pd.DataFrame(rows, columns=columns)

In [9]:
# Snippet0 = raw.copy().crop(0.0, 12.0)
# Snippet0.plot_psd(show=False)
# Snippet0.plot(block=True)

<h2>Read PsychoPy Data</h2>

In [10]:
def to_file_name(path):
    """Return the file name of ``path`` without directories and without extension.

    Both "\\" and "/" are accepted as directory separators, independent of
    the operating system the notebook runs on. The extension is stripped
    from the final component only, so dots in directory names cannot
    truncate the result.
    """
    base_name = path.replace("\\", "/").rsplit("/", 1)[-1]
    stem, _ext = os.path.splitext(base_name)
    return stem


def map_to_answer(answer):
    """Map a raw answer string onto its answer category.

    The first keyword contained in ``answer`` decides the category; the
    keywords are tried in the order listed below. "None" is reported as
    "Wrong3". If no keyword is contained, ``None`` is returned.
    """
    keyword_to_category = (
        ("Right", "Right"),
        ("Wrong1", "Wrong1"),
        ("Wrong2", "Wrong2"),
        ("None", "Wrong3"),
        ("Skipped", "Skipped"),
    )
    for keyword, category in keyword_to_category:
        if keyword in answer:
            return category
    return None


# Columns taken from the PsychoPy result file, and the names they get here.
psychopy_columns = [
    "ImagePath", "Image.started", "Image.stopped",
    "InputPath", "image.started", "image.stopped",
    "ImagePathInputs", "image_1.started", "image_1.stopped",
    "ChoosenAnwer", "image_7.started",
]
phase_column_names = {
    "Image.started": "SnippetStart",
    "Image.stopped": "SnippetStop",
    "image.started": "InputStart",
    "image.stopped": "InputStop",
    "image_1.started": "OutputStart",
    "image_1.stopped": "OutputStop",
    "image_7.started": "CrossStart",
}
time_columns = ["SnippetStart", "SnippetStop", "InputStart", "InputStop", "OutputStart", "OutputStop"]

df_psydata = pd.read_csv(psychopy_csv_path)[psychopy_columns]

# Only rows that reference a snippet image are kept.
df_psydata = df_psydata[df_psydata["ImagePath"].notna()]
df_psydata.insert(0, "Snippet", df_psydata["ImagePath"].apply(to_file_name))
df_psydata["ChoosenAnwer"] = df_psydata["ChoosenAnwer"].apply(map_to_answer)
df_psydata = df_psydata.reset_index(drop=True).rename(columns=phase_column_names)

# Every phase is defined to end where the next one starts; the recorded
# ".stopped" values are overwritten.
for stop_column, next_start_column in (
    ("SnippetStop", "InputStart"),
    ("InputStop", "OutputStart"),
    ("OutputStop", "CrossStart"),
):
    df_psydata[stop_column] = df_psydata[next_start_column]
df_psydata = df_psydata.drop(columns=["ImagePath", "InputPath", "ImagePathInputs", "CrossStart"])

# Express all timestamps relative to the start of the first snippet.
start_time = df_psydata["SnippetStart"][0]
for time_column in time_columns:
    df_psydata[time_column] = df_psydata[time_column] - start_time

In [11]:
# Tab-separated PsychoPy log without a header row; name the three columns and
# shift the timestamps by the same offset as the behavioural data.
df_psylog = pd.read_csv(psychopy_log_path, sep="\t", header=None)
df_psylog.columns = ["time", "type", "message"]
df_psylog["time"] -= start_time

# Show the questionnaire answers of the current participant.
df_questionaire

Frage\Participant:
age                                             27
sex                                           male
hand                                         right
colorblindness                                  no
concentrationDifficulty                          1
readingDifficulty                                1
job                        Universitätsmitarbeiter
education                                   Master
Name: 1, dtype: object

<h2>Bring Data Together and Group Them by Snippet</h2>

In [12]:
# Label each row of the EEG-based time table with its snippet name (rows are
# matched by index) and derive the missing output end time: the EEG-based
# output start plus the output duration measured by PsychoPy.
df_time["Snippet"] = df_psydata["Snippet"]
output_start_plus_stop = df_time["OutputStart"] + df_psydata["OutputStop"]
df_time["OutputStop"] = output_start_plus_stop - df_psydata["OutputStart"]

In [13]:
import copy
import json

result = {}
template = {
    "Code": {"EyeTracking": None, "EEG": None, "Log": None, "Time": {"Start": None, "Stop": None,},},
    "Input": {"EyeTracking": None, "EEG": None, "Log": None, "Time": {"Start": None, "Stop": None,},},
    "Output": {"EyeTracking": None, "EEG": None, "Log": None, "Time": {"Start": None, "Stop": None,},},
    "Behavioral": None,
}

# Result key of each phase and the column prefix of its start/stop times.
phase_columns = (("Code", "Snippet"), ("Input", "Input"), ("Output", "Output"))

for index, row in df_psydata.iterrows():
    # Deep copy: a shallow `template.copy()` shares the nested phase
    # dictionaries between all snippets, so every snippet would end up
    # holding the data of the last one.
    current = copy.deepcopy(template)

    for phase, prefix in phase_columns:
        start_column, stop_column = prefix + "Start", prefix + "Stop"
        # Eye tracking and EEG are cut with the EEG-based times (df_time),
        # the log with the PsychoPy times (df_psydata). Intervals are
        # half-open: start <= t < stop.
        eeg_start, eeg_stop = df_time[start_column][index], df_time[stop_column][index]
        psy_start, psy_stop = df_psydata[start_column][index], df_psydata[stop_column][index]

        current[phase]["EyeTracking"] = df_eyetracking[(df_eyetracking["time"] >= eeg_start) & (df_eyetracking["time"] < eeg_stop)]
        current[phase]["EEG"] = raw.copy().crop(eeg_start, eeg_stop)
        current[phase]["Log"] = df_psylog[(df_psylog["time"] >= psy_start) & (df_psylog["time"] < psy_stop)]
        current[phase]["Time"]["Start"] = psy_start
        current[phase]["Time"]["Stop"] = psy_stop

    # Behavioural row of this snippet as a one-row frame.
    current["Behavioral"] = df_psydata.iloc[index].to_frame().transpose()
    result[row["Snippet"]] = current

# Questionnaire answers of the participant as a one-row frame.
result["_Meta"] = df_questionaire.to_frame().transpose()

In [14]:
import copy
import sys
from pathlib import Path

# Output folders are derived from the configured participant instead of a
# hard-coded "Participant01".
# NOTE: `eeg_path` is reused here; before this cell it held the path of the
# input .fif file, from now on it is the EEG output folder.
general_path = "./filteredData/Participant" + str(participant_number).zfill(2) + "/"
eye_path = general_path + "EyeTracker/"
eeg_path = general_path + "EEG/"
log_path = general_path + "LOG/"
behavioral_path = general_path + "Behavioral/"
meta_path = general_path + "Meta/"

for folder in (general_path, eye_path, eeg_path, log_path, behavioral_path, meta_path):
    Path(folder).mkdir(parents=True, exist_ok=True)

# `save` mirrors the layout of `result`, but stores the path of the written
# file in place of each DataFrame / Raw object. It is built from scratch:
# deep-copying `result` would duplicate all EEG data only to throw it away.
save = {}
output = sys.stdout
for entry, content in result.items():
    if "_Meta" in entry:
        continue
    print(f"Saving {entry} Files ...", end="", file=output, flush=True)

    saved_entry = {}
    for phase in ("Code", "Input", "Output"):
        eye_file = eye_path + phase + "_" + entry + ".xlsx"
        eeg_file = eeg_path + phase + "_" + entry + "_raw.fif"
        log_file = log_path + phase + "_" + entry + ".xlsx"

        # One progress dot per written file.
        content[phase]["EyeTracking"].to_excel(eye_file, index=False)
        print(".", end="", file=output, flush=True)
        content[phase]["EEG"].save(eeg_file, overwrite=True)
        print(".", end="", file=output, flush=True)
        content[phase]["Log"].to_excel(log_file, index=False)
        print(".", end="", file=output, flush=True)

        saved_entry[phase] = {
            "EyeTracking": eye_file,
            "EEG": eeg_file,
            "Log": log_file,
            "Time": dict(content[phase]["Time"]),
        }

    behavioral_file = behavioral_path + entry + ".xlsx"
    content["Behavioral"].to_excel(behavioral_file, index=False)
    saved_entry["Behavioral"] = behavioral_file
    save[entry] = saved_entry
    print("", file=output, flush=True)

meta_file = meta_path + "Meta.xlsx"
result["_Meta"].to_excel(meta_file, index=False)
save["_Meta"] = meta_file

Saving IsPrime Files ............
Saving SiebDesEratosthenes Files ............
Saving IsAnagram Files ............
Saving RemoveDoubleChar Files ............
Saving BinToDecimal Files ............
Saving PermuteString Files ............
Saving Power Files ............
Saving BinarySearch Files ............
Saving ContainsSubstring Files ............
Saving ReverseArray Files ............
Saving SumArray Files ............
Saving RectanglePower Files ............
Saving Vehicle Files ............
Saving GreatestCommonDivisor Files ............
Saving HIndex Files ............
Saving LengthOfLast Files ............
Saving MedianOnSorted Files ............
Saving SignChecker Files ............
Saving ArrayAverage Files ............
Saving DropNumber Files ............
Saving BinomialCoefficient Files ............
Saving Palindrome Files ............
Saving DumpSorting Files ............
Saving InsertSort Files ............
Saving HeightOfTree Files ............
Saving CheckIfLettersOnly 

In [15]:
# Write the manifest of saved files next to the participant's output folders.
database_file_path = general_path + "DataBase.json"
with open(database_file_path, "w") as database_file:
    json.dump(save, database_file, sort_keys=True, indent=4)

In [27]:
# Imported here so the cell also runs on a fresh kernel, without the saving
# cells above having been executed first.
import json

participant = str(1).zfill(2)
general_path = f"./filteredData/Participant{participant}/"
json_path = general_path + "DataBase.json"
eye_path = general_path + "EyeTracker/"
eeg_path = general_path + "EEG/"
log_path = general_path + "LOG/"
behavioral_path = general_path + "Behavioral/"
meta_path = general_path + "Meta/"

# The JSON manifest supplies the snippet names and the stored time stamps.
# The data files are located through the naming scheme
# "<phase>_<snippet>.<ext>" used when saving.
with open(json_path) as json_file:
    result = json.load(json_file)

for entry in result:
    if "_Meta" in entry:
        continue
    print(f"Loading {entry} Files ...", end="", flush=True)

    for phase in ("Code", "Input", "Output"):
        # One progress dot per loaded file.
        result[entry][phase]["EyeTracking"] = pd.read_excel(eye_path + phase + "_" + entry + ".xlsx")
        print(".", end="", flush=True)
        result[entry][phase]["EEG"] = mne.io.read_raw_fif(eeg_path + phase + "_" + entry + "_raw.fif")
        print(".", end="", flush=True)
        result[entry][phase]["Log"] = pd.read_excel(log_path + phase + "_" + entry + ".xlsx")
        print(".", end="", flush=True)

    result[entry]["Behavioral"] = pd.read_excel(behavioral_path + entry + ".xlsx")
    print("", flush=True)

result["_Meta"] = pd.read_excel(meta_path + "Meta.xlsx")

Loading Ackerman Files ............
Loading ArrayAverage Files ............
Loading BinToDecimal Files ............
Loading BinarySearch Files ............
Loading BinomialCoefficient Files ............
Loading BogoSort Files ............
Loading CheckIfLettersOnly Files ............
Loading ContainsSubstring Files ............
Loading DropNumber Files ............
Loading DumpSorting Files ............
Loading GreatestCommonDivisor Files ............
Loading HIndex Files ............
Loading HeightOfTree Files ............
Loading InsertSort Files ............
Loading IsAnagram Files ............
Loading IsPrime Files ............
Loading LengthOfLast Files ............
Loading MedianOnSorted Files ............
Loading Palindrome Files ............
Loading PermuteString Files ............
Loading Power Files ............
Loading RabbitTortoise Files ............
Loading Rectangle Files ............
Loading RectanglePower Files ............
Loading RemoveDoubleChar Files ............
L