# Cognitive Load and Wearable Integration Analysis


## Files and Data Relationship Description


In [23]:
# import os
# import pandas as pd
# import matplotlib.pyplot as plt


# def visualize_csv(csv_path):
#     # Read CSV file
#     df = pd.read_csv(csv_path)

#     # Plot the data
#     plt.figure(figsize=(10, 6))
#     for column in df.columns:
#         if column != "time":
#             plt.plot(df["time"], df[column], label=column)
#     plt.xlabel("Time")
#     plt.ylabel("Value")
#     plt.title("Data from {}".format(os.path.basename(csv_path)))
#     plt.legend()
#     plt.show()


# def process_directory(root_dir):
#     for subdir, dirs, files in os.walk(root_dir):
#         for file in files:
#             if file.endswith(".csv") and (
#                 "empatica" in file.lower() or "samsung" in file.lower()
#             ):
#                 csv_path = os.path.join(subdir, file)
#                 try:
#                     visualize_csv(csv_path)
#                 except Exception as e:
#                     print(f"Error processing file {csv_path}: {e}")


# # if __name__ == "__main__":
# #     root_directory = "./data/cogwear"
# #     process_directory(root_directory)

In [2]:
# import os
# import pandas as pd


# def process_directory(root_dir, output_file):
#     # Create an empty list to store aggregated data
#     agg_data = []

#     # Iterate through the participant directories
#     for participant_dir in os.listdir(root_dir):
#         participant_id = (
#             participant_dir  # Participant ID is the name of the subdirectory
#         )

#         # Iterate through the subdirectories (baseline and cognitive_load)
#         for sub_dir in ["baseline", "cognitive_load"]:
#             sub_dir_path = os.path.join(root_dir, participant_dir, sub_dir)

#             # Check if the path is a directory
#             if os.path.isdir(sub_dir_path):
#                 # Get the cognitive load label
#                 cl = 0 if sub_dir == "baseline" else 1

#                 # Iterate through the files in the subdirectory
#                 for file in os.listdir(sub_dir_path):
#                     if file.endswith(".csv") and file.startswith(
#                         ("empatica_bvp", "empatica_eda", "empatica_temp", "samsung_bvp")
#                     ):
#                         file_path = os.path.join(sub_dir_path, file)
#                         df = pd.read_csv(file_path)
#                         # Extract relevant data and append to the aggregated list
#                         for _, row in df.iterrows():
#                             agg_data.append(
#                                 {
#                                     "participant_id": participant_id,
#                                     "empatica_bvp": row.get("bvp"),
#                                     "empatica_bvp_time": (
#                                         row.get("time")
#                                         if "empatica_bvp" in file
#                                         else None
#                                     ),
#                                     "empatica_eda": row.get("eda"),
#                                     "empatica_eda_time": (
#                                         row.get("time")
#                                         if "empatica_eda" in file
#                                         else None
#                                     ),
#                                     "empatica_temp": row.get("temp"),
#                                     "empatica_temp_time": (
#                                         row.get("time")
#                                         if "empatica_temp" in file
#                                         else None
#                                     ),
#                                     "samsung_bvp": row.get("PPG GREEN"),
#                                     "samsung_bvp_time": (
#                                         row.get("time")
#                                         if "samsung_bvp" in file
#                                         else None
#                                     ),
#                                     "CL": cl,
#                                 }
#                             )

#     # Create a DataFrame from the aggregated data
#     agg_df = pd.DataFrame(agg_data)

#     # Write aggregated DataFrame to CSV file
#     agg_df.to_csv(output_file, index=False)

# if __name__ == "__main__":
#     root_directory = "./data/cogwear/pilot"
#     output_file = "./data/processed/cogwear-agg.csv"
#     process_directory(root_directory, output_file)

In [3]:
import pandas as pd
import numpy as np


def aggregate_by_time(df, interval_length=5):
    """Group each participant's signal samples into fixed-width time intervals.

    Samples are assigned to half-open intervals ``[start, start + interval_length)``
    based on their ``empatica_bvp_time`` value, starting at each participant's
    smallest ``empatica_bvp_time``. Rows whose ``empatica_bvp_time`` is NaN never
    match an interval and are therefore left out of the result.

    Args:
        df: DataFrame with the columns ``participant_id``,
            ``empatica_bvp_time``, ``empatica_bvp``, ``empatica_eda``,
            ``empatica_temp`` and ``samsung_bvp``.
        interval_length: Width of each interval, in the same units as
            ``empatica_bvp_time``. Must be positive. Defaults to 5.

    Returns:
        A DataFrame with one row per (participant, non-empty interval). Each
        signal column holds the list of values that fell into the interval and
        each ``<signal>_time`` column holds the interval start repeated once
        per value. The frame is empty when no row could be assigned.

    Raises:
        ValueError: If ``interval_length`` is not positive.
    """
    if interval_length <= 0:
        raise ValueError("interval_length must be positive")

    signal_columns = [
        "empatica_bvp",
        "empatica_eda",
        "empatica_temp",
        "samsung_bvp",
    ]
    agg_data = []

    # sort=False keeps participants in order of first appearance.
    for participant_id, participant_df in df.groupby("participant_id", sort=False):
        times = participant_df["empatica_bvp_time"]
        min_time = times.min()
        max_time = times.max()

        # min() is NaN when the participant has no usable timestamp at all;
        # there is nothing to bin in that case.
        if pd.isna(min_time):
            continue

        # Use enough intervals that the last one contains max_time itself.
        # Intervals are half-open, so a span that is an exact multiple of
        # interval_length needs one interval starting at max_time.
        n_intervals = int(np.floor((max_time - min_time) / interval_length)) + 1
        if min_time + interval_length * n_intervals <= max_time:
            # Guard against floating-point rounding in the division above.
            n_intervals += 1
        intervals = min_time + interval_length * np.arange(n_intervals + 1)

        for interval_start, interval_end in zip(intervals[:-1], intervals[1:]):
            # The mask is the same for every signal column, so build it once.
            in_interval = participant_df[
                (times >= interval_start) & (times < interval_end)
            ]

            # Skip gaps instead of emitting rows that only carry the id.
            if in_interval.empty:
                continue

            interval_data = {"participant_id": participant_id}
            for column in signal_columns:
                interval_data[column] = in_interval[column].tolist()
                interval_data[column + "_time"] = [interval_start] * len(in_interval)
            agg_data.append(interval_data)

    # Create DataFrame from aggregated data
    return pd.DataFrame(agg_data)


if __name__ == "__main__":
    # Load the previously written per-sample aggregate CSV.
    cogwear_df = pd.read_csv(filepath_or_buffer="./data/processed/cogwear-agg.csv")

    # Bin the samples per participant into time intervals.
    aggregated_df = aggregate_by_time(df=cogwear_df)

    # Persist the binned result next to the input, without the index column.
    aggregated_df.to_csv(
        path_or_buf="./data/processed/cogwear-agg-time-secv.csv",
        index=False,
    )