In [None]:
import os
import sys
# Put the parent directory on the import search path.
# NOTE(review): presumably this is what lets the `data_utils` imports in a
# later cell resolve when the notebook runs from its own folder — confirm.
sys.path.append("..")

In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import collections as matcoll
%matplotlib inline

In [None]:
from data_utils.dataloader import Dataloader
from data_utils.preprocessor import Preprocessor
from data_utils import X_HEADER

In [None]:
# Point the loader at ../data/TrainingData, relative to the notebook's
# working directory.
data_base_path = os.path.join(os.pardir, "data", "TrainingData")
dataloader = Dataloader(data_base_path)

In [None]:
# Walk every (subject, session) combination and collect, for each recording
# that exists on disk, the "min" and "max" rows of `x.describe()`.
# Subjects are "001".."008" and sessions are "01".."08" (zero-padded strings).
subject_ids = [f"{subject:03d}" for subject in range(1, 9)]
session_numbers = [f"{session:02d}" for session in range(1, 9)]
samplewise_stats = []
for subject_id in subject_ids:
    for session_number in session_numbers:
        try:
            # `y` is returned by the loader but is not used in this notebook.
            x, y = dataloader.load_and_join_data(subject_id, session_number)
            stats = (
                x.describe()
                .reset_index(drop=False)
                .rename(columns={"index": "stat"})
                # Keep only the extreme-value rows of the summary.
                .loc[lambda summary: summary["stat"].isin(["min", "max"])]
                # The "time" column is excluded from the statistics.
                .drop(columns="time")
            )
            samplewise_stats.append(stats)
            print(f"Subject {subject_id} session {session_number} done.")
        except FileNotFoundError:
            # Missing recordings are reported and skipped rather than aborting.
            print(f"Subject {subject_id} session {session_number} not found.")

In [None]:
# Stack the per-session min/max rows vertically (row-wise is concat's default).
stat_df = pd.concat(samplewise_stats)

In [None]:
# Aggregations to compute across sessions. Each name is used twice in the next
# cell: as the groupby reduction to call and as the "stat" row label to keep.
stats_to_find = ["min", "max"]

In [None]:
# For each requested statistic, reduce the per-session rows with that same
# statistic (min of the mins, max of the maxes) and keep only the matching row.
aggregate_stats = []
for stat in stats_to_find:
    # The reduction runs on both "stat" groups; only the group whose label
    # equals the reduction name is meaningful, so the other is filtered out.
    per_stat = stat_df.groupby("stat").agg(stat).reset_index(drop=False)
    aggregate_stats.append(per_stat.loc[per_stat["stat"] == stat])

In [None]:
# Combine the per-statistic rows into one table with a fresh 0..n-1 index,
# then display it.
final_stats = pd.concat(aggregate_stats, axis=0, ignore_index=True)
final_stats

In [None]:
# Persist the aggregated statistics table as CSV (header row, no index column).
# The output directory is created first so a fresh checkout without a
# `metadata` folder does not fail at this last step, after the data loading
# has already run.
statistics_csv_path = os.path.join("..", "metadata", "statistics.csv")
os.makedirs(os.path.dirname(statistics_csv_path), exist_ok=True)
final_stats.to_csv(statistics_csv_path, header=True, index=False)

In [None]:
# Reshape the statistics table into a nested mapping:
#   {feature name: {statistic label: value}}
# with one inner entry per row of `final_stats`.
stats_dict = {feature: {} for feature in X_HEADER}
for _, stat_row in final_stats.iterrows():
    stat_name = stat_row["stat"]
    for feature in X_HEADER:
        stats_dict[feature][stat_name] = stat_row[feature]

In [None]:
# Show the nested {feature: {stat: value}} mapping for a visual check.
stats_dict

In [None]:
# Write the nested {feature: {stat: value}} mapping as indented JSON.
# The output directory is created first so the write does not fail when
# `../metadata` is absent (e.g. on a fresh checkout).
statistics_json_path = os.path.join("..", "metadata", "statistics.json")
os.makedirs(os.path.dirname(statistics_json_path), exist_ok=True)
with open(statistics_json_path, "w") as f:
    json.dump(stats_dict, f, indent=2)