In [1]:
# static_movements = [
#     "Quiet-Standing-Eyes-Open",
#     "Quiet-Standing-Eyes-Closed",
#     "Foam-Quiet-Standing-Eyes-Open",
#     "Foam-Quiet-Standing-Eyes-Closed",
#     "Semi-Tandem-Balance",
#     "Tandem-Balance",
#     "Unilateral-Stance-Eyes-Open",
#     "Unilateral-Stance-Eyes-Closed"
# ]



In [2]:
import os
import pandas as pd

# Root of the KINECAL dataset, e.g. .../kinecal/1.0.3/kinecal.
# Set the KINECAL_DIR environment variable to point somewhere else; the
# previously hard-coded location stays as the default so existing runs are
# unaffected.
BASE = os.environ.get("KINECAL_DIR", "/home/timo/mdf/data/kinecal")

all_dfs = []

for root, dirs, files in os.walk(BASE):
    # os.walk (top-down) honours in-place edits of `dirs`; sorting makes the
    # traversal order, and hence the row order of `sway_all`, reproducible
    # instead of filesystem-dependent.
    dirs.sort()

    if "sway_metrics.csv" not in files:
        continue

    fpath = os.path.join(root, "sway_metrics.csv")
    try:
        df = pd.read_csv(fpath)
    except Exception as e:
        # Best effort: report the unreadable file and keep collecting the rest.
        print(f"[WARN] Could not read {fpath}: {e}")
        continue

    # Drop junk unnamed columns if present
    df = df.loc[:, ~df.columns.str.contains("^Unnamed")]

    # Normalise the hyphenated label name per file, *before* concatenating.
    # Renaming only after pd.concat would leave two half-empty columns that
    # end up sharing one name whenever files disagree on the spelling.
    df = df.rename(columns={"clinically-at-risk": "clinically_at_risk"})

    # Some files might not have movement/part_id columns
    # -> derive them from the path if missing.
    if "movement" not in df.columns or "part_id" not in df.columns:
        # Example path: .../kinecal/2/2_Semi-Tandem-Balance/sway_metrics/sway_metrics.csv
        parts = os.path.normpath(root).split(os.sep)
        # Movement folder name like "2_Semi-Tandem-Balance"; it is the parent
        # directory when the CSV sits inside a "sway_metrics" subfolder.
        mov_folder = parts[-2] if parts[-1] == "sway_metrics" else parts[-1]
        try:
            # "<part_id>_<movement>": split on the first underscore only.
            pid_str, movement_name = mov_folder.split("_", 1)
            part_id = int(pid_str)
        except ValueError:
            # Fallback: don't break if naming is unexpected (no underscore,
            # or a non-numeric prefix).
            movement_name = mov_folder
            part_id = None

        if "movement" not in df.columns:
            df["movement"] = movement_name
        if "part_id" not in df.columns:
            df["part_id"] = part_id

    all_dfs.append(df)

# Concatenate everything
if not all_dfs:
    raise RuntimeError("No sway_metrics.csv files were found. Check BASE path.")

sway_all = pd.concat(all_dfs, ignore_index=True)

# Movement names are compared against plain strings further down.
sway_all["movement"] = sway_all["movement"].astype(str)

print("Total rows from all movements:", len(sway_all))
print("Unique movements found:", sway_all["movement"].unique())


Total rows from all movements: 521
Unique movements found: ['Semi-Tandem-Balance' 'Quiet-Standing-Eyes-Open'
 'Quiet-Standing-Eyes-Closed' 'Tandem-Balance'
 'Unilateral-Stance-Eyes-Open' 'Unilateral-Stance-Eyes-Closed'
 'Foam-Quiet-Standing-Eyes-Closed' 'Foam-Quiet-Standing-Eyes-Open'
 'Quiet-Standing-Eyes-Open_backup' 'Unilateral-Stance-Eyes-Open_fail-16s'
 'Foam-Quiet-Standing-Eyes-Open (copy)'
 'Foam-Quiet-Standing-Eyes-Closed (copy)']


In [3]:
# Static balance tasks kept for the analysis. Names must match the values of
# the `movement` column exactly, so backup/copy variants are left out.
static_movements = [
    # quiet standing on a firm surface
    "Quiet-Standing-Eyes-Open", "Quiet-Standing-Eyes-Closed",
    # quiet standing on foam
    "Foam-Quiet-Standing-Eyes-Open", "Foam-Quiet-Standing-Eyes-Closed",
    # tandem stances
    "Semi-Tandem-Balance", "Tandem-Balance",
    # single-leg stance
    "Unilateral-Stance-Eyes-Open", "Unilateral-Stance-Eyes-Closed",
]


In [4]:
# Keep only rows whose movement is one of the static tasks; take a copy so
# later column assignments do not touch `sway_all`.
sway_static = sway_all.loc[
    lambda frame: frame["movement"].isin(static_movements)
].copy()

print("Rows after static filter:", len(sway_static))
print("Movements in static set:", sway_static["movement"].unique())


Rows after static filter: 517
Movements in static set: ['Semi-Tandem-Balance' 'Quiet-Standing-Eyes-Open'
 'Quiet-Standing-Eyes-Closed' 'Tandem-Balance'
 'Unilateral-Stance-Eyes-Open' 'Unilateral-Stance-Eyes-Closed'
 'Foam-Quiet-Standing-Eyes-Closed' 'Foam-Quiet-Standing-Eyes-Open']


In [5]:
# Binary flag: 1 when the row's group is "FHs" or "FHm", 0 otherwise.
sway_static = sway_static.assign(
    is_faller=lambda frame: frame["group"].isin(["FHs", "FHm"]).astype(int)
)

In [6]:
# Inspect the available columns before choosing which ones to drop or pivot.
sway_static.columns

Index(['part_id', 'movement', 'group', 'age', 'sex', 'height', 'weight', 'BMI',
       'recorded_in_the_lab', 'clinically_at_risk', 'RDIST_ML', 'RDIST_AP',
       'RDIST', 'MDIST_ML', 'MDIST_AP', 'MDIST', 'TOTEX_ML', 'TOTEX_AP',
       'TOTEX', 'MVELO_ML', 'MVELO_AP', 'MVELO', 'MFREQ_ML', 'MFREQ_AP',
       'MFREQ', 'AREA_CE', 'part', 'acton', 'sway_metric_name',
       'impairment_self', 'impairment_confedence', 'impairment_clinical',
       'impairment_stats', 'is_faller'],
      dtype='object')

In [7]:
# Remove the impairment annotation columns, which are not used downstream.
# errors="ignore" keeps this cell idempotent: it rebinds `sway_static`, so
# running it a second time would otherwise raise KeyError because the columns
# are already gone.
sway_static = sway_static.drop(
    columns=[
        "impairment_self",
        "impairment_confedence",  # spelled this way in the data's column names
        "impairment_clinical",
        "impairment_stats",
    ],
    errors="ignore",
)

In [8]:

# Sway metrics to aggregate: medio-lateral (_ML), antero-posterior (_AP) and
# combined variants of each family, plus AREA_CE.
metric_cols = [
    "RDIST_ML", "RDIST_AP", "RDIST",
    "MDIST_ML", "MDIST_AP", "MDIST",
    "TOTEX_ML", "TOTEX_AP", "TOTEX",
    "MVELO_ML", "MVELO_AP", "MVELO",
    "MFREQ_ML", "MFREQ_AP", "MFREQ",
    "AREA_CE",
]

# One row per participant, one column per (metric, movement) pair; repeated
# recordings of the same movement are averaged.
participant_features = sway_static.pivot_table(
    values=metric_cols,
    index="part_id",
    columns="movement",
    aggfunc="mean",
)

# Collapse the two-level column index into "<metric>_<movement>" names.
participant_features.columns = [
    "_".join(map(str, level_pair))
    for level_pair in participant_features.columns
]

# Turn the part_id index back into an ordinary column.
participant_features = participant_features.reset_index()


In [13]:
# Load the participant register and add `age_num`, a numeric version of `age`
# in which the '>89' entry is mapped to 90 (anything else non-numeric becomes
# NaN). The original `age` column is left as it was read.
reg = pd.read_csv("data/register_processed.csv").assign(
    age_num=lambda register: pd.to_numeric(
        register["age"].replace({">89": "90"}),
        errors="coerce",
    )
)

In [14]:
# Demographic / label columns taken from the participant register.
demo_cols = [
    "part_id",
    "group",
    "age_num",            # or "age" if you prefer
    "sex",
    "height",
    "weight",
    "BMI",
    "recorded_in_the_lab",
    "faller",             # your binary label
]

demo = reg[demo_cols].copy()

# Left join keeps every participant that has sway features.
# validate="one_to_one" makes pandas raise MergeError if part_id is duplicated
# on either side, instead of silently duplicating participant rows.
# NOTE(review): assumes part_id has the same (numeric) dtype in both frames;
# confirm if the merge produces unexpected NaNs.
final_features = participant_features.merge(
    demo,
    on="part_id",
    how="left",
    validate="one_to_one",
)


In [15]:
# Quick sanity check of the merged table: size, first feature names, and the
# identifier/label columns for the first few participants.
print(final_features.shape)
print(final_features.columns[:15])
print(final_features.head()[["part_id", "group", "age_num", "faller"]])


(90, 137)
Index(['part_id', 'AREA_CE_Foam-Quiet-Standing-Eyes-Closed',
       'AREA_CE_Foam-Quiet-Standing-Eyes-Open',
       'AREA_CE_Quiet-Standing-Eyes-Closed',
       'AREA_CE_Quiet-Standing-Eyes-Open', 'AREA_CE_Semi-Tandem-Balance',
       'AREA_CE_Tandem-Balance', 'AREA_CE_Unilateral-Stance-Eyes-Closed',
       'AREA_CE_Unilateral-Stance-Eyes-Open',
       'MDIST_Foam-Quiet-Standing-Eyes-Closed',
       'MDIST_Foam-Quiet-Standing-Eyes-Open',
       'MDIST_Quiet-Standing-Eyes-Closed', 'MDIST_Quiet-Standing-Eyes-Open',
       'MDIST_Semi-Tandem-Balance', 'MDIST_Tandem-Balance'],
      dtype='object')
   part_id group  age_num  faller
0        2    HA       54       0
1        3   FHm       71       1
2        4    HA       38       0
3        5    HA       56       0
4        6    HA       55       0


In [17]:
# Persist the per-participant feature table; the index is omitted because
# part_id is already an ordinary column.
final_features.to_csv(
    "data/sway_static_features.csv",
    index=False,
)