## Dominant-hand filtering and technique-specific blanking for `data.csv`

This notebook:
1. Loads `data.csv`.
2. Infers each participant's dominant hand via paired t-tests on left/right metrics.
3. Blanks the non-dominant hand measurements and builds unified `dominant_*` columns.
4. Further blanks hand metrics for the `Chicken` technique and head metrics for `Astral`, `Grab`, `Sliding`, `Teleport`, and `Throw` techniques.


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_rel

# Raw input file; the relative path is resolved against the current working directory.
raw_path = "data.csv"
df = pd.read_csv(filepath_or_buffer=raw_path)

# Report the size of what was read, then show the first rows as the cell output.
print(f"Loaded {len(df)} rows and {df.shape[1]} columns from {raw_path}")
df.head(n=5)

### Metrics used to infer dominance
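For each participant, we run a paired t-test (alpha = 0.05) on the left vs. right values of each metric pair below. Every significant pair casts one vote for the hand with the larger mean; the hand with more votes is taken as dominant, and participants with no significant pair or with tied votes are labelled `Ambiguous`: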
- Total distance
- Extent
- Thumbstick distance and extent
- Trigger pressure
- Grip pressure
- Head distance


In [None]:
# Metric labels in camelCase. The raw columns are named "left<Label>" and
# "right<Label>", i.e. the label with its first letter upper-cased.
metric_labels = [
    "totalDistance",
    "extent",
    "thumbstickDistance",
    "thumbstickExtent",
    "triggerPressure",
    "gripPressure",
    "headDistance",
]

# (left column, right column, label) triples, in the same order as the labels.
hand_pairs = [
    (
        "left" + metric[:1].upper() + metric[1:],
        "right" + metric[:1].upper() + metric[1:],
        metric,
    )
    for metric in metric_labels
]

# Significance threshold for the paired t-tests.
alpha = 0.05

def infer_dominant_hand(frame, pairs, alpha=0.05):
    """Infer each participant's dominant hand from paired left/right metrics.

    Rows are grouped by the ``iD`` column. For every group and every
    ``(left, right, label)`` triple in ``pairs``, a paired t-test compares the
    two columns over the rows where both values are present. A pair whose
    p-value is below ``alpha`` casts one vote for the side with the larger
    mean; the sign of the summed votes decides the hand.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``iD`` and every left/right column named in ``pairs``.
    pairs : iterable of (str, str, str)
        ``(left_column, right_column, label)`` triples.
    alpha : float, default 0.05
        Significance threshold a pair must beat to cast a vote.

    Returns
    -------
    dominant : dict
        Maps each ``iD`` to ``"Right"``, ``"Left"`` or ``"Ambiguous"``
        (no significant pair, or the votes cancel out).
    detail_df : pandas.DataFrame
        One row per participant x metric for which a p-value could be
        computed, with columns ``iD, metric, left_mean, right_mean,
        mean_diff, p_value``. The columns are present even when the frame
        has no rows, so callers can always select them.
    """
    detail_columns = ["iD", "metric", "left_mean", "right_mean", "mean_diff", "p_value"]
    dominant = {}
    rows = []
    for pid, group in frame.groupby("iD"):
        votes = []
        for left, right, label in pairs:
            # A paired test needs both values on the same row.
            paired = group[[left, right]].dropna()
            if len(paired) < 2:
                continue
            _, p = ttest_rel(paired[left], paired[right])
            # p is NaN when the test is undefined (e.g. constant differences).
            if np.isnan(p):
                continue
            left_mean = paired[left].mean()
            right_mean = paired[right].mean()
            # Positive difference means the right hand scored higher.
            mean_diff = right_mean - left_mean
            if p < alpha:
                votes.append(np.sign(mean_diff))
            rows.append({
                "iD": pid,
                "metric": label,
                "left_mean": left_mean,
                "right_mean": right_mean,
                "mean_diff": mean_diff,
                "p_value": p,
            })
        # No votes and tied votes both end up as a zero score.
        score = np.sum(votes) if votes else 0
        if score > 0:
            dominant[pid] = "Right"
        elif score < 0:
            dominant[pid] = "Left"
        else:
            dominant[pid] = "Ambiguous"
    # Fixing the columns keeps the schema stable when `rows` is empty.
    detail_df = pd.DataFrame(rows, columns=detail_columns)
    return dominant, detail_df


In [None]:
# Run the inference once and attach each participant's verdict to all of their rows.
dominant_map, ttest_details = infer_dominant_hand(df, hand_pairs, alpha)
df["dominant_hand"] = df["iD"].map(dominant_map)

print("Dominant hand per participant:")
print(df[["iD", "dominant_hand"]].drop_duplicates().sort_values("iD"))
# The leading blank line must be the "\n" escape: a raw line break inside a
# single-quoted string literal is a SyntaxError.
print("\nSample of t-test details (per participant x metric):")
display(ttest_details.head())

In [None]:
# Work on a copy so `df` keeps the original left/right measurements.
clean_df = df.copy()

# The row masks do not depend on the metric, so build them once.
is_right = clean_df["dominant_hand"] == "Right"
is_left = clean_df["dominant_hand"] == "Left"

for left, right, label in hand_pairs:
    # Series.mask inserts NaN and upcasts integer columns as needed; assigning
    # NaN into an integer column through .loc is deprecated in recent pandas.
    clean_df[left] = clean_df[left].mask(is_right)
    clean_df[right] = clean_df[right].mask(is_left)
    # Right-dominant rows take the right value, left-dominant rows the left
    # value; every other row (e.g. "Ambiguous") gets NaN.
    clean_df[f"dominant_{label}"] = clean_df[right].where(
        is_right, clean_df[left].where(is_left)
    )

clean_df.head()

### Technique-specific blanking
- `Chicken`: blank all hand-related columns (left/right total distance and extent, thumbstick, trigger/grip, and head-distance-by-hand) and the corresponding `dominant_*` hand columns.
- `Astral`, `Grab`, `Sliding`, `Teleport`, `Throw`: blank head-related columns and `dominant_headDistance`.


In [None]:
def blank_rows(frame, row_mask, columns):
    """Set every column in `columns` to NaN on the rows where `row_mask` is True.

    Modifies `frame` in place, one column at a time. Series.mask is used
    instead of `.loc[...] = np.nan` because it upcasts integer columns when it
    inserts NaN, whereas NaN assignment into an integer column through .loc is
    deprecated in recent pandas. A column missing from `frame` raises KeyError.
    """
    for col in columns:
        frame[col] = frame[col].mask(row_mask)


hand_cols = [
    "leftTotalDistance", "rightTotalDistance",
    "leftExtent", "rightExtent",
    "leftHeadDistance", "rightHeadDistance",
    "leftThumbstickDistance", "rightThumbstickDistance",
    "leftThumbstickExtent", "rightThumbstickExtent",
    "leftTriggerPressure", "rightTriggerPressure",
    "leftGripPressure", "rightGripPressure",
]

# Dominant columns generated earlier that relate to hands (includes headDistance derived from hands)
dominant_hand_cols = [
    "dominant_totalDistance",
    "dominant_extent",
    "dominant_thumbstickDistance",
    "dominant_thumbstickExtent",
    "dominant_triggerPressure",
    "dominant_gripPressure",
    "dominant_headDistance",
]

head_cols = ["headTotalDistance", "headExtent"]
dominant_head_cols = ["dominant_headDistance"]

# Chicken rows lose every hand measurement, raw and dominant_*.
chicken_mask = clean_df["technique"] == "Chicken"
blank_rows(clean_df, chicken_mask, hand_cols)
# dominant_* columns are only blanked if they exist in the frame.
blank_rows(clean_df, chicken_mask, [c for c in dominant_hand_cols if c in clean_df.columns])

# These techniques lose the head measurements and dominant_headDistance.
head_mask = clean_df["technique"].isin(["Astral", "Grab", "Sliding", "Teleport", "Throw"])
blank_rows(clean_df, head_mask, head_cols)
blank_rows(clean_df, head_mask, [c for c in dominant_head_cols if c in clean_df.columns])

print("Applied technique-specific blanking:")
print(f"Hand columns blanked for Chicken rows: {chicken_mask.sum()} rows")
print(f"Head columns blanked for Astral/Grab/Sliding/Teleport/Throw rows: {head_mask.sum()} rows")

clean_df.head()

In [None]:
# Write the blanked table to a relative path; index=False keeps the row index out of the file.
clean_path = "data_cleaned.csv"
clean_df.to_csv(path_or_buf=clean_path, index=False)
print(f"Saved cleaned data with non-dominant and technique-specific blanking to {clean_path}")

# Cell output: the distinct (participant, inferred hand) combinations, ordered by iD.
participant_hands = clean_df.loc[:, ["iD", "dominant_hand"]].drop_duplicates()
participant_hands.sort_values(by="iD")
