In [4]:
# Merge time-domain (preprocessed) with frequency-domain (FFT) features
# Keys: participant_id, activity_id
# Output: COEN498-691_HAR_features_combined.csv

import pandas as pd

# --- Inputs ---
TD_URL  = "https://raw.githubusercontent.com/COEN498-691-PROJECT/ML_project/refs/heads/main/data/processed/COEN498-691_HAR_preprocessed_dataset.csv"
FD_PATH = "HAR_fft_features_acc.csv"            # your FFT CSV in the current Colab working dir
OUT_PATH = "COEN498-691_HAR_features_combined.csv"

# Columns that identify a (participant, activity) pair in both tables.
KEYS = ["participant_id", "activity_id"]


def normalize_keys(df, keys=KEYS):
    """Normalize the join-key columns of *df* in place and return *df*.

    Each key column is cast to ``str``, stripped and lowercased.  Integer-valued
    floats are then collapsed to their integer spelling ("1.0" -> "1"): when
    one CSV is parsed with a float key column and the other with an int key
    column, a plain ``astype(str)`` would otherwise yield keys that can never
    match in the merge.

    Raises:
        KeyError: if any column in *keys* is missing from *df*.
    """
    for col in keys:
        text = df[col].astype(str).str.strip().str.lower()
        df[col] = text.str.replace(r"^(-?\d+)\.0+$", r"\1", regex=True)
    return df


def aggregate_fft(fd, keys=KEYS):
    """Return one row per key combination, averaging the numeric FFT columns.

    Non-numeric, non-key columns are dropped (``numeric_only=True``).
    """
    return fd.groupby(list(keys), as_index=False).mean(numeric_only=True)


def merge_features(td, fd_agg, keys=KEYS):
    """Left-join *fd_agg* onto *td* and report how many rows found a match.

    All rows of *td* are kept; FFT columns are NaN where no key matched.

    Returns:
        (combined, matched): the merged frame sorted by *keys* with a fresh
        0..n-1 index, and the number of *td* rows that received FFT features.
    """
    keys = list(keys)
    # The indicator column is only used to count matches; it is dropped again
    # so the output schema is exactly td's columns + the FFT feature columns.
    merged = td.merge(fd_agg, on=keys, how="left", indicator="_fft_merge")
    matched = int((merged["_fft_merge"] == "both").sum())
    combined = (
        merged
        .drop(columns="_fft_merge")
        .sort_values(keys)          # optional: sort for readability
        .reset_index(drop=True)
    )
    return combined, matched


# In a notebook/Colab cell (and when run as a script) __name__ is "__main__",
# so this section executes as before and defines td, fd, fd_agg and combined
# as globals.  The guard only keeps the download from firing on import.
if __name__ == "__main__":
    # --- Load ---
    td = pd.read_csv(TD_URL)
    fd = pd.read_csv(FD_PATH)

    # --- Normalize keys to maximize matches ---
    normalize_keys(td)
    normalize_keys(fd)

    # If FFT has multiple rows per (participant_id, activity_id), aggregate (mean) to one row
    fd_agg = aggregate_fft(fd)

    print("Time-domain shape:", td.shape)
    print("Freq-domain shape (original):", fd.shape)
    print("Freq-domain shape (aggregated):", fd_agg.shape)

    # --- Left-join: keep all time-domain rows, attach FFT features where available ---
    combined, n_matched = merge_features(td, fd_agg)

    print("Combined shape:", combined.shape)
    # Surface the match rate: a left join never fails, it just fills NaN, so
    # without this a key mismatch goes unnoticed until model training.
    print(f"Rows with FFT features: {n_matched} / {len(combined)}")
    if n_matched == 0 and len(combined) > 0:
        print(
            "WARNING: no time-domain row matched any FFT row; "
            "check that participant_id/activity_id use the same values in both files."
        )

    # `display` is injected by IPython/Colab only; fall back to print elsewhere.
    try:
        show = display
    except NameError:
        show = print
    show(combined.head())

    # --- Save ---
    combined.to_csv(OUT_PATH, index=False)
    print("Saved:", OUT_PATH)


Time-domain shape: (4751, 30)
Freq-domain shape (original): (20, 19)
Freq-domain shape (aggregated): (20, 19)
Combined shape: (4751, 47)


Unnamed: 0,ax_mean,ax_std,ax_max,ax_min,ax_range,ax_skew,ax_kurt,ax_zcr,ay_mean,ay_std,...,ay_f_dominant,ay_amp_dominant,ay_spectral_energy,ay_spectral_centroid,ay_bandwidth_5_95,az_f_dominant,az_amp_dominant,az_spectral_energy,az_spectral_centroid,az_bandwidth_5_95
0,-0.002901,0.004928,0.006377,-0.016937,0.023314,-0.768933,1.685408,0,-0.000697,0.002754,...,,,,,,,,,,
1,0.001048,0.00858,0.034233,-0.008386,0.042619,2.818647,9.516207,0,-0.001384,0.005157,...,,,,,,,,,,
2,-0.014103,0.041628,0.06774,-0.110841,0.178581,-0.524075,0.473809,6,-0.00596,0.015905,...,,,,,,,,,,
3,-0.004977,0.0658,0.150043,-0.110841,0.260884,0.468019,-0.174312,11,0.003083,0.025811,...,,,,,,,,,,
4,0.014108,0.055004,0.150043,-0.096431,0.246474,0.312943,0.285617,8,0.005712,0.024911,...,,,,,,,,,,


Saved: COEN498-691_HAR_features_combined.csv
