In [2]:
import pandas as pd
from pathlib import Path

# ---------------------------------------------------------------------------
# Input / output locations -- edit these to point at your own files.
# ---------------------------------------------------------------------------
file1 = "./input/AppleWatch - HeartRate StepCount etc 8440 rows - analysis.csv"
file2 = "./input/AppleWatch - HeartRate StepCount etc 92406 rows - export20200620105726.csv"
out_path = "./apple_watch_merged_minimal.csv"

# The only columns carried over from each input file into the merged output.
KEEP_COLS = [
    "type",
    "unit",
    "creationDate",
    "startDate",
    "endDate",
    "value",
]

def read_minimal(path, *, keep_cols=None, encoding="utf-8"):
    """Load a CSV file and reduce it to a fixed set of string columns.

    Every column is read as a string. Header names are stripped of
    surrounding whitespace and of a leading byte-order mark. Requested
    columns that the file does not contain are added, filled with empty
    strings. Rows in which every kept column is blank (missing, or only
    whitespace) are dropped.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        keep_cols: Column names to keep, in output order. Defaults to the
            module-level ``KEEP_COLS``.
        encoding: Text encoding of the file. Try ``"utf-8-sig"`` or
            ``"cp932"`` if the default raises a decoding error.

    Returns:
        A DataFrame holding only the requested columns. The row index is
        not reset after blank rows are removed.
    """
    wanted = list(KEEP_COLS) if keep_cols is None else list(keep_cols)

    frame = pd.read_csv(
        path,
        dtype=str,          # keep every value as text; avoids dtype warnings
        encoding=encoding,
        engine="python",    # more tolerant of special characters
    )

    # Strip again after removing the BOM so that whitespace sitting between
    # the BOM and the column name does not survive.
    frame.columns = [
        name.strip().lstrip("\ufeff").strip() for name in frame.columns
    ]

    # Guarantee that every requested column exists.
    for name in wanted:
        if name not in frame.columns:
            frame[name] = ""

    frame = frame[wanted].copy()

    # A cell counts as blank when it is missing or whitespace-only; a row is
    # dropped only when all of its kept cells are blank.
    blank_cells = frame.apply(lambda col: col.fillna("").str.strip().eq(""))
    all_blank = blank_cells.all(axis=1)
    return frame[~all_blank]

# Read both files
df1 = read_minimal(file1)
df2 = read_minimal(file2)

# Concatenate
merged = pd.concat([df1, df2], ignore_index=True)


def _to_utc_timestamps(column):
    """Parse a column of timestamp strings into UTC datetimes for sorting.

    Blank or unparseable entries become NaT, so ``na_position`` applies to
    them.
    """
    return pd.to_datetime(column, errors="coerce", utc=True)


# Sort chronologically by startDate, then creationDate (stable sort).
# The columns hold text such as "2016-09-22 13:41:39 +0900". Comparing that
# text directly is only chronological while every row carries the same UTC
# offset, and the "" placeholders written by read_minimal for absent columns
# would sort first even though na_position="last" is requested. Sorting on
# the parsed instants fixes both.
# NOTE(review): rows whose timestamps cannot be parsed are placed at the end,
# in their concatenation order -- confirm both inputs share a date format.
merged = merged.sort_values(
    by=["startDate", "creationDate"],
    key=_to_utc_timestamps,
    kind="mergesort",
    na_position="last",
)

# Save the merged file (create the output directory first if needed)
Path(out_path).parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(out_path, index=False, encoding="utf-8")

print("✅ Merge completed")
print("Output file:", out_path)
print("Total rows:", len(merged))
print("Preview:")
print(merged.head(5).to_string(index=False))

✅ Merge completed
Output file: ./apple_watch_merged_minimal.csv
Total rows: 100844
Preview:
                                          type  unit              creationDate                 startDate                   endDate   value
             HKQuantityTypeIdentifierStepCount count 2016-09-22 14:54:10 +0900 2016-09-22 13:41:39 +0900 2016-09-22 13:49:03 +0900     239
HKQuantityTypeIdentifierDistanceWalkingRunning    km 2016-09-22 14:54:10 +0900 2016-09-22 13:41:39 +0900 2016-09-22 13:49:03 +0900 0.13665
        HKQuantityTypeIdentifierFlightsClimbed count 2016-09-22 14:54:10 +0900 2016-09-22 13:48:32 +0900 2016-09-22 13:48:32 +0900       1
        HKQuantityTypeIdentifierFlightsClimbed count 2016-09-22 14:54:10 +0900 2016-09-22 13:48:50 +0900 2016-09-22 13:48:50 +0900       1
             HKQuantityTypeIdentifierStepCount count 2016-09-22 14:54:10 +0900 2016-09-22 13:49:03 +0900 2016-09-22 13:57:53 +0900     433
