In [None]:
import os
import pandas as pd

# Directory containing all metric CSV files
username = "someshbgd3"
data_dir = f"Data/{username}/Cleaned"

# List all CSV files in the directory. os.listdir() returns entries in
# arbitrary order, so sort them to make the load order (and the row order of
# everything derived from it) reproducible between runs.
csv_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".csv"))

# Dictionary mapping metric name -> DataFrame holding that metric's rows
dfs = {}

# Load all CSVs into the dictionary
for file in csv_files:
    file_path = os.path.join(data_dir, file)

    try:
        df = pd.read_csv(file_path)

        # Ensure 'start' is in datetime format; unparseable values become NaT
        df["start"] = pd.to_datetime(df["start"], errors="coerce")

        # The metric name is the text after the last underscore of the file
        # name with the extension stripped. splitext() removes only the
        # trailing ".csv", whereas str.replace() would also delete a ".csv"
        # occurring in the middle of the name.
        metric_name = os.path.splitext(file)[0].split("_")[-1]
        df["metric"] = metric_name  # Add a column for the metric name

        # Several files can resolve to the same metric name; append their rows
        # instead of silently overwriting the frame loaded earlier.
        if metric_name in dfs:
            dfs[metric_name] = pd.concat([dfs[metric_name], df], ignore_index=True)
        else:
            dfs[metric_name] = df

    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest
        print(f"❌ Error reading {file}: {e}")

# Combine all metrics into a single DataFrame. pd.concat() raises an opaque
# "No objects to concatenate" ValueError on empty input, so fail early with a
# message that names the directory when no file could be loaded.
if not dfs:
    raise ValueError(f"No metric CSV files could be loaded from {data_dir}")
df_combined = pd.concat(list(dfs.values()), ignore_index=True)

# Truncate 'start' down to the whole minute (floor, not round-to-nearest).
# "min" is used instead of the deprecated "T" alias, which emits a
# FutureWarning in recent pandas releases.
df_combined["start"] = df_combined["start"].dt.floor("min")

# Identify heart rate data
df_heart_rate = df_combined[df_combined["metric"] == "heartRate"]

# Identify all other metrics
df_other_metrics = df_combined[df_combined["metric"] != "heartRate"]

# Timestamps at which at least one other metric has a row
existing_metrics_time = df_other_metrics["start"].unique()

# Assign every heart-rate row to its 10-minute bucket. "10min" is used instead
# of the deprecated "10T" alias, which emits a FutureWarning in recent pandas.
df_heart_rate_filtered = df_heart_rate.copy()
df_heart_rate_filtered["minute_group"] = df_heart_rate_filtered["start"].dt.floor("10min")

# Pick exactly one representative row per 10-minute bucket: the chronologically
# earliest one (stable sort, so ties keep their original row order). Comparing
# each row's 'start' with the bucket's first 'start' would instead keep every
# row that shares that timestamp. Rows without a usable timestamp belong to no
# bucket and are never chosen as a representative.
bucket_first_index = (
    df_heart_rate_filtered.dropna(subset=["minute_group"])
    .sort_values("start", kind="stable")
    .drop_duplicates(subset="minute_group", keep="first")
    .index
)

# Keep a heart-rate row when another metric exists at the same timestamp,
# or when it is the single representative of its 10-minute bucket.
has_other_metric = df_heart_rate_filtered["start"].isin(existing_metrics_time)
is_bucket_representative = df_heart_rate_filtered.index.isin(bucket_first_index)

df_heart_rate_filtered = df_heart_rate_filtered[
    has_other_metric | is_bucket_representative
].drop(columns=["minute_group"])

# Stack the thinned heart-rate rows on top of the rows of every other metric,
# renumbering the index from zero
df_final = pd.concat(
    objs=[df_heart_rate_filtered, df_other_metrics],
    ignore_index=True,
)

# Write the merged table to disk without the positional index column
output_file = "merged_health_data.csv"
df_final.to_csv(path_or_buf=output_file, index=False)

print(f"✅ Merged data saved to {output_file}")


✅ Merged data saved to merged_health_data.csv


  df_combined["start"] = df_combined["start"].dt.floor("T")
  df_heart_rate_filtered["minute_group"] = df_heart_rate_filtered["start"].dt.floor("10T")  # Round to 10-minute intervals


: 