# 📂 Data Cleaning Notebook
# Author: Ryan Attia
# Date: April 23, 2025
# Description: Clean and prepare Fitbit data for analysis


In [1]:
from pathlib import Path

import pandas as pd

# ----------------------------
# 🔹 Step 1: Load Raw Data
# ----------------------------

In [None]:
# Read the three raw CSV exports (daily activity, sleep, weight log) into
# DataFrames, in that order.
activity, sleep, weight = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/raw/dailyActivity_merged.csv",
        "../data/raw/sleepDay_merged.csv",
        "../data/raw/weightLogInfo_merged.csv",
    )
]

# ----------------------------
# 🔹 Step 2: Clean Daily Activity Data
# ----------------------------

In [3]:
# Parse the activity date strings into datetimes (column position is kept).
activity = activity.assign(ActivityDate=pd.to_datetime(activity['ActivityDate']))

# Remove rows that are exact duplicates of an earlier row.
activity = activity.drop_duplicates()

# Normalise headers: trim whitespace, lower-case, spaces -> underscores.
activity.columns = [
    header.strip().lower().replace(" ", "_") for header in activity.columns
]

# ----------------------------
# 🔹 Step 3: Clean Sleep Data
# ----------------------------

In [4]:
# Parse the sleep timestamps, then rename the column to 'ActivityDate' so it
# matches the activity table's date column (both are joined on it later).
sleep['SleepDay'] = pd.to_datetime(sleep['SleepDay'])
sleep.rename(columns={'SleepDay': 'ActivityDate'}, inplace=True)

# Drop exact duplicate rows and normalise headers
# (trim whitespace, lower-case, spaces -> underscores).
sleep.drop_duplicates(inplace=True)
sleep.columns = sleep.columns.str.strip().str.lower().str.replace(" ", "_")

# NOTE: the date column is now named 'activitydate' and already holds
# datetimes. A second to_datetime call on 'SleepDay' used to follow here; it
# was removed because that column no longer exists at this point (KeyError).


# ----------------------------
# 🔹 Step 4: Clean Weight Log Data
# ----------------------------

In [5]:
# Parse the log timestamps, drop exact duplicate rows, and normalise headers
# (trim whitespace, lower-case, spaces -> underscores).
weight['Date'] = pd.to_datetime(weight['Date'])
weight.drop_duplicates(inplace=True)
weight.columns = weight.columns.str.strip().str.lower().str.replace(" ", "_")

# NOTE: after the header normalisation the column is named 'date' and already
# holds datetimes. A repeated conversion of weight['Date'] used to follow
# here; it was removed because that key no longer exists (KeyError).


# ----------------------------
# 🔹 Step 5: Merge Activity + Sleep Data
# ----------------------------


In [6]:
# Inner join: keep only (id, activitydate) pairs present in both tables.
merged_activity_sleep = pd.merge(
    left=activity,
    right=sleep,
    how='inner',
    on=['id', 'activitydate'],
)

# ----------------------------
# 🔹 Step 6: Save Processed Files
# ----------------------------

In [None]:
# Write each cleaned table and the merged table to CSV, omitting the index.
processed_dir = Path("../data/processed")

# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there).
processed_dir.mkdir(parents=True, exist_ok=True)

activity.to_csv(processed_dir / "clean_daily_activity.csv", index=False)
sleep.to_csv(processed_dir / "clean_sleep_day.csv", index=False)
weight.to_csv(processed_dir / "clean_weight_log.csv", index=False)
merged_activity_sleep.to_csv(processed_dir / "merged_activity_sleep.csv", index=False)

print("✅ Data cleaned and saved to /data/processed/")

✅ Data cleaned and saved to /data/processed/
