In [169]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import os
import ast

In [170]:
# Directory containing the CSV files
data_dir = "Data"
# username = "someshbgd3"
username = "gaurav_surtani"

# List of available health metrics
METRICS_str = "activeCaloriesBurned, basalBodyTemperature, basalMetabolicRate, bloodGlucose, bloodPressure, bodyFat, bodyTemperature, boneMass, cervicalMucus, distance, exerciseSession, elevationGained, floorsClimbed, heartRate, height, hydration, leanBodyMass, menstruationFlow, menstruationPeriod, nutrition, ovulationTest, oxygenSaturation, power, respiratoryRate, restingHeartRate, sleepSession, speed, steps, stepsCadence, totalCaloriesBurned, vo2Max, weight, wheelchairPushes"
METRICS = METRICS_str.split(", ")

# Initialize dictionary
METRICS_dict = {}

# Assign keys to dictionary with relevant columns
# METRICS_dict[METRICS[0]] = []  # activeCaloriesBurned
# METRICS_dict[METRICS[1]] = []  # basalBodyTemperature
METRICS_dict[METRICS[2]] = ["_id", "id", "end"]  # basalMetabolicRate
# METRICS_dict[METRICS[3]] = []  # bloodGlucose
# METRICS_dict[METRICS[4]] = []  # bloodPressure
METRICS_dict[METRICS[5]] = ["_id", "id", "end"]  # bodyFat
# METRICS_dict[METRICS[6]] = []  # bodyTemperature
# METRICS_dict[METRICS[7]] = []  # boneMass
# METRICS_dict[METRICS[8]] = []  # cervicalMucus
METRICS_dict[METRICS[9]] = ["_id", "id"]  # distance
METRICS_dict[METRICS[10]] = ["_id", "id"]  # exerciseSession
METRICS_dict[METRICS[11]] = ["_id", "id"]  # elevationGained
METRICS_dict[METRICS[12]] = ["_id", "id"]  # floorsClimbed
METRICS_dict[METRICS[13]] = ["_id", "id", "end", "start"]  # heartRate
METRICS_dict[METRICS[14]] = ["_id", "id", "end"]  # height
# METRICS_dict[METRICS[15]] = []  # hydration
# METRICS_dict[METRICS[16]] = []  # leanBodyMass
# METRICS_dict[METRICS[17]] = []  # menstruationFlow
# METRICS_dict[METRICS[18]] = []  # menstruationPeriod
METRICS_dict[METRICS[19]] = ["_id", "id", "end"]  # nutrition
# METRICS_dict[METRICS[20]] = []  # ovulationTest
METRICS_dict[METRICS[21]] = ["_id", "id", "end"]  # oxygenSaturation
METRICS_dict[METRICS[22]] = []  # power
METRICS_dict[METRICS[23]] = []  # respiratoryRate
METRICS_dict[METRICS[24]] = []  # restingHeartRate
METRICS_dict[METRICS[25]] = ["_id", "id"]  # sleepSession # STAGE_AWAKE  STAGE_LIGHT  STAGE_DEEP  STAGE_REM
METRICS_dict[METRICS[26]] = ["_id", "id"]  # speed
METRICS_dict[METRICS[27]] = ["_id", "id", "end"]  # steps
# METRICS_dict[METRICS[28]] = []  # stepsCadence
METRICS_dict[METRICS[29]] = ["_id", "id"]  # totalCaloriesBurned
# METRICS_dict[METRICS[30]] = ["_id", "id", "end"]  # vo2Max
METRICS_dict[METRICS[31]] = ["_id", "id", "end"]  # weight
# METRICS_dict[METRICS[32]] = []  # wheelchairPushes


In [171]:
def read_csv(username, metric):
    # try:
    file_path = os.path.join(data_dir, f"./{username}/Uncleaned/{metric}_{username}.csv")
    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
        return df
    else:
        print(f"❌ {metric} CSV file does not exist")
        return None

In [172]:
def drop_columns(df, metric):
    df.drop(columns=METRICS_dict[metric], inplace=True)
    

In [173]:
def expand_data_column(df, metric):
    expanded_rows = []

    for _, row in df.iterrows():
        try:
            data_dict = ast.literal_eval(row["data"].replace("'", "\""))  # Convert string to dict safely
            flattened_data = {}

            # Flatten nested dictionaries
            for key, value in data_dict.items():
                if isinstance(value, dict):  
                    for sub_key, sub_value in value.items():
                        flattened_data[f"{metric}_{key}_{sub_key}"] = sub_value
                else:
                    flattened_data[f"{metric}_{key}"] = value
            
            # Combine with existing row data
            new_row = row.to_dict()
            new_row.pop("data")  # Remove original data column
            new_row.update(flattened_data)  # Add expanded data attributes

            expanded_rows.append(new_row)
        except Exception as e:
            print(f"❌ Error processing row: {row['data']} - {e}")

        df_expanded = pd.DataFrame(expanded_rows)
        
        if "start" in df_expanded.columns:
            df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
            df_expanded["start"] = df_expanded["start"].dt.round("min")
        if "end" in df_expanded.columns:
            df_expanded["end"] = pd.to_datetime(df_expanded["end"], format="ISO8601", errors="coerce")
            df_expanded["end"] = df_expanded["end"].dt.round("min")
        
         # Add total_time column (difference in minutes)
        if "start" in df_expanded.columns and "end" in df_expanded.columns:
            df_expanded[f"{metric}_total_time"] = (df_expanded["end"] - df_expanded["start"]).dt.total_seconds() / 60  # Convert to minutes


    return df_expanded


In [174]:
def expand_heart_beat(df, metric):
    """
    Expands the 'data' column into separate columns for each attribute.
    If the column contains a list (e.g., samples), each entry is expanded into a new row.
    """
    expanded_rows = []
    data_column = "data"

    for _, row in df.iterrows():
        try:
            data_content = row[data_column]

            # Convert string representation of dictionary to actual dictionary
            data_parsed = ast.literal_eval(data_content.replace("'", "\"")) if isinstance(data_content, str) else data_content
            
            if isinstance(data_parsed, dict) and "samples" in data_parsed:
                for sample in data_parsed["samples"]:
                    new_row = row.to_dict()  # Copy original row
                    new_row.pop(data_column)  # Remove the original JSON column
                    new_row.update(sample)  # Add extracted values (beatsPerMinute & time)
                    expanded_rows.append(new_row)
        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")
            
    df_expanded = pd.DataFrame(expanded_rows)
    
    if "time" in df_expanded.columns:
        df_expanded["time"] = pd.to_datetime(df_expanded["time"], format='ISO8601')  # Convert time to datetime
        df_expanded = df_expanded.sort_values(by="time")  # Sort by time
                
    # Ensure 'time' column is in datetime format
    if "time" in df_expanded.columns:
        df_expanded["minute"] = df_expanded["time"].dt.round("min")

        # Group by minute and calculate rounded average
        df_grouped = df_expanded.groupby(["app", "minute"], as_index=False).agg(
            beatsPerMinute=("beatsPerMinute", lambda x: round(np.mean(x)))  # Rounded average
        )

        # Rename 'minute' column back to 'time'
        df_grouped.rename(columns={"minute": "time"}, inplace=True)

    return df_grouped


In [175]:
def extract_vitamin_grams(df, metric):
    extracted_rows = []
    data_column = "data"
    
    for _, row in df.iterrows():
        try:
            data_content = row[data_column]

            # Convert string representation of dictionary to actual dictionary
            data_parsed = ast.literal_eval(data_content.replace("'", "\"")) if isinstance(data_content, str) else data_content
            
            if isinstance(data_parsed, dict):
                flattened_data = {}

                for key, value in data_parsed.items():
                    if isinstance(value, dict) and "inGrams" in value:
                        flattened_data[f"{key}_inGrams"] = value["inGrams"]  # Extract only 'inGrams'

                new_row = row.to_dict()
                new_row.pop(data_column)  # Remove the original JSON column
                new_row.update(flattened_data)  # Add extracted nutrient data
                extracted_rows.append(new_row)
        
        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")
    
    df_expanded = pd.DataFrame(extracted_rows)
    
    df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
    df_expanded["start"] = df_expanded["start"].dt.round("min")

    return df_expanded


In [176]:
def process_sleep_data(df, metric, data_column="data"):
    stage_columns = ["sleep_stage_1", "sleep_stage_2", "sleep_stage_3", "sleep_stage_4"]
    new_rows = []

    for _, row in df.iterrows():
        try:
            # Parse the JSON-like string
            sleep_data = ast.literal_eval(row[data_column].replace("'", "\"")) if isinstance(row[data_column], str) else row[data_column]

            # Initialize sleep stage times
            sleep_stage_times = {f"sleep_stage_{i}": timedelta(0) for i in range(1, 9)}

            if isinstance(sleep_data, dict) and "stages" in sleep_data:
                for stage_info in sleep_data["stages"]:
                    start_time = datetime.fromisoformat(stage_info["startTime"].replace("Z", ""))
                    end_time = datetime.fromisoformat(stage_info["endTime"].replace("Z", ""))
                    duration = end_time - start_time  # Calculate duration

                    # Accumulate duration based on stage
                    stage_key = f"sleep_stage_{stage_info['stage']}"
                    if stage_key in sleep_stage_times:
                        sleep_stage_times[stage_key] += duration

            # Compute total sleep time using row's `start` and `end` columns
            total_sleep_time = None
            if "start" in row and "end" in row:
                try:
                    start_time = datetime.fromisoformat(row["start"].replace("Z", ""))
                    end_time = datetime.fromisoformat(row["end"].replace("Z", ""))
                    total_sleep_time = end_time - start_time
                except Exception:
                    total_sleep_time = None  # Handle incorrect formats

            # Convert timedelta to minutes for easy analysis
            row_data = row.to_dict()
            for stage in sleep_stage_times:
                row_data[stage] = sleep_stage_times[stage].total_seconds() / 60  # Convert to minutes

            row_data["total_sleep_time"] = total_sleep_time.total_seconds() / 60 if total_sleep_time else None  # Convert to minutes
            new_rows.append(row_data)

        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")

        df_expanded = pd.DataFrame(new_rows)
        
        if "start" in df_expanded.columns:
            df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
            df_expanded["start"] = df_expanded["start"].dt.round("min")
        if "end" in df_expanded.columns:
            df_expanded["end"] = pd.to_datetime(df_expanded["end"], format="ISO8601", errors="coerce")
            df_expanded["end"] = df_expanded["end"].dt.round("min")

    return df_expanded


In [177]:
def process_speed_data(df, metric, data_column="data"):
    new_rows = []

    for _, row in df.iterrows():
        try:
            # Convert string JSON to dictionary safely
            speed_data = ast.literal_eval(row[data_column].replace("'", "\"")) if isinstance(row[data_column], str) else row[data_column]
            
            total_speed = 0
            num_samples = 0

            if isinstance(speed_data, dict) and "samples" in speed_data:
                for sample in speed_data["samples"]:
                    if "speed" in sample and "inKilometersPerHour" in sample["speed"]:
                        total_speed += sample["speed"]["inKilometersPerHour"]
                        num_samples += 1
            
            # Compute average speed (avoid division by zero)
            avg_speed = (total_speed / num_samples) if num_samples > 0 else None

            # Compute total time spent using start and end timestamps
            total_time_spent = None
            if "start" in row and "end" in row:
                try:
                    start_time = datetime.fromisoformat(row["start"].replace("Z", ""))
                    end_time = datetime.fromisoformat(row["end"].replace("Z", ""))
                    total_time_spent = (end_time - start_time).total_seconds() / 60  # Convert to minutes
                except Exception:
                    total_time_spent = None  # Handle incorrect formats

            # Append calculated values
            row_data = row.to_dict()
            row_data["total_time_spent"] = total_time_spent
            row_data["average_speed_kmh"] = avg_speed

            new_rows.append(row_data)

        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")


        df_expanded = pd.DataFrame(new_rows)
        
        if "start" in df_expanded.columns:
            df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
            df_expanded["start"] = df_expanded["start"].dt.round("min")
        if "end" in df_expanded.columns:
            df_expanded["end"] = pd.to_datetime(df_expanded["end"], format="ISO8601", errors="coerce")
            df_expanded["end"] = df_expanded["end"].dt.round("min")

    return df_expanded



In [178]:
metric = METRICS[2] # basalMetabolicRate

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["basalMetabolicRate_basalMetabolicRate_inWatts"])
    df = df.rename(columns={"basalMetabolicRate_basalMetabolicRate_inKilocaloriesPerDay": "basalMetabolicRate_inKilocaloriesPerDay"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No basalMetabolicRate stats for the user: {username} - {e}")

❌ basalMetabolicRate CSV file does not exist
No basalMetabolicRate stats for the user: gaurav_surtani - 'NoneType' object has no attribute 'drop'


In [179]:
metric = METRICS[5] # bodyFat

try: 
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No bodyFat stats for the user: {username} - {e}")


❌ bodyFat CSV file does not exist
No bodyFat stats for the user: gaurav_surtani - 'NoneType' object has no attribute 'drop'


In [180]:
metric = METRICS[9] # distance

try: 
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["distance_distance_inFeet","distance_distance_inInches","distance_distance_inMeters"])
    df = df.rename(columns={"distance_distance_inKilometers": "distance_inKilometers", "distance_distance_inMiles": "distance_inMiles"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No distance stats for the user: {username} - {e}")

                               app                       end  \
0          com.fitbit.FitbitMobile 2025-01-21 03:01:00+00:00   
1  com.google.android.apps.fitness 2025-01-21 03:01:00+00:00   
2          com.fitbit.FitbitMobile 2025-01-21 03:02:00+00:00   
3  com.google.android.apps.fitness 2025-01-21 03:02:00+00:00   
4          com.fitbit.FitbitMobile 2025-01-21 03:13:00+00:00   

                      start  distance_inKilometers  distance_inMiles  \
0 2025-01-21 03:00:00+00:00               0.000671          0.000417   
1 2025-01-21 03:00:00+00:00               0.000671          0.000417   
2 2025-01-21 03:01:00+00:00               0.002013          0.001251   
3 2025-01-21 03:01:00+00:00               0.002013          0.001251   
4 2025-01-21 03:12:00+00:00               0.001342          0.000834   

   distance_total_time  
0                  1.0  
1                  1.0  
2                  1.0  
3                  1.0  
4                  1.0  


In [181]:
metric = METRICS[10] # exerciseSession

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No exerciseSession stats for the user: {username} - {e}")

                       app                       end  \
0  com.fitbit.FitbitMobile 2025-02-13 09:11:00+00:00   
1  com.fitbit.FitbitMobile 2025-02-15 05:47:00+00:00   
2  com.fitbit.FitbitMobile 2025-02-15 13:33:00+00:00   
3  com.fitbit.FitbitMobile 2025-02-17 07:38:00+00:00   
4  com.fitbit.FitbitMobile 2025-02-17 09:17:00+00:00   

                      start exerciseSession_endZoneOffset_id  \
0 2025-02-13 08:50:00+00:00                           +05:30   
1 2025-02-15 05:23:00+00:00                           +05:30   
2 2025-02-15 13:02:00+00:00                           +05:30   
3 2025-02-17 07:11:00+00:00                           +05:30   
4 2025-02-17 08:34:00+00:00                           +05:30   

   exerciseSession_endZoneOffset_totalSeconds  exerciseSession_exerciseType  \
0                                       19800                            79   
1                                       19800                            79   
2                                       1

In [182]:
metric = METRICS[11] # elevationGained

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["elevationGained_elevation_inKilometers","elevationGained_elevation_inInches", "elevationGained_elevation_inMiles"])
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No elevationGained stats for the user: {username} - {e}")

                       app                       end  \
0  com.fitbit.FitbitMobile 2025-01-23 05:26:00+00:00   
1  com.fitbit.FitbitMobile 2025-01-25 10:07:00+00:00   
2  com.fitbit.FitbitMobile 2025-01-26 15:33:00+00:00   
3  com.fitbit.FitbitMobile 2025-01-28 10:49:00+00:00   
4  com.fitbit.FitbitMobile 2025-01-28 15:12:00+00:00   

                      start  elevationGained_elevation_inFeet  \
0 2025-01-23 05:25:00+00:00                                20   
1 2025-01-25 10:06:00+00:00                                20   
2 2025-01-26 15:32:00+00:00                                10   
3 2025-01-28 10:48:00+00:00                                30   
4 2025-01-28 15:11:00+00:00                                10   

   elevationGained_elevation_inMeters  elevationGained_total_time  
0                               6.096                         1.0  
1                               6.096                         1.0  
2                               3.048                         1.0  


In [183]:
metric = METRICS[12] # floorsClimbed

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No floorsClimbed stats for the user: {username} - {e}")

                       app                       end  \
0  com.fitbit.FitbitMobile 2025-01-23 05:26:00+00:00   
1  com.fitbit.FitbitMobile 2025-01-25 10:07:00+00:00   
2  com.fitbit.FitbitMobile 2025-01-26 15:33:00+00:00   
3  com.fitbit.FitbitMobile 2025-01-28 10:49:00+00:00   
4  com.fitbit.FitbitMobile 2025-01-28 15:12:00+00:00   

                      start  floorsClimbed_floors  floorsClimbed_total_time  
0 2025-01-23 05:25:00+00:00                     2                       1.0  
1 2025-01-25 10:06:00+00:00                     2                       1.0  
2 2025-01-26 15:32:00+00:00                     1                       1.0  
3 2025-01-28 10:48:00+00:00                     3                       1.0  
4 2025-01-28 15:11:00+00:00                     1                       1.0  


In [184]:
metric = METRICS[13] # heartRate

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_heart_beat(df, metric)
    df = df.rename(columns={"time": "start"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No heartRate stats for the user: {username} - {e}")

                       app                     start  beatsPerMinute
0  com.fitbit.FitbitMobile 2025-01-21 13:17:00+00:00              94
1  com.fitbit.FitbitMobile 2025-01-21 13:18:00+00:00              76
2  com.fitbit.FitbitMobile 2025-01-21 13:19:00+00:00              76
3  com.fitbit.FitbitMobile 2025-01-21 13:20:00+00:00              78
4  com.fitbit.FitbitMobile 2025-01-21 13:21:00+00:00              76


In [185]:
metric = METRICS[14] # height

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["height_height_inKilometers","height_height_inMiles"])
    df = df.rename(columns={"height_height_inFeet": "height_inFeet", "height_height_inInches": "height_inInches", "height_height_inMeters": "height_inMeters"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No height stats for the user: {username} - {e}")

❌ height CSV file does not exist
No height stats for the user: gaurav_surtani - 'NoneType' object has no attribute 'drop'


In [186]:
metric = METRICS[19] # nutrition

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = extract_vitamin_grams(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No nutrition stats for the user: {username} - {e}")

❌ nutrition CSV file does not exist
No nutrition stats for the user: gaurav_surtani - 'NoneType' object has no attribute 'drop'


In [187]:
metric = METRICS[21]  # oxygenSaturation

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No oxygenSaturation stats for the user: {username} - {e}")

❌ oxygenSaturation CSV file does not exist
No oxygenSaturation stats for the user: gaurav_surtani - 'NoneType' object has no attribute 'drop'


In [188]:
metric = METRICS[25] # sleepSession

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = process_sleep_data(df, metric)
    df = df.drop(columns=["data"])
    # df = df.rename(columns={"data": "sleep_data"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No sleepSession stats for the user: {username} - {e}")

                       app                       end  \
0  com.fitbit.FitbitMobile 2025-02-08 20:18:00+00:00   
1  com.fitbit.FitbitMobile 2025-02-19 06:40:00+00:00   
2  com.fitbit.FitbitMobile 2025-02-19 14:00:00+00:00   
3  com.fitbit.FitbitMobile 2025-02-20 02:24:00+00:00   
4  com.fitbit.FitbitMobile 2025-02-21 14:51:00+00:00   

                      start  sleep_stage_1  sleep_stage_2  sleep_stage_3  \
0 2025-02-08 14:50:00+00:00           35.5            0.0            0.0   
1 2025-02-19 05:02:00+00:00            0.0           98.0            0.0   
2 2025-02-19 11:21:00+00:00           76.0           83.0            0.0   
3 2025-02-19 20:37:00+00:00           49.5            0.0            0.0   
4 2025-02-21 08:58:00+00:00           46.0            0.0            0.0   

   sleep_stage_4  sleep_stage_5  sleep_stage_6  sleep_stage_7  sleep_stage_8  \
0          182.0           59.5           51.0            0.0            0.0   
1            0.0            0.0            0.0

In [189]:
metric = METRICS[26] # speed

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = process_speed_data(df, metric)
    df = df.drop(columns=["data"])
    df = df.rename(columns={"total_time_spent": "speed_total_time_spent"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No speed stats for the user: {username} - {e}")

                               app                       end  \
0  com.google.android.apps.fitness 2025-01-21 03:21:00+00:00   
1  com.google.android.apps.fitness 2025-01-21 03:55:00+00:00   
2  com.google.android.apps.fitness 2025-01-21 03:56:00+00:00   
3  com.google.android.apps.fitness 2025-01-21 04:48:00+00:00   
4  com.google.android.apps.fitness 2025-01-21 04:49:00+00:00   

                      start  speed_total_time_spent  average_speed_kmh  
0 2025-01-21 03:21:00+00:00                0.000017           2.619040  
1 2025-01-21 03:55:00+00:00                0.000017           3.347350  
2 2025-01-21 03:56:00+00:00                0.000017           2.762229  
3 2025-01-21 04:48:00+00:00                0.000017           3.505364  
4 2025-01-21 04:49:00+00:00                0.000017           2.532118  


In [190]:
metric = METRICS[27] # steps

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No steps stats for the user: {username} - {e}")

                               app                     start  steps_count
0          com.fitbit.FitbitMobile 2025-01-21 03:00:00+00:00            1
1  com.google.android.apps.fitness 2025-01-21 03:00:00+00:00            1
2          com.fitbit.FitbitMobile 2025-01-21 03:01:00+00:00            3
3  com.google.android.apps.fitness 2025-01-21 03:01:00+00:00            3
4  com.google.android.apps.fitness 2025-01-21 03:02:00+00:00            1


In [191]:
metric = METRICS[29]  # totalCaloriesBurned

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["totalCaloriesBurned_energy_inJoules", "totalCaloriesBurned_energy_inCalories", "totalCaloriesBurned_energy_inKilojoules"])
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No totalCaloriesBurned stats for the user: {username} - {e}")   

                               app                       end  \
0  com.google.android.apps.fitness 2025-01-21 01:30:00+00:00   
1          com.fitbit.FitbitMobile 2025-01-21 01:45:00+00:00   
2  com.google.android.apps.fitness 2025-01-21 01:45:00+00:00   
3  com.google.android.apps.fitness 2025-01-21 01:45:00+00:00   
4          com.fitbit.FitbitMobile 2025-01-21 02:00:00+00:00   

                      start  totalCaloriesBurned_energy_inKilocalories  \
0 2025-01-21 01:30:00+00:00                                   0.000019   
1 2025-01-21 01:30:00+00:00                                  18.205650   
2 2025-01-21 01:30:00+00:00                                  18.205650   
3 2025-01-21 01:45:00+00:00                                   0.000019   
4 2025-01-21 01:45:00+00:00                                  18.205650   

   totalCaloriesBurned_total_time  
0                             0.0  
1                            15.0  
2                            15.0  
3                         

In [192]:
metric = METRICS[31] # weight

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["weight_weight_inGrams", "weight_weight_inMicrograms", "weight_weight_inMilligrams", "weight_weight_inOunces"])
    df = df.rename(columns={"weight_weight_inKilograms": "weight_inKilograms", "weight_weight_inPounds": "weight_inPounds"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No weight stats for the user: {username} - {e}")

❌ weight CSV file does not exist
No weight stats for the user: gaurav_surtani - 'NoneType' object has no attribute 'drop'
