In [145]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import os
import ast

In [None]:
# Directory containing the CSV files
data_dir = "Data"
# username = "someshbgd3"
username = "gaurav_surtani"

# List of available health metrics
METRICS_str = "activeCaloriesBurned, basalBodyTemperature, basalMetabolicRate, bloodGlucose, bloodPressure, bodyFat, bodyTemperature, boneMass, cervicalMucus, distance, exerciseSession, elevationGained, floorsClimbed, heartRate, height, hydration, leanBodyMass, menstruationFlow, menstruationPeriod, nutrition, ovulationTest, oxygenSaturation, power, respiratoryRate, restingHeartRate, sleepSession, speed, steps, stepsCadence, totalCaloriesBurned, vo2Max, weight, wheelchairPushes"
METRICS = METRICS_str.split(", ")

# Initialize dictionary
METRICS_dict = {}

# Assign keys to dictionary with relevant columns
# METRICS_dict[METRICS[0]] = []  # activeCaloriesBurned
# METRICS_dict[METRICS[1]] = []  # basalBodyTemperature
METRICS_dict[METRICS[2]] = ["_id", "id", "end"]  # basalMetabolicRate
# METRICS_dict[METRICS[3]] = []  # bloodGlucose
# METRICS_dict[METRICS[4]] = []  # bloodPressure
METRICS_dict[METRICS[5]] = ["_id", "id", "end"]  # bodyFat
# METRICS_dict[METRICS[6]] = []  # bodyTemperature
# METRICS_dict[METRICS[7]] = []  # boneMass
# METRICS_dict[METRICS[8]] = []  # cervicalMucus
METRICS_dict[METRICS[9]] = ["_id", "id"]  # distance
METRICS_dict[METRICS[10]] = ["_id", "id"]  # exerciseSession
METRICS_dict[METRICS[11]] = ["_id", "id"]  # elevationGained
METRICS_dict[METRICS[12]] = ["_id", "id"]  # floorsClimbed
METRICS_dict[METRICS[13]] = ["_id", "id", "end", "start"]  # heartRate
METRICS_dict[METRICS[14]] = ["_id", "id", "end"]  # height
# METRICS_dict[METRICS[15]] = []  # hydration
# METRICS_dict[METRICS[16]] = []  # leanBodyMass
# METRICS_dict[METRICS[17]] = []  # menstruationFlow
# METRICS_dict[METRICS[18]] = []  # menstruationPeriod
METRICS_dict[METRICS[19]] = ["_id", "id", "end"]  # nutrition
# METRICS_dict[METRICS[20]] = []  # ovulationTest
METRICS_dict[METRICS[21]] = ["_id", "id", "end"]  # oxygenSaturation
METRICS_dict[METRICS[22]] = []  # power
METRICS_dict[METRICS[23]] = []  # respiratoryRate
METRICS_dict[METRICS[24]] = []  # restingHeartRate
METRICS_dict[METRICS[25]] = ["_id", "id"]  # sleepSession # STAGE_AWAKE  STAGE_LIGHT  STAGE_DEEP  STAGE_REM
METRICS_dict[METRICS[26]] = ["_id", "id"]  # speed
METRICS_dict[METRICS[27]] = ["_id", "id", "end"]  # steps
# METRICS_dict[METRICS[28]] = []  # stepsCadence
METRICS_dict[METRICS[29]] = ["_id", "id"]  # totalCaloriesBurned
# METRICS_dict[METRICS[30]] = ["_id", "id", "end"]  # vo2Max
METRICS_dict[METRICS[31]] = ["_id", "id", "end"]  # weight
# METRICS_dict[METRICS[32]] = []  # wheelchairPushes


In [147]:
def read_csv(username, metric):
    # try:
    file_path = os.path.join(data_dir, f"./{username}/Uncleaned/{metric}_{username}.csv")
    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
        return df
    else:
        print(f"❌ {metric} CSV file does not exist")
        return None

In [148]:
def drop_columns(df, metric):
    df.drop(columns=METRICS_dict[metric], inplace=True)
    

In [149]:
def expand_data_column(df, metric):
    expanded_rows = []

    for _, row in df.iterrows():
        try:
            data_dict = ast.literal_eval(row["data"].replace("'", "\""))  # Convert string to dict safely
            flattened_data = {}

            # Flatten nested dictionaries
            for key, value in data_dict.items():
                if isinstance(value, dict):  
                    for sub_key, sub_value in value.items():
                        flattened_data[f"{metric}_{key}_{sub_key}"] = sub_value
                else:
                    flattened_data[f"{metric}_{key}"] = value
            
            # Combine with existing row data
            new_row = row.to_dict()
            new_row.pop("data")  # Remove original data column
            new_row.update(flattened_data)  # Add expanded data attributes

            expanded_rows.append(new_row)
        except Exception as e:
            print(f"❌ Error processing row: {row['data']} - {e}")

        df_expanded = pd.DataFrame(expanded_rows)
        
        if "start" in df_expanded.columns:
            df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
            df_expanded["start"] = df_expanded["start"].dt.round("min")
        if "end" in df_expanded.columns:
            df_expanded["end"] = pd.to_datetime(df_expanded["end"], format="ISO8601", errors="coerce")
            df_expanded["end"] = df_expanded["end"].dt.round("min")
        
         # Add total_time column (difference in minutes)
        if "start" in df_expanded.columns and "end" in df_expanded.columns:
            df_expanded[f"{metric}_total_time"] = (df_expanded["end"] - df_expanded["start"]).dt.total_seconds() / 60  # Convert to minutes


    return df_expanded


In [150]:
def expand_heart_beat(df, metric):
    """
    Expands the 'data' column into separate columns for each attribute.
    If the column contains a list (e.g., samples), each entry is expanded into a new row.
    """
    expanded_rows = []
    data_column = "data"

    for _, row in df.iterrows():
        try:
            data_content = row[data_column]

            # Convert string representation of dictionary to actual dictionary
            data_parsed = ast.literal_eval(data_content.replace("'", "\"")) if isinstance(data_content, str) else data_content
            
            if isinstance(data_parsed, dict) and "samples" in data_parsed:
                for sample in data_parsed["samples"]:
                    new_row = row.to_dict()  # Copy original row
                    new_row.pop(data_column)  # Remove the original JSON column
                    new_row.update(sample)  # Add extracted values (beatsPerMinute & time)
                    expanded_rows.append(new_row)
        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")
            
    df_expanded = pd.DataFrame(expanded_rows)
    
    if "time" in df_expanded.columns:
        df_expanded["time"] = pd.to_datetime(df_expanded["time"], format='ISO8601')  # Convert time to datetime
        df_expanded = df_expanded.sort_values(by="time")  # Sort by time
                
    # Ensure 'time' column is in datetime format
    if "time" in df_expanded.columns:
        df_expanded["minute"] = df_expanded["time"].dt.round("min")

        # Group by minute and calculate rounded average
        df_grouped = df_expanded.groupby(["app", "minute"], as_index=False).agg(
            beatsPerMinute=("beatsPerMinute", lambda x: round(np.mean(x)))  # Rounded average
        )

        # Rename 'minute' column back to 'time'
        df_grouped.rename(columns={"minute": "time"}, inplace=True)

    return df_grouped


In [151]:
def extract_vitamin_grams(df, metric):
    extracted_rows = []
    data_column = "data"
    
    for _, row in df.iterrows():
        try:
            data_content = row[data_column]

            # Convert string representation of dictionary to actual dictionary
            data_parsed = ast.literal_eval(data_content.replace("'", "\"")) if isinstance(data_content, str) else data_content
            
            if isinstance(data_parsed, dict):
                flattened_data = {}

                for key, value in data_parsed.items():
                    if isinstance(value, dict) and "inGrams" in value:
                        flattened_data[f"{key}_inGrams"] = value["inGrams"]  # Extract only 'inGrams'

                new_row = row.to_dict()
                new_row.pop(data_column)  # Remove the original JSON column
                new_row.update(flattened_data)  # Add extracted nutrient data
                extracted_rows.append(new_row)
        
        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")
    
    df_expanded = pd.DataFrame(extracted_rows)
    
    df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
    df_expanded["start"] = df_expanded["start"].dt.round("min")

    return df_expanded


In [152]:
def process_sleep_data(df, metric, data_column="data"):
    stage_columns = ["sleep_stage_1", "sleep_stage_2", "sleep_stage_3", "sleep_stage_4"]
    new_rows = []

    for _, row in df.iterrows():
        try:
            # Parse the JSON-like string
            sleep_data = ast.literal_eval(row[data_column].replace("'", "\"")) if isinstance(row[data_column], str) else row[data_column]

            # Initialize sleep stage times
            sleep_stage_times = {f"sleep_stage_{i}": timedelta(0) for i in range(1, 9)}

            if isinstance(sleep_data, dict) and "stages" in sleep_data:
                for stage_info in sleep_data["stages"]:
                    start_time = datetime.fromisoformat(stage_info["startTime"].replace("Z", ""))
                    end_time = datetime.fromisoformat(stage_info["endTime"].replace("Z", ""))
                    duration = end_time - start_time  # Calculate duration

                    # Accumulate duration based on stage
                    stage_key = f"sleep_stage_{stage_info['stage']}"
                    if stage_key in sleep_stage_times:
                        sleep_stage_times[stage_key] += duration

            # Compute total sleep time using row's `start` and `end` columns
            total_sleep_time = None
            if "start" in row and "end" in row:
                try:
                    start_time = datetime.fromisoformat(row["start"].replace("Z", ""))
                    end_time = datetime.fromisoformat(row["end"].replace("Z", ""))
                    total_sleep_time = end_time - start_time
                except Exception:
                    total_sleep_time = None  # Handle incorrect formats

            # Convert timedelta to minutes for easy analysis
            row_data = row.to_dict()
            for stage in sleep_stage_times:
                row_data[stage] = sleep_stage_times[stage].total_seconds() / 60  # Convert to minutes

            row_data["total_sleep_time"] = total_sleep_time.total_seconds() / 60 if total_sleep_time else None  # Convert to minutes
            new_rows.append(row_data)

        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")

        df_expanded = pd.DataFrame(new_rows)
        
        if "start" in df_expanded.columns:
            df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
            df_expanded["start"] = df_expanded["start"].dt.round("min")
        if "end" in df_expanded.columns:
            df_expanded["end"] = pd.to_datetime(df_expanded["end"], format="ISO8601", errors="coerce")
            df_expanded["end"] = df_expanded["end"].dt.round("min")

    return df_expanded


In [153]:
def process_speed_data(df, metric, data_column="data"):
    new_rows = []

    for _, row in df.iterrows():
        try:
            # Convert string JSON to dictionary safely
            speed_data = ast.literal_eval(row[data_column].replace("'", "\"")) if isinstance(row[data_column], str) else row[data_column]
            
            total_speed = 0
            num_samples = 0

            if isinstance(speed_data, dict) and "samples" in speed_data:
                for sample in speed_data["samples"]:
                    if "speed" in sample and "inKilometersPerHour" in sample["speed"]:
                        total_speed += sample["speed"]["inKilometersPerHour"]
                        num_samples += 1
            
            # Compute average speed (avoid division by zero)
            avg_speed = (total_speed / num_samples) if num_samples > 0 else None

            # Compute total time spent using start and end timestamps
            total_time_spent = None
            if "start" in row and "end" in row:
                try:
                    start_time = datetime.fromisoformat(row["start"].replace("Z", ""))
                    end_time = datetime.fromisoformat(row["end"].replace("Z", ""))
                    total_time_spent = (end_time - start_time).total_seconds() / 60  # Convert to minutes
                except Exception:
                    total_time_spent = None  # Handle incorrect formats

            # Append calculated values
            row_data = row.to_dict()
            row_data["total_time_spent"] = total_time_spent
            row_data["average_speed_kmh"] = avg_speed

            new_rows.append(row_data)

        except Exception as e:
            print(f"❌ Error processing row: {row[data_column]} - {e}")


        df_expanded = pd.DataFrame(new_rows)
        
        if "start" in df_expanded.columns:
            df_expanded["start"] = pd.to_datetime(df_expanded["start"], format="ISO8601", errors="coerce")
            df_expanded["start"] = df_expanded["start"].dt.round("min")
        if "end" in df_expanded.columns:
            df_expanded["end"] = pd.to_datetime(df_expanded["end"], format="ISO8601", errors="coerce")
            df_expanded["end"] = df_expanded["end"].dt.round("min")

    return df_expanded



In [154]:
metric = METRICS[2] # basalMetabolicRate

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["basalMetabolicRate_basalMetabolicRate_inWatts"])
    df = df.rename(columns={"basalMetabolicRate_basalMetabolicRate_inKilocaloriesPerDay": "basalMetabolicRate_inKilocaloriesPerDay"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No basalMetabolicRate stats for the user: {username} - {e}")

                           app                     start  \
0  com.sec.android.app.shealth 2025-02-09 21:25:00+00:00   
1  com.sec.android.app.shealth 2025-02-20 09:33:00+00:00   
2  com.sec.android.app.shealth 2025-02-20 09:34:00+00:00   

   basalMetabolicRate_inKilocaloriesPerDay  
0                                   1875.0  
1                                   1835.0  
2                                   1845.0  


In [155]:
metric = METRICS[5] # bodyFat

try: 
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No bodyFat stats for the user: {username} - {e}")


                           app                     start  bodyFat_percentage
0  com.sec.android.app.shealth 2025-02-09 21:25:00+00:00           33.184364
1  com.sec.android.app.shealth 2025-02-20 09:33:00+00:00           33.989258
2  com.sec.android.app.shealth 2025-02-20 09:34:00+00:00           33.533752
3               com.vt.vitafit 2025-03-01 21:51:00+00:00           26.060000
4               com.vt.vitafit 2025-03-03 18:19:00+00:00           26.240000


In [156]:
metric = METRICS[9] # distance

try: 
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["distance_distance_inFeet","distance_distance_inInches","distance_distance_inMeters"])
    df = df.rename(columns={"distance_distance_inKilometers": "distance_inKilometers", "distance_distance_inMiles": "distance_inMiles"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No distance stats for the user: {username} - {e}")

                           app                       end  \
0  com.sec.android.app.shealth 2025-02-10 22:30:00+00:00   
1  com.sec.android.app.shealth 2025-02-12 03:03:00+00:00   
2  com.sec.android.app.shealth 2025-02-12 04:41:00+00:00   
3  com.sec.android.app.shealth 2025-02-14 01:29:00+00:00   
4  com.sec.android.app.shealth 2025-02-14 21:57:00+00:00   

                      start  distance_inKilometers  distance_inMiles  \
0 2025-02-10 22:11:00+00:00               1.738000          1.079946   
1 2025-02-12 02:56:00+00:00               0.485488          0.301669   
2 2025-02-12 04:36:00+00:00               0.185109          0.115022   
3 2025-02-14 01:18:00+00:00               0.651740          0.404973   
4 2025-02-14 21:45:00+00:00               0.632090          0.392764   

   distance_total_time  
0                 19.0  
1                  7.0  
2                  5.0  
3                 11.0  
4                 12.0  


In [157]:
metric = METRICS[10] # exerciseSession

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No exerciseSession stats for the user: {username} - {e}")

                           app                       end  \
0  com.sec.android.app.shealth 2025-02-10 04:12:00+00:00   
1  com.sec.android.app.shealth 2025-02-10 22:30:00+00:00   
2  com.sec.android.app.shealth 2025-02-10 23:10:00+00:00   
3  com.sec.android.app.shealth 2025-02-12 03:03:00+00:00   
4  com.sec.android.app.shealth 2025-02-12 04:35:00+00:00   

                      start exerciseSession_endZoneOffset_id  \
0 2025-02-10 03:42:00+00:00                           -08:00   
1 2025-02-10 22:11:00+00:00                           -08:00   
2 2025-02-10 22:43:00+00:00                           -08:00   
3 2025-02-12 02:56:00+00:00                           -08:00   
4 2025-02-12 03:33:00+00:00                           -08:00   

   exerciseSession_endZoneOffset_totalSeconds  exerciseSession_exerciseType  \
0                                      -28800                            68   
1                                      -28800                            57   
2                

In [158]:
metric = METRICS[11] # elevationGained

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["elevationGained_elevation_inKilometers","elevationGained_elevation_inInches", "elevationGained_elevation_inMiles"])
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No elevationGained stats for the user: {username} - {e}")

❌ elevationGained CSV file does not exist
No elevationGained stats for the user: someshbgd3 - 'NoneType' object has no attribute 'drop'


In [159]:
metric = METRICS[12] # floorsClimbed

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No floorsClimbed stats for the user: {username} - {e}")

❌ floorsClimbed CSV file does not exist
No floorsClimbed stats for the user: someshbgd3 - 'NoneType' object has no attribute 'drop'


In [160]:
metric = METRICS[13] # heartRate

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_heart_beat(df, metric)
    df = df.rename(columns={"time": "start"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No heartRate stats for the user: {username} - {e}")

                           app                     start  beatsPerMinute
0  com.sec.android.app.shealth 2025-02-06 21:00:00+00:00              49
1  com.sec.android.app.shealth 2025-02-06 21:01:00+00:00              51
2  com.sec.android.app.shealth 2025-02-06 21:02:00+00:00              50
3  com.sec.android.app.shealth 2025-02-06 21:03:00+00:00              51
4  com.sec.android.app.shealth 2025-02-06 21:04:00+00:00              50


In [161]:
metric = METRICS[14] # height

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["height_height_inKilometers","height_height_inMiles"])
    df = df.rename(columns={"height_height_inFeet": "height_inFeet", "height_height_inInches": "height_inInches", "height_height_inMeters": "height_inMeters"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No height stats for the user: {username} - {e}")

                           app                     start  height_inFeet  \
0  com.sec.android.app.shealth 2025-02-07 18:09:00+00:00       5.833333   
1  com.sec.android.app.shealth 2025-02-09 21:25:00+00:00       5.833333   
2  com.sec.android.app.shealth 2025-02-10 05:58:00+00:00       5.833333   
3  com.sec.android.app.shealth 2025-02-10 20:47:00+00:00       5.833333   
4  com.sec.android.app.shealth 2025-02-11 18:37:00+00:00       5.833333   

   height_inInches  height_inMeters  
0        70.000001            1.778  
1        70.000001            1.778  
2        70.000001            1.778  
3        70.000001            1.778  
4        70.000001            1.778  


In [162]:
metric = METRICS[19] # nutrition

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = extract_vitamin_grams(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No nutrition stats for the user: {username} - {e}")

                           app                     start  biotin_inGrams  \
0  com.sec.android.app.shealth 2025-02-07 23:55:00+00:00               0   
1  com.sec.android.app.shealth 2025-02-10 07:25:00+00:00               0   
2  com.sec.android.app.shealth 2025-02-10 07:28:00+00:00               0   
3  com.sec.android.app.shealth 2025-02-10 07:30:00+00:00               0   
4  com.sec.android.app.shealth 2025-02-10 20:00:00+00:00               0   

   caffeine_inGrams  calcium_inGrams  chloride_inGrams  cholesterol_inGrams  \
0                 0           0.1390                 0                0.011   
1                 0           0.3496                 0                0.444   
2                 0           0.0168                 0                0.015   
3                 0           0.2548                 0                0.499   
4                 0           0.1690                 0                0.385   

   chromium_inGrams  copper_inGrams  dietaryFiber_inGrams  ...  \
0 

In [163]:
metric = METRICS[21]  # oxygenSaturation

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No oxygenSaturation stats for the user: {username} - {e}")

                           app                     start  \
0  com.sec.android.app.shealth 2025-02-10 11:55:00+00:00   
1  com.sec.android.app.shealth 2025-02-14 11:34:00+00:00   
2  com.sec.android.app.shealth 2025-02-14 19:26:00+00:00   
3  com.sec.android.app.shealth 2025-02-19 11:23:00+00:00   
4  com.sec.android.app.shealth 2025-02-20 02:51:00+00:00   

   oxygenSaturation_percentage  
0                           95  
1                           93  
2                           95  
3                           95  
4                           95  


In [164]:
metric = METRICS[25] # sleepSession

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = process_sleep_data(df, metric)
    df = df.drop(columns=["data"])
    # df = df.rename(columns={"data": "sleep_data"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No sleepSession stats for the user: {username} - {e}")

                           app                       end  \
0  com.sec.android.app.shealth 2025-02-10 16:56:00+00:00   
1  com.sec.android.app.shealth 2025-02-14 17:41:00+00:00   
2  com.sec.android.app.shealth 2025-02-14 20:32:00+00:00   
3  com.sec.android.app.shealth 2025-02-19 18:06:00+00:00   
4  com.sec.android.app.shealth 2025-02-20 04:36:00+00:00   

                      start  sleep_stage_1  sleep_stage_2  sleep_stage_3  \
0 2025-02-10 11:55:00+00:00           19.0            0.0            0.0   
1 2025-02-14 11:34:00+00:00           17.5            0.0            0.0   
2 2025-02-14 19:26:00+00:00            5.5            0.0            0.0   
3 2025-02-19 11:23:00+00:00           25.0            0.0            0.0   
4 2025-02-20 02:51:00+00:00           28.0            0.0            0.0   

   sleep_stage_4  sleep_stage_5  sleep_stage_6  sleep_stage_7  sleep_stage_8  \
0          186.0           38.0           58.5            0.0            0.0   
1          213.5      

In [165]:
metric = METRICS[26] # speed

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = process_speed_data(df, metric)
    df = df.drop(columns=["data"])
    df = df.rename(columns={"total_time_spent": "speed_total_time_spent"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No speed stats for the user: {username} - {e}")

                           app                       end  \
0  com.sec.android.app.shealth 2025-02-10 22:30:00+00:00   
1  com.sec.android.app.shealth 2025-02-12 03:03:00+00:00   
2  com.sec.android.app.shealth 2025-02-12 04:41:00+00:00   
3  com.sec.android.app.shealth 2025-02-14 01:29:00+00:00   
4  com.sec.android.app.shealth 2025-02-14 21:57:00+00:00   

                      start  speed_total_time_spent  average_speed_kmh  
0 2025-02-10 22:11:00+00:00               18.277000           5.849453  
1 2025-02-12 02:56:00+00:00                7.186650           4.585680  
2 2025-02-12 04:36:00+00:00                4.867717           3.142036  
3 2025-02-14 01:18:00+00:00               10.777417           4.258182  
4 2025-02-14 21:45:00+00:00               12.362450           4.796259  


In [166]:
metric = METRICS[27] # steps

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No steps stats for the user: {username} - {e}")

                           app                     start  steps_count
0  com.sec.android.app.shealth 2025-02-07 08:00:00+00:00         3456
1  com.sec.android.app.shealth 2025-02-08 08:00:00+00:00         5125
2  com.sec.android.app.shealth 2025-02-09 08:00:00+00:00         3994
3  com.sec.android.app.shealth 2025-02-10 08:00:00+00:00         5007
4  com.sec.android.app.shealth 2025-02-11 08:00:00+00:00         7892


In [167]:
metric = METRICS[29]  # totalCaloriesBurned

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["totalCaloriesBurned_energy_inJoules", "totalCaloriesBurned_energy_inCalories", "totalCaloriesBurned_energy_inKilojoules"])
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No totalCaloriesBurned stats for the user: {username} - {e}")   

                           app                       end  \
0  com.sec.android.app.shealth 2025-02-10 04:12:00+00:00   
1  com.sec.android.app.shealth 2025-02-10 22:30:00+00:00   
2  com.sec.android.app.shealth 2025-02-10 23:10:00+00:00   
3  com.sec.android.app.shealth 2025-02-12 03:03:00+00:00   
4  com.sec.android.app.shealth 2025-02-12 04:35:00+00:00   

                      start  totalCaloriesBurned_energy_inKilocalories  \
0 2025-02-10 03:42:00+00:00                                 358.000000   
1 2025-02-10 22:11:00+00:00                                 158.000000   
2 2025-02-10 22:43:00+00:00                                 236.699997   
3 2025-02-12 02:56:00+00:00                                  71.779999   
4 2025-02-12 03:33:00+00:00                                 557.710022   

   totalCaloriesBurned_total_time  
0                            30.0  
1                            19.0  
2                            27.0  
3                             7.0  
4             

In [168]:
metric = METRICS[31] # weight

try:
    df = read_csv(username, metric)
    drop_columns(df, metric)
    df = expand_data_column(df, metric)
    df = df.drop(columns=["weight_weight_inGrams", "weight_weight_inMicrograms", "weight_weight_inMilligrams", "weight_weight_inOunces"])
    df = df.rename(columns={"weight_weight_inKilograms": "weight_inKilograms", "weight_weight_inPounds": "weight_inPounds"})
    os.makedirs(f"Data/{username}/Cleaned/", exist_ok=True) 
    file_path = os.path.join(data_dir, f"./{username}/Cleaned/{metric}_{username}_Cleaned.csv")
    df.to_csv(file_path, index=False)
    print(df.head())
except Exception as e:
    print(f"No weight stats for the user: {username} - {e}")

                           app                     start  weight_inKilograms  \
0  com.sec.android.app.shealth 2025-02-07 18:09:00+00:00          104.326302   
1  com.sec.android.app.shealth 2025-02-09 21:25:00+00:00          104.326241   
2  com.sec.android.app.shealth 2025-02-10 05:58:00+00:00          105.324203   
3  com.sec.android.app.shealth 2025-02-10 20:47:00+00:00          104.598503   
4  com.sec.android.app.shealth 2025-02-11 18:37:00+00:00          104.235603   

   weight_inPounds  
0       230.000125  
1       229.999990  
2       232.200122  
3       230.600226  
4       229.800169  
