In [784]:
from glob import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Use the fully qualified option name: the short pattern "max_rows" is
# regex-matched against every option key and becomes ambiguous once more than
# one key contains it (e.g. "styler.render.max_rows"), raising OptionError.
pd.set_option("display.max_rows", 2000)

In [785]:
# Build a lookup from dataset name to file path for everything under data/.
# The key is the slice of the path that starts right after its third "." and
# stops just before the character preceding the first "2020".
# NOTE(review): presumably file names look like
# "<a>.<b>.<c>.<name>.2020<timestamp>.csv" -- confirm against the data folder.
paths = glob("data/*")
dataPaths = {}
for path in paths:
    # Dropping the first two dots shifts later characters left by two, so
    # adding 2 recovers the third dot's position in the original path.
    third_dot = path.replace(".", "", 2).find(".") + 2
    stamp_at = path.find("2020")
    dataPaths[path[third_dot + 1:stamp_at - 1]] = path

In [825]:
# Load every discovered CSV into a DataFrame keyed by dataset name.
# The first line of each file is skipped, and index_col=False stops pandas
# from using the first column as the index.
dataframes = {}
for name, csv_path in dataPaths.items():
    dataframes[name] = pd.read_csv(csv_path, skiprows=[0], index_col=False)

# Helper Functions
def convertToDate(series):
    """Parse timestamps written as ``YYYY-MM-DD HH:MM:SS``.

    Thin wrapper around ``pd.to_datetime`` with a fixed ``format``; the
    return value is whatever that call produces for the given input.
    """
    timestamp_layout = '%Y-%m-%d %H:%M:%S'
    parsed = pd.to_datetime(series, format=timestamp_layout)
    return parsed

# Init functions
def initDaysum(remvOutlier=True, start=None, end=None):
    """Return the activity day-summary table indexed and sorted by create_time.

    Works on a copy of ``dataframes["activity.day_summary"]`` so the loaded
    frame is left untouched and repeated calls start from the same data.

    Parameters
    ----------
    remvOutlier : bool, default True
        When true, drop rows whose ``create_time`` lies between
        "2019-01-28 21:12:40.484" and "2019-01-28 21:12:41.745" (inclusive).
    start, end : optional
        Label bounds applied to the sorted index with ``.loc[start:end]``.
        Either bound may be given on its own; a bound left as ``None`` leaves
        that side open.

    Returns
    -------
    pandas.DataFrame
        The summary with ``update_time`` / ``create_time`` parsed by
        ``convertToDate``; ``create_time`` is also kept as a regular column.
    """
    # Copy first: the column and index assignments below would otherwise
    # modify the shared frame stored in the module-level `dataframes` dict.
    daysum = dataframes["activity.day_summary"].copy()
    for datecol in ["update_time", "create_time"]:
        daysum[datecol] = convertToDate(daysum[datecol])

    daysum.index = daysum["create_time"]
    daysum = daysum.sort_index()

    if remvOutlier:
        # Slice off weird outlier: keep only rows strictly outside the window.
        before = (daysum.index < "2019-01-28 21:12:40.484")
        after = (daysum.index > "2019-01-28 21:12:41.745")
        daysum = daysum.loc[before | after]

    # Apply the window when at least one bound is supplied; previously a
    # single bound was silently ignored because both had to be truthy.
    if start is not None or end is not None:
        daysum = daysum.loc[start:end]

    return daysum

def initSleep(start=None, end=None):
    """Return the sleep table with short column names, indexed by start_time.

    Selects the five ``com.samsung.health.sleep.*`` time columns from
    ``dataframes["sleep"]``, renames them to ``start_time``, ``end_time``,
    ``create_time``, ``update_time`` and ``time_offset``, parses the four
    timestamp columns with ``convertToDate``, and sorts by ``start_time``.

    The index is set *after* the timestamps are parsed, so it holds the parsed
    values rather than raw strings (the same approach ``initDaysum`` uses).

    Parameters
    ----------
    start, end : optional
        Label bounds applied to the sorted index with ``.loc[start:end]``.
        Either bound may be given on its own; a bound left as ``None`` leaves
        that side open.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame stored in ``dataframes`` is not modified.
    """
    prefix = "com.samsung.health.sleep."
    fields = ["start_time", "end_time", "create_time", "update_time", "time_offset"]

    # Explicit copy: assigning columns on a bare column subset triggers
    # pandas' SettingWithCopyWarning.
    sleep = dataframes["sleep"][[prefix + field for field in fields]].copy()
    sleep.columns = fields

    for datecol in ["start_time", "end_time", "create_time", "update_time"]:
        sleep[datecol] = convertToDate(sleep[datecol])

    # Index on the parsed start_time so ordering and slicing compare
    # timestamps, not strings.
    sleep.index = sleep["start_time"]
    sleep = sleep.sort_index()

    # Apply the window when at least one bound is supplied; previously a
    # single bound was silently ignored because both had to be truthy.
    if start is not None or end is not None:
        sleep = sleep.loc[start:end]

    return sleep