# Steps by source for the 10 subjects with the highest average steps

In [2]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import csv, os, pickle
from os.path import isfile, join
from os import listdir
import numpy as np
from table_loader import load_health_kit
from datetime import datetime,timedelta
from dateutil.parser import parse
from pprint import pprint
#from synapse_parser import *

synapseCacheDir = "/scratch/PI/euan/projects/mhc/data/synapseCache_v2/"
table_path = "/scratch/PI/euan/projects/mhc/data/tables/v2_data_subset/cardiovascular-HealthKitDataCollector-v1.tsv"


def parse_healthkit_steps(file_path, check_speed=False):
    tally_dict=dict()
    #read in the data
    dtype_dict=dict()
    dtype_dict['names']=('startTime',
                         'endTime',
                         'type',
                         'value',
                         'unit',
                         'source',
                         'sourceIdentifier')
    dtype_dict['formats']=(datetime,
                           datetime,
                           'S36',
                           'i',
                           'S36',
                           'S36',
                           'S36')
    try:
        data=np.genfromtxt(file_path,
                           dtype=dtype_dict['formats'],
                           names=dtype_dict['names'],
                           delimiter=',',
                           #skip_header=True, takes int. our header is mid-file
                           loose=True,
                           invalid_raise=False,
                           converters={0:lambda x: parse(x),
                                       1:lambda x: parse(x)})
    except:
        return tally_dict
    #get the duration of each activity by day
    try:
        num_rows=len(data)
    except:
        return tally_dict
    for row in range(len(data)):
        if data['startTime'][row] is not None:
            day=data['startTime'][row].date()
            value=data['value'][row]
            datatype=data['type'][row]
            sourceIdentifier=data['sourceIdentifier'][row]
            if datatype == "HKQuantityTypeIdentifierStepCount":
                if check_speed:
                    start=data['startTime'][row]
                    stop=data['endTime'][row]
                    source=data['source'][row]
                    if start==stop:
                        print "0Delta", value, source, sourceIdentifier
                        continue
                    steps_per_second = float(value)/(stop-start).seconds
                    if steps_per_second > 4:
                        print "Speedy!", steps_per_second, source, sourceIdentifier

                if day not in tally_dict:
                    tally_dict[day]=dict()
                if sourceIdentifier not in tally_dict[day]:
                    tally_dict[day][sourceIdentifier]=value
                else:
                    tally_dict[day][sourceIdentifier]+=value
        else:
            #header row mid data
            continue
    return tally_dict


#we assume a subject will rarely have multiple table entries for motion tracker and health kit data in one day,
#but this is possible in the app, so we handle it: 
#sum minute durations together if there is a key conflict for a given day 
def merge_duration_dict(d1,d2):
    #merge duration values 
    d3=dict()
    for entry in d1:
        d3[entry]=d1[entry]
    for entry in d2:
        if entry in d3:
            #sum by key
            for key in d2[entry]:
                if key in d3[entry]:
                    d3[entry][key]+=d2[entry][key]
                else:
                    d3[entry][key]=d2[entry][key]
        else:
            d3[entry]=d2[entry]
    return d3
    #update fraction values

def get_synapse_cache_entry(synapseCacheDir,blob_name):
    #print(str(blob_name)) 
    parent_dir=blob_name[-3::].lstrip('0')
    if parent_dir=="":
        parent_dir="0" 
    mypath=synapseCacheDir+parent_dir+"/"+blob_name
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    for f in onlyfiles:
        if f.startswith('data'):
            return mypath+'/'+f
        
        
outlier_subjects =  ['89a190e2-6b78-46b5-b469-27d231bbf123',
 '441a70b9-9b56-48d5-935a-a7a844abbf57',
 'b5eeefec-7f4a-48c5-aef4-0db7030fb532',
 '105de5d8-e264-47cc-8bdc-d4aff7d568bb',
 '6e070717-16ee-4485-8c05-a6f376b5d1bf',
 '54be93b9-8d7b-4640-b592-956891955dde',
 '6bcb2e5c-cfcf-48c5-bc3a-a0ef78fd4c25',
 '92d1c57e-5cce-4738-89d6-a7f67ed9baf7',
 'df3f739a-35a1-48c5-a64b-a010b952ac35',
 'bcfa41b8-8e9f-48f9-81e1-c27831cadecf']
subject = outlier_subjects[0]
print subject
cur_subject=subject

89a190e2-6b78-46b5-b469-27d231bbf123


In [3]:
data_table=load_health_kit(table_path)
pickle.dump(data_table,open("cardiovascular-HealthKitDataCollector-v1.p",'wb'))

In [4]:
data_table= pickle.load( open( "cardiovascular-HealthKitDataCollector-v1.p", "rb" ) )
data_subject = data_table[data_table["healthCode"]==subject]
print len(data_subject)

208


In [5]:
def SumHKBySourse(data_table, subject):
    data_subject = data_table[data_table["healthCode"]==subject]
    subject_distance_vals = {}
    for row in data_subject:
        blob_name=row['data']
        if blob_name.endswith('NA'):
            continue 
        synapseCacheFile=get_synapse_cache_entry(synapseCacheDir,blob_name)
        health_kit_distance=parse_healthkit_steps(synapseCacheFile)    
        #continue
        if cur_subject not in subject_distance_vals:
            subject_distance_vals[cur_subject]=health_kit_distance
        else: 
            result = merge_duration_dict(subject_distance_vals[cur_subject],health_kit_distance)
    return result

results={}
for subject in outlier_subjects:
    results[subject] = SumHKBySourse(data_table, subject)


    Line #94 (got 1 columns instead of 7)
    Line #301 (got 1 columns instead of 7)
    Line #337 (got 1 columns instead of 7)
    Line #370 (got 1 columns instead of 7)
    Line #95 (got 1 columns instead of 7)
    Line #118 (got 1 columns instead of 7)
    Line #129 (got 1 columns instead of 7)
    Line #166 (got 1 columns instead of 7)
    Line #169 (got 1 columns instead of 7)
    Line #172 (got 1 columns instead of 7)
    Line #224 (got 1 columns instead of 7)
    Line #239 (got 1 columns instead of 7)
    Line #316 (got 1 columns instead of 7)
    Line #319 (got 1 columns instead of 7)
    Line #325 (got 1 columns instead of 7)
    Line #89 (got 1 columns instead of 7)
    Line #181 (got 1 columns instead of 7)
    Line #270 (got 1 columns instead of 7)
    Line #275 (got 1 columns instead of 7)
    Line #342 (got 1 columns instead of 7)
    Line #350 (got 1 columns instead of 7)
    Line #381 (got 1 columns instead of 7)
    Line #383 (got 1 columns instead of 7)
    Line #401 

    Line #4 (got 1 columns instead of 7)
    Line #10 (got 1 columns instead of 7)
    Line #69 (got 1 columns instead of 7)
    Line #81 (got 1 columns instead of 7)
    Line #10 (got 1 columns instead of 7)
    Line #82 (got 1 columns instead of 7)
    Line #124 (got 1 columns instead of 7)
    Line #135 (got 1 columns instead of 7)
    Line #219 (got 1 columns instead of 7)
    Line #37 (got 1 columns instead of 7)
    Line #42 (got 1 columns instead of 7)
    Line #68 (got 1 columns instead of 7)
    Line #321 (got 1 columns instead of 7)
    Line #400 (got 1 columns instead of 7)
    Line #405 (got 1 columns instead of 7)
    Line #409 (got 1 columns instead of 7)
    Line #7 (got 1 columns instead of 7)
    Line #281 (got 1 columns instead of 7)
    Line #284 (got 1 columns instead of 7)
    Line #330 (got 1 columns instead of 7)
    Line #337 (got 1 columns instead of 7)
    Line #351 (got 1 columns instead of 7)
    Line #378 (got 1 columns instead of 7)
    Line #391 (got 1 co

    Line #47 (got 1 columns instead of 7)
    Line #214 (got 1 columns instead of 7)
    Line #218 (got 1 columns instead of 7)
    Line #292 (got 1 columns instead of 7)
    Line #414 (got 1 columns instead of 7)
    Line #136 (got 1 columns instead of 7)
    Line #164 (got 1 columns instead of 7)
    Line #171 (got 1 columns instead of 7)
    Line #237 (got 1 columns instead of 7)
    Line #280 (got 1 columns instead of 7)
    Line #292 (got 1 columns instead of 7)
    Line #353 (got 1 columns instead of 7)
    Line #366 (got 1 columns instead of 7)
    Line #397 (got 1 columns instead of 7)
    Line #409 (got 1 columns instead of 7)
    Line #416 (got 1 columns instead of 7)
    Line #4 (got 1 columns instead of 7)
    Line #76 (got 1 columns instead of 7)
    Line #99 (got 1 columns instead of 7)
    Line #153 (got 1 columns instead of 7)
    Line #176 (got 1 columns instead of 7)
    Line #288 (got 1 columns instead of 7)
    Line #311 (got 1 columns instead of 7)
    Line #338 (g

    Line #97 (got 1 columns instead of 7)
    Line #6 (got 5 columns instead of 7)
    Line #47 (got 1 columns instead of 7)
    Line #49 (got 1 columns instead of 7)
    Line #56 (got 1 columns instead of 7)
    Line #84 (got 1 columns instead of 7)
    Line #91 (got 1 columns instead of 7)
    Line #121 (got 1 columns instead of 7)
    Line #210 (got 1 columns instead of 7)
    Line #30 (got 1 columns instead of 7)
    Line #31 (got 1 columns instead of 7)
    Line #69 (got 1 columns instead of 7)
    Line #75 (got 1 columns instead of 7)
    Line #150 (got 1 columns instead of 7)
    Line #159 (got 1 columns instead of 7)
    Line #191 (got 1 columns instead of 7)
    Line #202 (got 1 columns instead of 7)
    Line #308 (got 1 columns instead of 7)
    Line #310 (got 1 columns instead of 7)
    Line #350 (got 1 columns instead of 7)
    Line #382 (got 1 columns instead of 7)
    Line #396 (got 1 columns instead of 7)
    Line #86 (got 1 columns instead of 7)
    Line #92 (got 1 colu

    Line #16 (got 1 columns instead of 7)
    Line #228 (got 1 columns instead of 7)
    Line #354 (got 1 columns instead of 7)
    Line #401 (got 1 columns instead of 7)
    Line #163 (got 1 columns instead of 7)
    Line #348 (got 1 columns instead of 7)
    Line #269 (got 1 columns instead of 7)
    Line #401 (got 1 columns instead of 7)
    Line #40 (got 1 columns instead of 7)
    Line #341 (got 1 columns instead of 7)
    Line #343 (got 1 columns instead of 7)
    Line #345 (got 1 columns instead of 7)
    Line #347 (got 1 columns instead of 7)
    Line #33 (got 1 columns instead of 7)
    Line #41 (got 1 columns instead of 7)
    Line #69 (got 1 columns instead of 7)
    Line #71 (got 1 columns instead of 7)
    Line #186 (got 1 columns instead of 7)
    Line #267 (got 1 columns instead of 7)
    Line #326 (got 1 columns instead of 7)
    Line #365 (got 1 columns instead of 7)
    Line #370 (got 1 columns instead of 7)
    Line #375 (got 1 columns instead of 7)
    Line #11 (got

    Line #11 (got 1 columns instead of 7)
    Line #13 (got 1 columns instead of 7)
    Line #27 (got 1 columns instead of 7)
    Line #88 (got 1 columns instead of 7)
    Line #117 (got 1 columns instead of 7)
    Line #214 (got 1 columns instead of 7)
    Line #225 (got 1 columns instead of 7)
    Line #229 (got 1 columns instead of 7)
    Line #304 (got 1 columns instead of 7)
    Line #364 (got 1 columns instead of 7)
    Line #391 (got 1 columns instead of 7)
    Line #131 (got 1 columns instead of 7)
    Line #402 (got 1 columns instead of 7)
    Line #270 (got 1 columns instead of 7)
    Line #292 (got 1 columns instead of 7)
    Line #307 (got 1 columns instead of 7)
    Line #311 (got 1 columns instead of 7)
    Line #328 (got 1 columns instead of 7)
    Line #393 (got 1 columns instead of 7)
    Line #396 (got 1 columns instead of 7)
    Line #5 (got 1 columns instead of 7)
    Line #11 (got 1 columns instead of 7)
    Line #4 (got 1 columns instead of 7)
    Line #6 (got 1 c

    Line #86 (got 1 columns instead of 7)
    Line #278 (got 1 columns instead of 7)
    Line #346 (got 1 columns instead of 7)
    Line #363 (got 1 columns instead of 7)
    Line #372 (got 1 columns instead of 7)
    Line #375 (got 1 columns instead of 7)
    Line #7 (got 1 columns instead of 7)
    Line #253 (got 1 columns instead of 7)
    Line #11 (got 1 columns instead of 7)
    Line #43 (got 1 columns instead of 7)
    Line #49 (got 1 columns instead of 7)
    Line #53 (got 1 columns instead of 7)
    Line #58 (got 1 columns instead of 7)
    Line #61 (got 1 columns instead of 7)
    Line #79 (got 1 columns instead of 7)
    Line #43 (got 1 columns instead of 7)
    Line #222 (got 1 columns instead of 7)
    Line #298 (got 1 columns instead of 7)
    Line #20 (got 1 columns instead of 7)
    Line #26 (got 1 columns instead of 7)
    Line #29 (got 1 columns instead of 7)
    Line #36 (got 1 columns instead of 7)
    Line #42 (got 1 columns instead of 7)
    Line #53 (got 1 columns

    Line #5 (got 1 columns instead of 7)
    Line #11 (got 1 columns instead of 7)
    Line #17 (got 1 columns instead of 7)
    Line #25 (got 1 columns instead of 7)
    Line #28 (got 1 columns instead of 7)
    Line #34 (got 1 columns instead of 7)
    Line #47 (got 1 columns instead of 7)
    Line #117 (got 1 columns instead of 7)
    Line #135 (got 1 columns instead of 7)
    Line #144 (got 1 columns instead of 7)
    Line #148 (got 1 columns instead of 7)
    Line #151 (got 1 columns instead of 7)
    Line #159 (got 1 columns instead of 7)
    Line #162 (got 1 columns instead of 7)
    Line #170 (got 1 columns instead of 7)
    Line #174 (got 1 columns instead of 7)
    Line #185 (got 1 columns instead of 7)
    Line #198 (got 1 columns instead of 7)
    Line #203 (got 1 columns instead of 7)
    Line #215 (got 1 columns instead of 7)
    Line #227 (got 1 columns instead of 7)
    Line #236 (got 1 columns instead of 7)
    Line #250 (got 1 columns instead of 7)
    Line #255 (got 

    Line #31 (got 1 columns instead of 7)
    Line #59 (got 1 columns instead of 7)
    Line #120 (got 1 columns instead of 7)
    Line #167 (got 1 columns instead of 7)
    Line #257 (got 1 columns instead of 7)
    Line #285 (got 1 columns instead of 7)
    Line #319 (got 1 columns instead of 7)
    Line #338 (got 1 columns instead of 7)
    Line #386 (got 1 columns instead of 7)
    Line #70 (got 1 columns instead of 7)
    Line #84 (got 1 columns instead of 7)
    Line #234 (got 1 columns instead of 7)
    Line #306 (got 1 columns instead of 7)
    Line #343 (got 1 columns instead of 7)
    Line #384 (got 1 columns instead of 7)
    Line #397 (got 1 columns instead of 7)
    Line #96 (got 1 columns instead of 7)
    Line #194 (got 1 columns instead of 7)
    Line #276 (got 1 columns instead of 7)
    Line #310 (got 1 columns instead of 7)
    Line #354 (got 1 columns instead of 7)
    Line #368 (got 1 columns instead of 7)
    Line #382 (got 1 columns instead of 7)
    Line #389 (g

These errors are due to issues with the files being produced by the app. Some lines have only one column (part of sourceIdentifier for Garmin) and others are the header line, but it's mid-document like at line 3.

# Are there other problematic sourses besides garmin? 

In [6]:
for subject in results:
    print subject, 
    
    inSubject = False
    for day in results[subject]:
        if 'com.garmin.connect.mobile' in results[subject][day]:
            inSubject = True
            
    print subject, inSubject
    if not inSubject:
        print "!", pprint(results[subject])

6e070717-16ee-4485-8c05-a6f376b5d1bf 6e070717-16ee-4485-8c05-a6f376b5d1bf True
89a190e2-6b78-46b5-b469-27d231bbf123 89a190e2-6b78-46b5-b469-27d231bbf123 True
105de5d8-e264-47cc-8bdc-d4aff7d568bb 105de5d8-e264-47cc-8bdc-d4aff7d568bb True
bcfa41b8-8e9f-48f9-81e1-c27831cadecf bcfa41b8-8e9f-48f9-81e1-c27831cadecf True
92d1c57e-5cce-4738-89d6-a7f67ed9baf7 92d1c57e-5cce-4738-89d6-a7f67ed9baf7 False
!{datetime.date(2017, 4, 18): {'com.apple.health.4D0C936B-7988-43DF-': 57437,
                              'com.apple.health.4E5475C8-511D-480D-': 56064}}
 None
6bcb2e5c-cfcf-48c5-bc3a-a0ef78fd4c25 6bcb2e5c-cfcf-48c5-bc3a-a0ef78fd4c25 False
!{datetime.date(2017, 1, 8): {'com.misfitwearables.Prometheus': 33682},
 datetime.date(2017, 1, 9): {'com.misfitwearables.Prometheus': 213034},
 datetime.date(2017, 1, 10): {'com.misfitwearables.Prometheus': 258072},
 datetime.date(2017, 1, 11): {'com.misfitwearables.Prometheus': 16652},
 datetime.date(2017, 1, 12): {'com.misfitwearables.Prometheus': 81468},
 

Yes! - dupliate com.apple.health (folks with phones and watches), misfitwearable

# More

[Here's a thread](https://forums.garmin.com/archive/index.php/t-346431.html) that suggests what is going on:

khodak 03-30-2016, 11:44 PM
...
I have a slightly different problem. Fenix 3 shows 6000 steps (which is correct), and Apple Health over 40000! Trouble is, Garmin is writing the same data multiple times into Health. It's driving me mad :(