# Steps Analysis (02/17/17 - 10/24/18)

In [56]:
import pandas as pd
import seaborn as sns
from bs4 import BeautifulSoup
import ast
from math import ceil

In [57]:
steps_df = pd.DataFrame(columns=['start_date', 'end_date',
                                 'num_steps', 'source'])
start_date_col = []
end_date_col = []
num_steps_col = []
source_col = []

In [58]:
with open('./data/export.xml') as fp:
    soup = BeautifulSoup(fp, 'lxml-xml')

In [59]:
steps = soup.findAll('Record', {'type': 'HKQuantityTypeIdentifierStepCount'})
for num in steps:
    start_date_col.append(num['startDate'])
    end_date_col.append(num['endDate'])
    num_steps_col.append(num['value'])
    source_col.append(num['sourceName'])

In [60]:
steps_df['start_date'] = start_date_col
steps_df['end_date'] = end_date_col
steps_df['num_steps'] = num_steps_col
steps_df['source'] = source_col

In [61]:
steps_df['num_steps'] = steps_df.num_steps.astype(int)

In [62]:
sd_df = steps_df.start_date.str.split(' ', expand=True)
sd_df.columns = ['start_date', 'start_time', 'time_zone']
sd_df = sd_df.drop(columns=['time_zone'])
sd_df['start_date'] = pd.to_datetime(sd_df['start_date'], format='%Y/%m/%d')
sd_df['start_time'] = pd.to_timedelta(sd_df['start_time'])

In [63]:
ed_df = steps_df.end_date.str.split(' ', expand=True)
ed_df.columns = ['end_date', 'end_time', 'time_zone']
ed_df = ed_df.drop(columns=['time_zone'])
ed_df['end_date'] = pd.to_datetime(ed_df['end_date'], format='%Y/%m/%d')
ed_df['end_time'] = pd.to_timedelta(ed_df['end_time'])

In [64]:
steps_df = steps_df.drop(columns=['start_date', 'end_date'])

In [65]:
dur_df = pd.concat([sd_df, ed_df], axis=1)
dur_df['sdt'] = dur_df['start_date']+dur_df['start_time']
dur_df['edt'] = dur_df['end_date']+dur_df['end_time']
dur_df['duration'] = dur_df['edt']-dur_df['sdt']
dur_df = dur_df.drop(columns=['sdt', 'edt'])
dur_df.head()

Unnamed: 0,start_date,start_time,end_date,end_time,duration
0,2017-02-17,21:23:24,2017-02-17,21:30:42,00:07:18
1,2017-02-17,21:36:28,2017-02-17,21:44:19,00:07:51
2,2017-02-17,21:44:19,2017-02-17,21:51:52,00:07:33
3,2017-02-17,21:52:34,2017-02-17,21:58:37,00:06:03
4,2017-02-17,21:58:37,2017-02-17,22:07:35,00:08:58


In [103]:
df = pd.concat([steps_df, dur_df], axis=1)
df.columns

Index(['num_steps', 'source', 'start_date', 'start_time', 'end_date',
       'end_time', 'duration'],
      dtype='object')

In [104]:
df = df[['start_date', 'start_time', 'end_date',
         'end_time', 'num_steps', 'duration', 'source']]

In [105]:
df.sort_values(by=['start_date', 'start_time'], inplace=True)
df.tail(20)

Unnamed: 0,start_date,start_time,end_date,end_time,num_steps,duration,source
27715,2018-10-24,13:18:52,2018-10-24,13:23:19,59,00:04:27,iPhone
27744,2018-10-24,14:00:59,2018-10-24,15:00:59,46,01:00:00,Connected
27745,2018-10-24,15:00:59,2018-10-24,16:00:59,164,01:00:00,Connected
27746,2018-10-24,16:00:59,2018-10-24,17:00:59,304,01:00:00,Connected
27716,2018-10-24,16:06:19,2018-10-24,16:15:56,153,00:09:37,iPhone
27717,2018-10-24,16:21:58,2018-10-24,16:22:29,76,00:00:31,iPhone
27747,2018-10-24,17:00:59,2018-10-24,17:49:59,790,00:49:00,Connected
27718,2018-10-24,17:02:07,2018-10-24,17:11:02,105,00:08:55,iPhone
27719,2018-10-24,17:17:57,2018-10-24,17:27:57,218,00:10:00,iPhone
27720,2018-10-24,17:28:38,2018-10-24,17:38:37,381,00:09:59,iPhone


In [106]:
df['start_time'] = pd.to_numeric(df['start_time'])/3600000000000
df['end_time'] = pd.to_numeric(df['end_time'])/3600000000000
df['duration'] = pd.to_numeric(df['duration'])/3600000000000

In [107]:
# for num in range(len(df)):
#     if df['end_date'][num] != df['start_date'][num]:
#         df['duration'][num] = df['end_time'][num]+24 - df['start_time'][num]
# df.head()

In [108]:
# for num in range(len(df)):
#     if df.start_date[num] != df.end_date[num]:
#         total_steps = df.num_steps[num]
#         steps_per_min = total_steps/df.duration[num]
#         time_left_in_start_date = 24-df.start_time[num]
#         steps_for_start_date = ceil(time_left_in_start_date*60*steps_per_min)
#         steps_for_end_date = total_steps-steps_for_start_date
#         df.num_steps[num] = steps_for_start_date
#         df.num_steps[num+1] += steps_for_end_date
# df[5050:5100]

In [110]:
i = 0
while i<len(df):
    if (i in df.index) and (i+1 in df.index):
        if df['source'][i] == df['source'][i+1]:
            if df['end_time'][i] == df['start_time'][i+1]:
                df.loc[:,'end_time'][i] = df.loc[:,'end_time'][i+1]
                df.loc[:,'num_steps'][i] += df.loc[:,'num_steps'][i+1]
                df.loc[:,'duration'][i] += df.loc[:,'duration'][i+1]
                df.drop(df.index[i+1], inplace=True)
                print(df.iloc[i])
                i+=2
            else:
                i+=1
        else:
            i+=1
    else:
        i+=1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


start_date    2017-02-17 00:00:00
start_time                21.9769
end_date      2017-02-17 00:00:00
end_time                  22.1264
num_steps                     215
duration                 0.149444
source                     iPhone
Name: 4, dtype: object
start_date    2017-02-18 00:00:00
start_time                16.9114
end_date      2017-02-18 00:00:00
end_time                  17.0781
num_steps                     161
duration                 0.166667
source                     iPhone
Name: 13, dtype: object
start_date    2017-02-18 00:00:00
start_time                17.7008
end_date      2017-02-18 00:00:00
end_time                  17.8506
num_steps                     459
duration                 0.149722
source                     iPhone
Name: 18, dtype: object
start_date    2017-02-18 00:00:00
start_time                18.5017
end_date      2017-02-18 00:00:00
end_time                  18.6669
num_steps                     239
duration                 0.165278
source     

KeyboardInterrupt: 

In [111]:
df.head(60)

Unnamed: 0,start_date,start_time,end_date,end_time,num_steps,duration,source
0,2017-02-17,21.39,2017-02-17,21.511667,58,0.121667,iPhone
1,2017-02-17,21.607778,2017-02-17,21.864444,220,0.256667,iPhone
3,2017-02-17,21.876111,2017-02-17,22.126389,233,0.250278,iPhone
4,2017-02-17,21.976944,2017-02-17,22.126389,215,0.149444,iPhone
6,2017-02-17,22.425,2017-02-17,22.574722,18,0.149722,iPhone
7,2017-02-17,22.854722,2017-02-17,22.986389,6,0.131667,iPhone
8,2017-02-18,9.919167,2017-02-18,10.020556,19,0.101389,iPhone
9,2017-02-18,11.5625,2017-02-18,11.668056,16,0.105556,iPhone
10,2017-02-18,14.168056,2017-02-18,14.296667,20,0.128611,iPhone
11,2017-02-18,16.658056,2017-02-18,16.902778,51,0.244722,iPhone
