In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np

In [2]:
with open('../data/export.xml') as fp:
    soup = BeautifulSoup(fp, 'lxml-xml')

In [3]:
flights = soup.findAll(
    'Record', {'type': 'HKQuantityTypeIdentifierFlightsClimbed'})
distance = soup.findAll(
    'Record', {'type': 'HKQuantityTypeIdentifierDistanceWalkingRunning'})
steps = soup.findAll(
    'Record', {'type': 'HKQuantityTypeIdentifierStepCount'})

In [144]:
fdf = pd.DataFrame()
sdf = pd.DataFrame()
ddf = pd.DataFrame()

fdf.loc[:, 'start_date'] = [num['startDate'] for num in flights]
fdf.loc[:, 'end_date'] = [num['endDate'] for num in flights]
fdf.loc[:, 'num_floors'] = [num['value'] for num in flights]
fdf.loc[:, 'source'] = [num['sourceName'] for num in flights]
fdf.loc[:, 'num_floors'] = fdf.num_floors.astype(int)

sdf.loc[:, 'start_date'] = [num['startDate'] for num in steps]
sdf.loc[:, 'end_date'] = [num['endDate'] for num in steps]
sdf.loc[:, 'num_steps'] = [num['value'] for num in steps]
sdf.loc[:, 'source'] = [num['sourceName'] for num in steps]
sdf.loc[:, 'num_steps'] = sdf.num_steps.astype(int)

ddf.loc[:, 'start_date'] = [num['startDate'] for num in distance]
ddf.loc[:, 'end_date'] = [num['endDate'] for num in distance]
ddf.loc[:, 'tot_dist'] = [num['value'] for num in distance]
ddf.loc[:, 'source'] = [num['sourceName'] for num in distance]
ddf.loc[:, 'tot_dist'] = ddf.tot_dist.astype(float)

fdf.sort_values(by=['start_date', 'end_date'], inplace=True)
sdf.sort_values(by=['start_date', 'end_date'], inplace=True)
ddf.sort_values(by=['start_date', 'end_date'], inplace=True)

fdf.reset_index(inplace=True)
fdf.drop(columns=['index'], inplace=True)
sdf.reset_index(inplace=True)
sdf.drop(columns=['index'], inplace=True)
ddf.reset_index(inplace=True)
ddf.drop(columns=['index'], inplace=True)

In [145]:
def clean_start_end_times(df):
    start_date_df = df.start_date.str.split(' ', expand=True).drop(columns=[2])
    end_date_df = df.end_date.str.split(' ', expand=True).drop(columns=[2])

    start_date_df.loc[:, 'std'] = start_date_df[0] + ' ' + start_date_df[1]
    end_date_df.loc[:, 'etd'] = end_date_df[0] + ' ' + end_date_df[1]

    start_date_df.loc[:, 'sd'] = pd.to_datetime(
        start_date_df[0], format='%Y-%m-%d')
    end_date_df.loc[:, 'ed'] = pd.to_datetime(
        end_date_df[0], format='%Y-%m-%d')

    start_date_df.loc[:, 'st'] = pd.to_timedelta(start_date_df[1])
    end_date_df.loc[:, 'et'] = pd.to_timedelta(end_date_df[1])

    df.loc[:, 'start_date'] = start_date_df.sd
    df.loc[:, 'start_time'] = pd.to_numeric(start_date_df.st) / 3.6e12
    df.loc[:, 'end_date'] = end_date_df.ed
    df.loc[:, 'end_time'] = pd.to_numeric(end_date_df.et) / 3.6e12
    df.loc[:, 'duration'] = pd.to_numeric(
        (end_date_df.ed + end_date_df.et) - (start_date_df.sd + start_date_df.st)
    ) / 3.6e12

    return df

In [146]:
ddf = clean_start_end_times(ddf)
sdf = clean_start_end_times(sdf)
fdf = clean_start_end_times(fdf)

In [147]:
ddf.head()

Unnamed: 0,start_date,end_date,tot_dist,source,start_time,end_time,duration
0,2017-02-17,2017-02-17,0.025756,Mojito’s iPhone,21.39,21.511667,0.121667
1,2017-02-17,2017-02-17,0.082226,Mojito’s iPhone,21.607778,21.738611,0.130833
2,2017-02-17,2017-02-17,0.019579,Mojito’s iPhone,21.738611,21.864444,0.125833
3,2017-02-17,2017-02-17,0.008028,Mojito’s iPhone,21.876111,21.976944,0.100833
4,2017-02-17,2017-02-17,0.086389,Mojito’s iPhone,21.976944,22.126389,0.149444


In [148]:
ddf.columns

Index(['start_date', 'end_date', 'tot_dist', 'source', 'start_time',
       'end_time', 'duration'],
      dtype='object')

In [149]:
print(ddf.info(), ddf.tot_dist.sum(), ddf.duration.sum(), sep='\n\n\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38452 entries, 0 to 38451
Data columns (total 7 columns):
start_date    38452 non-null datetime64[ns]
end_date      38452 non-null datetime64[ns]
tot_dist      38452 non-null float64
source        38452 non-null object
start_time    38452 non-null float64
end_time      38452 non-null float64
duration      38452 non-null float64
dtypes: datetime64[ns](2), float64(4), object(1)
memory usage: 2.1+ MB
None


2727.3623978190003


9645.449444444444


In [150]:
ddf.duration.max()

41.0775

In [151]:
ddf.loc[:, 'mph'] = ddf['tot_dist'] / ddf['duration']
ndf1 = ddf[ddf['start_date'] + one_day == ddf['end_date']].copy()
ndf2 = ddf[ddf['start_date'] + one_day == ddf['end_date']].copy()

ndf1.loc[:, 'end_date'] = ndf1['start_date'].copy()
ndf1.loc[:, 'end_time'] = 24 - 1e-10
ndf1.loc[:, 'duration'] = ndf1['end_time'] - ndf1['start_time']
ndf1.loc[:, 'tot_dist'] = ndf1['mph'] * ndf1['duration']
ndf1.reset_index(inplace=True)
ndf1.drop(columns=['index'], inplace=True)

ndf2.loc[:, 'start_date'] = ndf2['end_date'].copy()
ndf2.loc[:, 'start_time'] = 0
ndf2.loc[:, 'duration'] = ndf2['end_time'].copy()
ndf2.loc[:, 'tot_dist'] = ndf2['mph'] * ndf2['duration']
ndf2.reset_index(inplace=True)
ndf2.drop(columns=['index'], inplace=True)

ddf = ddf[ddf['start_date'] + one_day != ddf['end_date']].copy()
ddf = ddf.append([ndf1, ndf2], ignore_index=True, sort=False)
ddf.sort_values(by=['start_date', 'start_time', 'end_time'], inplace=True)
ddf.reset_index(inplace=True)
ddf.drop(columns=['index'], inplace=True)

In [152]:
print(ddf.info(), ddf.tot_dist.sum(), ddf.duration.sum(), ddf.duration.max(), sep='\n\n\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38773 entries, 0 to 38772
Data columns (total 8 columns):
start_date    38773 non-null datetime64[ns]
end_date      38773 non-null datetime64[ns]
tot_dist      38773 non-null float64
source        38773 non-null object
start_time    38773 non-null float64
end_time      38773 non-null float64
duration      38773 non-null float64
mph           38773 non-null float64
dtypes: datetime64[ns](2), float64(5), object(1)
memory usage: 2.4+ MB
None


2727.362397812499


9645.449444412345


41.0775


In [153]:
measure_unit = 'tot_dist'
speed_unit = 'mph'
one_day = pd.to_timedelta(1, 'D')
two_days = pd.to_timedelta(2, 'D')
ddf.loc[:, speed_unit] = ddf[measure_unit] / ddf['duration']

ndf1 = ddf[ddf['start_date'] + two_days == ddf['end_date']].copy()
ndf2 = ddf[ddf['start_date'] + two_days == ddf['end_date']].copy()
ndf3 = ddf[ddf['start_date'] + two_days == ddf['end_date']].copy()

ndf1.loc[:, 'end_date'] = ndf1['start_date'].copy()
ndf1.loc[:, 'end_time'] = 24 - 1e-10
ndf1.loc[:, 'duration'] = (24 - 1e-10) - ndf1['start_time']
ndf1.loc[:, measure_unit] = ndf1[speed_unit] * ndf1['duration']
ndf1.reset_index(inplace=True)
ndf1.drop(columns=['index'], inplace=True)

ndf2.loc[:, 'start_date'] = ndf2['start_date'] + one_day
ndf2.loc[:, 'end_date'] = ndf2['start_date'] + one_day
ndf2.loc[:, 'start_time'] = 0
ndf1.loc[:, 'end_time'] = 24 - 1e-10
ndf2.loc[:, 'duration'] = 24 - 1e-10
ndf2.loc[:, measure_unit] = ndf2[speed_unit] * (24 - 1e-10)
ndf2.reset_index(inplace=True)
ndf2.drop(columns=['index'], inplace=True)

ndf3.loc[:, 'start_date'] = ndf3['end_date'].copy()
ndf3.loc[:, 'start_time'] = 0
ndf3.loc[:, 'duration'] = ndf3['end_time'].copy()
ndf3.loc[:, measure_unit] = ndf3[speed_unit] * ndf3['duration']
ndf3.reset_index(inplace=True)
ndf3.drop(columns=['index'], inplace=True)

ddf = ddf[ddf['start_date'] + two_days != ddf['end_date']].copy()
ddf = ddf.append([ndf1, ndf2, ndf3], ignore_index=True, sort=False)
ddf.sort_values(by=['start_date', 'start_time', 'end_time'], inplace=True)
ddf.reset_index(inplace=True)
ddf.drop(columns=['index'], inplace=True)

In [154]:
print(ddf.info(), ddf.tot_dist.sum(), ddf.duration.sum(), ddf.duration.max(), sep='\n\n\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38777 entries, 0 to 38776
Data columns (total 8 columns):
start_date    38777 non-null datetime64[ns]
end_date      38777 non-null datetime64[ns]
tot_dist      38777 non-null float64
source        38777 non-null object
start_time    38777 non-null float64
end_time      38777 non-null float64
duration      38777 non-null float64
mph           38775 non-null float64
dtypes: datetime64[ns](2), float64(5), object(1)
memory usage: 2.4+ MB
None


2727.3623978124456


9645.449444411945


23.9999999999


In [28]:
print(len(ndf1), ndf1.tot_dist.sum(), ndf1.duration.sum(), sep='\n')

321
34.769925773000004
340.4530555555556


In [45]:
print(ndf1.tot_dist.sum() + ndf2.tot_dist.sum())
print(ndf1.duration.sum() + ndf2.duration.sum())

34.76992576649873
340.4530555234561


In [31]:
print(len(ndf1), ndf1.tot_dist.sum(), ndf1.duration.sum(), sep='\n')

321
24.358353810830288
266.6808333012339


In [34]:
ndf1

Unnamed: 0,start_date,end_date,tot_dist,source,start_time,end_time,duration,mph
284,2017-02-25,2017-02-25,0.008638,Mojito’s iPhone,23.905278,24.0,0.094722,0.091188
2600,2017-05-03,2017-05-03,0.025891,Mojito’s iPhone,23.943333,24.0,0.056667,0.456892
2853,2017-05-13,2017-05-13,0.034119,Mojito’s iPhone,23.836111,24.0,0.163889,0.208184
3274,2017-05-26,2017-05-26,0.012100,Mojito’s iPhone,23.878056,24.0,0.121944,0.099224
3678,2017-06-05,2017-06-05,0.025755,Mojito’s iPhone,23.897778,24.0,0.102222,0.251950
5054,2017-07-07,2017-07-07,0.020150,Mojito’s iPhone,23.840833,24.0,0.159167,0.126595
5387,2017-07-14,2017-07-14,0.134020,Mojito’s iPhone,23.928333,24.0,0.071667,1.870049
6150,2017-07-28,2017-07-28,0.000597,Mojito’s iPhone,23.986667,24.0,0.013333,0.044739
6204,2017-07-29,2017-07-29,0.029077,Mojito’s iPhone,23.956111,24.0,0.043889,0.662510
7026,2017-08-19,2017-08-19,0.003977,Mojito’s iPhone,23.989444,24.0,0.010556,0.376794


In [37]:
ndf2

Unnamed: 0,start_date,end_date,tot_dist,source,start_time,end_time,duration,mph
284,2017-02-26,2017-02-26,4.280770e-03,Mojito’s iPhone,0,0.046944,0.046944,0.091188
2600,2017-05-04,2017-05-04,4.340475e-02,Mojito’s iPhone,0,0.095000,0.095000,0.456892
2853,2017-05-14,2017-05-14,5.782900e-04,Mojito’s iPhone,0,0.002778,0.002778,0.208184
3274,2017-05-27,2017-05-27,4.409963e-03,Mojito’s iPhone,0,0.044444,0.044444,0.099224
3678,2017-06-06,2017-06-06,4.829037e-03,Mojito’s iPhone,0,0.019167,0.019167,0.251950
5054,2017-07-08,2017-07-08,4.923135e-04,Mojito’s iPhone,0,0.003889,0.003889,0.126595
5387,2017-07-15,2017-07-15,1.766158e-01,Mojito’s iPhone,0,0.094444,0.094444,1.870049
6150,2017-07-29,2017-07-29,5.629624e-03,Mojito’s iPhone,0,0.125833,0.125833,0.044739
6204,2017-07-30,2017-07-30,5.833768e-02,Mojito’s iPhone,0,0.088056,0.088056,0.662510
7026,2017-08-20,2017-08-20,5.379783e-02,Mojito’s iPhone,0,0.142778,0.142778,0.376794


In [38]:
print(len(ndf2), ndf2.tot_dist.sum(), ndf2.duration.sum(), sep='\n')

321
10.411571955668444
73.77222222222221


In [16]:
mdf = ddf[ddf['start_date'] != ddf['end_date']]

In [None]:
mdf.loc[:, 'mph'] = mdf['tot_dist'] / mdf['duration']
mdf.loc[:, 'end_date'] = mdf['start_date'].copy()
mdf.loc[:, 'end_time'] = 24 - 1e-10
mdf.loc[:, 'duration'] = mdf['end_time'] - mdf['start_time']
mdf.loc[:, 'tot_dist'] = mdf['mph'] * mdf['duration']

In [189]:

ddf.loc[:, 'mph'] = ddf['tot_dist'] / ddf['duration']
for i in ddf.index:
    if ddf['start_date'][i] + one_day == ddf.end_date[i]:
        add_st = 0
        new_st = ddf.start_time[i]
        new_et = 24 - 1e-8
        add_et = ddf.end_time[i]
        new_dr = new_et - new_st
        add_dr = add_et
        speed  = ddf['mph'][i]
        new_td = speed * new_dr
        add_td = speed * add_dr
        new_sed = ddf['start_date'][i]
        add_sed = ddf['end_date'][i]
        src = ddf['source'][i]
        ddf.loc[i, :] = [new_sed, new_sed, new_td, src, new_st, new_et, new_dr, speed]
        ddf.loc[len(ddf), :] = [add_sed, add_sed, add_td, src, add_st, add_et, add_dr, speed]
        ddf.sort_values(by=['start_date', 'start_time', 'end_time'], inplace=True)
        ddf.reset_index(inplace=True)
        ddf.drop(columns=['index'], inplace=True)
    elif ddf['start_date'][i] + two_days == ddf.end_date[i]:
        new_st = ddf.start_time[i]
        add1_st, add2_st = 0, 0

        new_et, add1_et = 24 - 1e-8, 24 - 1e-8
        add2_et = ddf.end_time[i]

        new_dr = 24 - new_st
        add1_dr = 24
        add2_dr = add2_et

        speed  = ddf['mph'][i]

        new_td = speed * new_dr
        add1_td = speed * add1_dr
        add2_td = speed * add2_dr

        new_sed = ddf['start_date'][i]
        add1_sed = ddf['start_date'][i] + one_day
        add2_sed = ddf['end_date'][i]

        src = ddf['source'][i]

        ddf.loc[i, :] = [new_sed, new_sed, new_td, src, new_st, new_et, new_dr, speed]
        ddf.loc[len(ddf), :] = [add1_sed, add1_sed, add1_td, src, add1_st, add1_et, add1_dr, speed]
        ddf.loc[len(ddf) + 1, :] = [add2_sed, add2_sed, add2_td, src, add2_st, add2_et, add2_dr, speed]
        ddf.sort_values(by=['start_date', 'start_time', 'end_time'], inplace=True)
        ddf.reset_index(inplace=True)
        ddf.drop(columns=['index'], inplace=True)
        

In [190]:
ddf.source.value_counts()

Mojito’s iPhone    26974
Connected          11798
Name: source, dtype: int64

In [191]:
print(ddf.info(), ddf.tot_dist.sum(), ddf.duration.sum(), sep='\n\n\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38772 entries, 0 to 38771
Data columns (total 8 columns):
start_date    38772 non-null datetime64[ns]
end_date      38772 non-null datetime64[ns]
tot_dist      38772 non-null float64
source        38772 non-null object
start_time    38772 non-null float64
end_time      38772 non-null float64
duration      38772 non-null float64
mph           38772 non-null float64
dtypes: datetime64[ns](2), float64(5), object(1)
memory usage: 2.4+ MB
None


2727.3623971691595


9645.449441284445


In [192]:
lddf = len(ddf)
pddf = 0
while lddf != pddf:
    lddf = len(ddf)
    ddf.loc[:, 'cond1'] = np.where(ddf.start_time >= ddf.start_time.shift(1), 1, 0)
    ddf.loc[:, 'cond2'] = np.where(ddf.end_time <= ddf.end_time.shift(1), 1, 0)
    ddf.loc[:, 'cond3'] = np.where(ddf.start_date == ddf.start_date.shift(1), 1, 0)
    ddf.loc[:, 'condsum'] = ddf.cond1 + ddf.cond2 + ddf.cond3
    ddf = ddf[ddf['condsum'] < 3].copy()
    pddf = len(ddf)
for i in ddf.index:
    if type(ddf.source[i]) == pd.core.series.Series:
        ddf.loc[i, 'source'] = ddf.source[i][0]

In [193]:
ddf.columns

Index(['start_date', 'end_date', 'tot_dist', 'source', 'start_time',
       'end_time', 'duration', 'mph', 'cond1', 'cond2', 'cond3', 'condsum'],
      dtype='object')

In [194]:
ddf.loc[:, 'cond1'] = np.where(ddf.end_time >= ddf.start_time.shift(1), 1, 0)
ddf.loc[:, 'cond2'] = np.where(ddf.start_date == ddf.start_date.shift(1), 1, 0)
ddf.loc[:, 'conds'] = np.where(ddf.source != ddf.source.shift(1), 1, 0)
ddf.loc[:, 'condsum'] = ddf.cond1 + ddf.cond2 + ddf.conds

In [195]:
ddf.drop(columns=['cond1', 'cond2', 'conds'], inplace=True)

In [196]:
ddf.tail(50)

Unnamed: 0,start_date,end_date,tot_dist,source,start_time,end_time,duration,mph,cond3,condsum
38689,2019-04-09,2019-04-09,0.000621,Connected,6.001389,7.001389,1.0,0.000621,1,2
38690,2019-04-09,2019-04-09,0.053438,Connected,7.001389,7.418056,0.416667,0.128251,1,2
38691,2019-04-09,2019-04-09,0.21172,Mojito’s iPhone,7.3225,7.480556,0.158056,1.339529,1,3
38693,2019-04-09,2019-04-09,0.137944,Connected,7.434722,7.601389,0.166667,0.827664,1,3
38694,2019-04-09,2019-04-09,0.373662,Mojito’s iPhone,7.494167,7.659722,0.165556,2.257019,1,3
38695,2019-04-09,2019-04-09,0.202567,Connected,7.601389,7.668056,0.066667,3.038505,1,3
38696,2019-04-09,2019-04-09,0.041632,Connected,7.668056,8.001389,0.333333,0.124896,1,2
38698,2019-04-09,2019-04-09,0.111225,Connected,8.001389,9.001389,1.0,0.111225,1,2
38699,2019-04-09,2019-04-09,0.19014,Connected,9.001389,10.001389,1.0,0.19014,1,2
38704,2019-04-09,2019-04-09,0.0,Connected,10.001389,11.001389,1.0,0.0,1,2


In [207]:
ddf.loc[:, 'start_time'] = np.where(
    ddf['condsum'] == 3, ddf.end_time.shift(1) + 1e-10, ddf.start_time)
ddf.loc[:, 'duration'] = np.where(
    ddf['condsum'] == 3, ddf.end_time - ddf.start_time, ddf['duration'])
ddf.loc[:, 'tot_dist'] =  np.where(
    ddf['condsum'] == 3, ddf.duration * ddf['mph'], ddf['tot_dist'])

In [208]:
ddf.tail(50)

Unnamed: 0,start_date,end_date,tot_dist,source,start_time,end_time,duration,mph,cond3,condsum
38689,2019-04-09,2019-04-09,0.000621,Connected,6.001389,7.001389,1.0,0.000621,1,2
38690,2019-04-09,2019-04-09,0.053438,Connected,7.001389,7.418056,0.416667,0.128251,1,2
38691,2019-04-09,2019-04-09,0.083721,Mojito’s iPhone,7.418056,7.480556,0.0625,1.339529,1,3
38693,2019-04-09,2019-04-09,0.100009,Connected,7.480556,7.601389,0.120833,0.827664,1,3
38694,2019-04-09,2019-04-09,0.131659,Mojito’s iPhone,7.601389,7.659722,0.058333,2.257019,1,3
38695,2019-04-09,2019-04-09,0.025321,Connected,7.659722,7.668056,0.008333,3.038505,1,3
38696,2019-04-09,2019-04-09,0.041632,Connected,7.668056,8.001389,0.333333,0.124896,1,2
38698,2019-04-09,2019-04-09,0.111225,Connected,8.001389,9.001389,1.0,0.111225,1,2
38699,2019-04-09,2019-04-09,0.19014,Connected,9.001389,10.001389,1.0,0.19014,1,2
38704,2019-04-09,2019-04-09,0.0,Connected,10.001389,11.001389,1.0,0.0,1,2


In [111]:
for i in ddf.index:
    if ddf['condsum'][i] == 3:
        new_st = ddf.end_time[i-1] + 1e-10
        new_et = ddf.end_time[i]
        new_dr = new_et - new_st
        speed  = ddf['mph'][i]
        new_td = speed * new_dr
        src = ddf['source'][i]
        srt_dt = ddf['start_date'][i]
        end_dt = ddf['end_date'][i]
        ddf.loc[i, :] = [srt_dt, end_dt, new_td, src, new_st, new_et, new_dr, speed, 3]

In [112]:
print(ddf.info(), ddf.tot_dist.sum(), ddf.duration.sum(), sep='\n\n\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31799 entries, 0 to 31798
Data columns (total 9 columns):
start_date    31799 non-null datetime64[ns]
end_date      31799 non-null datetime64[ns]
tot_dist      31799 non-null float64
source        31799 non-null object
start_time    31799 non-null float64
end_time      31799 non-null float64
duration      31799 non-null float64
mph           31799 non-null float64
condsum       31799 non-null int64
dtypes: datetime64[ns](2), float64(5), int64(1), object(1)
memory usage: 2.2+ MB
None


2200.588676252133


8839.10666328057


In [52]:
def remove_overlap_times_first(df):
    '''Step : 5
       Remove all the rows that overlap times.

       To remove overlapping rows, make sure to check if start date for the
       same row are the same and make sure the following row has the same
       starting and ending date.

       Also, the starting time of the following row has to be greater than or
       equal to the starting time for the row and the ending time for the
       following row has to be less than or equal to the row to check.

       Once the row is identified, drop the row, and reset index.

       Using while loop, since the function can keep checking the same row
       with the following rows.'''

    i = 0
    while i < len(df) - 1:

        if df['source'][i] == df['source'][i + 1]:
            i += 1
        else:
            if df.start_time[i] <= df.start_time[i+1] and (
                df.end_time[i] >= df.end_time[i+1]) and (
                df.start_date[i] == df.start_date[i+1] and
                df.start_date[i] == df.end_date[i+1]) and (
                df.start_date[i] == df.end_date[i]):

                df.drop(index=(i+1), inplace=True)
                df.reset_index(inplace=True)
                df.drop(columns=['index'], inplace=True)
            else:
                i += 1
    return df

In [79]:
ddf.condsum.sum()

110661

In [81]:
ddf

Unnamed: 0,start_date,end_date,tot_dist,source,start_time,end_time,duration,cond1,cond2,cond3,cond4,condsum
0,2017-02-17,2017-02-17,0.025756,Mojito’s iPhone,21.390000,21.511667,00:07:18,0,0,0,1,1
1,2017-02-17,2017-02-17,0.082226,Mojito’s iPhone,21.607778,21.738611,00:07:51,0,1,1,1,3
2,2017-02-17,2017-02-17,0.019579,Mojito’s iPhone,21.738611,21.864444,00:07:33,0,1,1,1,3
3,2017-02-17,2017-02-17,0.008028,Mojito’s iPhone,21.876111,21.976944,00:06:03,0,1,1,1,3
4,2017-02-17,2017-02-17,0.086389,Mojito’s iPhone,21.976944,22.126389,00:08:58,0,1,1,1,3
5,2017-02-17,2017-02-17,0.109740,Mojito’s iPhone,22.273611,22.425000,00:09:05,0,1,1,1,3
6,2017-02-17,2017-02-17,0.008128,Mojito’s iPhone,22.425000,22.574722,00:08:59,0,1,1,1,3
7,2017-02-17,2017-02-17,0.002691,Mojito’s iPhone,22.854722,22.986389,00:07:54,0,1,1,1,3
8,2017-02-18,2017-02-18,0.008345,Mojito’s iPhone,9.919167,10.020556,00:06:05,1,0,0,1,2
9,2017-02-18,2017-02-18,0.006761,Mojito’s iPhone,11.562500,11.668056,00:06:20,0,1,1,1,3


In [53]:
ddf = remove_overlap_times_first(ddf)

In [54]:
sdf = remove_overlap_times_first(sdf)

In [55]:
fdf = remove_overlap_times_first(fdf)

In [56]:
ddf.duration.sum()

9255.076666666668

In [57]:
sdf.duration.sum()

9254.939722222221

In [58]:
fdf.duration.sum()

8.471111111111112

In [41]:
ddf = remove_overlap_times_second(ddf)

In [42]:
sdf = remove_overlap_times_second(sdf)

In [43]:
fdf = remove_overlap_times_second(fdf)

In [44]:
ddf.duration.sum()

9255.060000000001

In [45]:
sdf.duration.sum()

9254.923055555555

In [46]:
fdf.duration.sum()

8.471111111111112