In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.distributions.empirical_distribution import ECDF


In [122]:
def import_data(path="../data/a02_p3.csv"):
    """Load a raw accelerometer recording into a timestamp-indexed DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file containing the columns ``timestamp`` (formatted as
        ``HH:MM:SS.ffffff``), ``x``, ``y`` and ``z``. Defaults to the test
        recording this notebook was developed against.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y``, ``z`` and ``norm`` (Euclidean magnitude of the
        three axes), indexed by the parsed timestamps. The timestamp format
        carries no date, so pandas places every sample on 1900-01-01.
    """
    data = pd.read_csv(path)
    timestamps = pd.to_datetime(data["timestamp"], format='%H:%M:%S.%f')

    df = pd.DataFrame(
        {
            'x': data["x"].to_numpy(),
            'y': data["y"].to_numpy(),
            'z': data["z"].to_numpy(),
        },
        index=timestamps,
    )

    # Vectorised magnitude: same values as a row-wise apply, but computed in
    # one pass over the columns instead of one Python call per sample.
    df['norm'] = np.sqrt(df['x'] ** 2 + df['y'] ** 2 + df['z'] ** 2)

    return df

# Initiate Class

In [202]:
fs = 25  # [Hz]; only used below to derive max_time_gap_msec

# Tunable settings shared by the helper functions in this notebook.
params = dict(
    # rolling-variance window, in seconds (converted to samples later)
    win_size_sec=3,
    # threshold applied to the normalised histogram in update_var_th
    ecdf_diff_th=0.01,
    # upper bound for the variance threshold
    var_th=0.05,
    # rolling-mean window of the abrupt-movement filter, in seconds
    abrupt_filt_time_const=10,
    # rolling-mean level above which a sample counts as a stay
    abrupt_pctg_th=0.2,
    # minimum stay duration; compared as minutes when short stays are dropped
    min_stay_duration=1,
    # five sample periods at fs, in milliseconds
    # NOTE(review): not referenced by any code visible in this notebook
    max_time_gap_msec=1e3 * 5 / fs,
    # time gap, in minutes, that starts a new section
    max_section_gap_minutes=7,
    # percentile of time gaps kept when estimating the sample rate
    max_time_gap_pctl=60,
)

# methods

In [116]:
def avg_sample_rate(time_diffs, params):
    """Estimate the average sampling rate in Hz from inter-sample gaps.

    Parameters
    ----------
    time_diffs : pandas.Series
        Gaps between consecutive samples in nanoseconds; NaN entries (such as
        the first sample, which has no predecessor) are ignored.
    params : dict
        Must provide ``"max_time_gap_pctl"``: gaps above this percentile of
        the valid gaps are excluded so long pauses do not skew the estimate.

    Returns
    -------
    float
        Mean of the per-gap rates ``1e9 / gap``; NaN when no usable gap exists.
    """
    valid = time_diffs[time_diffs.notnull()]
    if valid.empty:
        # Nothing to take a percentile of.
        return float("nan")

    max_gap = np.percentile(valid, params["max_time_gap_pctl"])

    # Zero gaps (duplicate timestamps) have no finite rate; the previous
    # per-element Python division raised ZeroDivisionError on them.
    kept = valid[(valid > 0) & (valid <= max_gap)]

    return (1e9 / kept).mean()


In [136]:
def find_sections_idx(time_diffs_ns, params):
    """Locate the timestamps that delimit sections of continuous recording.

    Parameters
    ----------
    time_diffs_ns : pandas.Series
        Gap to the previous sample in nanoseconds, indexed by sample timestamp.
    params : dict
        Must provide ``"max_section_gap_minutes"``: a gap strictly larger than
        this starts a new section.

    Returns
    -------
    pandas.DatetimeIndex
        The first timestamp, every timestamp whose preceding gap exceeds the
        limit, and the last timestamp, in that order. Empty for empty input.
    """
    if len(time_diffs_ns) == 0:
        return pd.to_datetime([])

    # minutes -> nanoseconds, the unit of time_diffs_ns
    max_section_gap_ns = float(params["max_section_gap_minutes"]) * 60 * 1e9

    # Use the argument itself; the previous version indexed a global named
    # `time_diffs`, which only existed through leftover kernel state.
    is_section_start = time_diffs_ns > max_section_gap_ns
    section_idxs_list = time_diffs_ns[is_section_start].index.to_list()

    section_idxs_list.insert(0, time_diffs_ns.index[0])
    section_idxs_list.append(time_diffs_ns.index[-1])
    return pd.to_datetime(section_idxs_list)

In [120]:
def update_var_th(acc_abs, win_size_smp, MAX_HIST_BINS, params):
    """Optionally lower the variance threshold based on the data's own spread.

    The rolling variance of ``acc_abs`` is histogrammed and normalised to sum
    to one. The left edge of the last bin whose mass is below
    ``params["ecdf_diff_th"]`` is taken as the candidate threshold, and the
    smaller of that candidate and ``params["var_th"]`` is returned.

    Parameters
    ----------
    acc_abs : pandas.Series
        Signal whose rolling variance is analysed.
    win_size_smp : int
        Rolling window length in samples.
    MAX_HIST_BINS : int
        Number of histogram bins.
    params : dict
        Must provide ``"ecdf_diff_th"`` and ``"var_th"``.

    Returns
    -------
    float
        The variance threshold to use; ``params["var_th"]`` when no candidate
        can be derived.
    """
    # NOTE(review): the candidate comes from the LAST sparse bin, i.e. the
    # upper tail of the distribution - confirm this is the intended "knee".
    default_th = float(params["var_th"])

    acc_rollvar = acc_abs.rolling(win_size_smp, min_periods=0).var()
    acc_rollvar = acc_rollvar[acc_rollvar.notnull()]
    if acc_rollvar.empty:
        # No variance values, so there is no histogram to analyse.
        return default_th

    hist, bin_edges = np.histogram(acc_rollvar, bins=MAX_HIST_BINS)
    mvr_epdf = hist / hist.sum()  # normalize to pdf

    # Test for "no sparse bin" BEFORE indexing: taking [-1] of an empty result
    # raised IndexError, so the old fallback could never run.
    sparse_bins = np.flatnonzero(mvr_epdf < params["ecdf_diff_th"])
    if sparse_bins.size == 0:
        return default_th

    knee_th = bin_edges[sparse_bins[-1]]  # scalar, safe to pass to float()
    return float(min(default_th, knee_th))

In [157]:
def filter_abrupt_movements(is_stay_raw, abrupt_filt_size, params):
    """Smooth the raw stay flags with a rolling mean and re-threshold them.

    A sample is reported as a stay when the mean of ``is_stay_raw`` over the
    rolling window of ``abrupt_filt_size`` samples is strictly greater than
    ``params["abrupt_pctg_th"]``.
    """
    threshold = params["abrupt_pctg_th"]
    window = is_stay_raw.rolling(abrupt_filt_size, min_periods=0)
    stay_fraction = window.mean()
    return stay_fraction.gt(threshold)

In [118]:
def convert_filters_size(time_diffs, params):
    """Translate the time-based filter settings into sample counts.

    The sample rate is estimated from ``time_diffs`` via ``avg_sample_rate``;
    ``params["win_size_sec"]`` and ``params["abrupt_filt_time_const"]`` (both
    in seconds) are then multiplied by it and rounded down.

    Returns
    -------
    tuple
        ``(win_size_smp, abrupt_filt_size)`` as integers.
    """
    sample_rate_hz = avg_sample_rate(time_diffs, params)

    def _to_samples(seconds):
        # floor so a window never spans more time than requested
        return np.floor(seconds * sample_rate_hz).astype('int')

    return (
        _to_samples(params["win_size_sec"]),
        _to_samples(params["abrupt_filt_time_const"]),
    )

In [142]:
def check_stay_raw(df, win_size_smp, var_th, NUM_DIMS):
    """Flag samples whose recent acceleration variance is low on every axis.

    A sample is a stay when the rolling variance (window ``win_size_smp``) of
    each of ``x``, ``y`` and ``z`` is below ``var_th / NUM_DIMS`` and the
    rolling variance of ``norm`` is below ``var_th``.

    Returns
    -------
    pandas.Series
        Boolean flags aligned with ``df``'s index.
    """
    rolling_var = df.rolling(win_size_smp, min_periods=0).var()

    per_axis_limit = var_th / NUM_DIMS
    axes_quiet = (rolling_var[["x", "y", "z"]] < per_axis_limit).all(axis=1)
    norm_quiet = rolling_var["norm"] < var_th

    return axes_quiet & norm_quiet

In [163]:
def find_time_tags(df):
    """Return the index labels at which ``df.is_stay`` switches on and off.

    Returns
    -------
    tuple
        ``(start_times, end_times)``: labels where the flag goes from False to
        True, and labels where it goes from True to False.
    """
    transitions = df["is_stay"].astype(int).diff()  # +1 rising, -1 falling
    rising = (transitions == 1).to_numpy()
    falling = (transitions == -1).to_numpy()
    return transitions.index[rising], transitions.index[falling]

# run

In [195]:
# definitions
df = import_data()
df = df.sort_index()

# Gap to the previous sample in nanoseconds (NaN for the first sample).
# Difference the timestamps as int64 first and convert to float afterwards:
# float64 cannot represent nanosecond epoch values exactly, so casting the
# absolute timestamps to float before differencing quantises the gaps.
timestamps_ns = df.index.to_numpy().astype('datetime64[ns]').astype('int64')
df['diff_ns'] = np.insert(np.diff(timestamps_ns).astype('float64'), 0, np.nan)

# Every sample starts out flagged as a stay; later cells overwrite the flag.
df['is_stay'] = True

MAX_HIST_BINS = int(1e4)
NUM_DIMS = 3 # {x y z}

In [196]:
# Derive the sample-based filter lengths from the time-based settings (ctor).
win_size_smp, abrupt_filt_size = convert_filters_size(
    time_diffs=df['diff_ns'],
    params=params,
)

# Optionally update var_th from the rolling variance of the norm.
var_th = update_var_th(
    acc_abs=df["norm"],
    win_size_smp=win_size_smp,
    MAX_HIST_BINS=MAX_HIST_BINS,
    params=params,
)

# Split into sections (main): boundaries of the stretches of data that are
# separated by more than max_section_gap_minutes.
sections_times = find_sections_idx(time_diffs_ns=df['diff_ns'], params=params)

# Display the first section boundary.
sec = 0
sections_times[sec:sec + 1]

DatetimeIndex(['1900-01-01 15:04:51.552000'], dtype='datetime64[ns]', freq=None)

In [197]:
# Evaluate the raw stay flag one section at a time, so the rolling variance
# is computed only over the samples of that section.
for sec, section_start in enumerate(sections_times[:-1]):
    section_end = sections_times[sec + 1]
    # .iloc[:-1] drops the LAST row of the slice (the section's closing
    # boundary sample); the opening boundary sample is kept.
    section_rows = df.loc[section_start:section_end].iloc[:-1]
    is_stay = check_stay_raw(section_rows, win_size_smp, var_th, NUM_DIMS)
    df.loc[is_stay.index, "is_stay"] = is_stay

# df.is_stay = check_stay_raw(df, win_size_smp, var_th, NUM_DIMS)
# df.is_stay.sum()

In [198]:
# filter abrupt movements
df["is_stay"] = filter_abrupt_movements(df["is_stay"], abrupt_filt_size, params)

# force sectioning: the boundary samples are never stays, so every stay is
# closed at a section edge.
# Assign through a single df.loc[rows, column] call; the chained form
# `df.is_stay.loc[...] = False` writes to an intermediate Series and is not
# guaranteed to modify df (it is a no-op under pandas Copy-on-Write).
df.loc[sections_times, "is_stay"] = False


In [203]:
# find start & end times
start_times, end_times = find_time_tags(df)
stays = pd.DataFrame({"start_times": start_times, "end_times": end_times})

# Column-wise subtraction instead of a row-wise apply: it yields a timedelta
# column in one step and stays well-defined when no stays were found
# (apply on an empty frame does not return a Series to assign).
stays['duration'] = stays["end_times"] - stays["start_times"]


In [204]:
# Cancel short sojourns: keep only the stays that last strictly longer than
# min_stay_duration (minutes). An empty table simply stays empty.
min_duration = pd.to_timedelta(params["min_stay_duration"], unit = 'm')
stay_long_enough = stays['duration'].gt(min_duration)
stays = stays.loc[stay_long_enough]
stays

# stay_times(is_short_stay,:) = [];
# stay_durations(is_short_stay,:) = [];

Unnamed: 0,start_times,end_times,duration
2,1900-01-01 15:05:45.559200960,1900-01-01 15:07:32.253426856,00:01:46.694225
3,1900-01-01 15:07:50.095805840,1900-01-01 15:09:19.107674089,00:01:29.011868
4,1900-01-01 15:09:21.948052807,1900-01-01 16:04:51.552000000,00:55:29.603947
6,1900-01-01 16:05:45.559200960,1900-01-01 16:07:32.253426856,00:01:46.694225
7,1900-01-01 16:07:50.095805840,1900-01-01 16:09:19.107674089,00:01:29.011868


In [201]:
# Sanity check: number of samples left without a stay decision.
df["is_stay"].isna().sum()

0

In [None]:
# Spacing between consecutive index timestamps (the first entry has no
# predecessor); displayed for inspection.
s = df.index.to_series().diff()
s
