In [233]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [249]:
def import_data(path="../data/a02_p3.csv"):
    """Load a raw accelerometer recording and add the acceleration norm.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file with ``timestamp``, ``x``, ``y`` and ``z`` columns.
        Defaults to the recording this notebook was written against.

    Returns
    -------
    pandas.DataFrame
        Indexed by the parsed timestamps (rows keep the file order), with
        columns ``x``, ``y``, ``z`` and ``norm``, where
        ``norm = sqrt(x**2 + y**2 + z**2)``.
    """
    data = pd.read_csv(path)
    timestamp = pd.to_datetime(data["timestamp"]).to_numpy()

    df = pd.DataFrame(
        {axis: data[axis].to_numpy() for axis in ("x", "y", "z")},
        index=timestamp,
    )
    # Plain column assignment: the norm already shares df's index, so there is
    # nothing to align and no need to go through pd.concat.
    df["norm"] = np.sqrt(df.x**2 + df.y**2 + df.z**2)
    return df

### Initiate Class

In [3]:
fs = 25  # [Hz]; presumably the nominal sampling rate, used below for max_time_gap_msec

params = dict(
    # length of the rolling-variance window; converted to samples in convert_filters_size
    win_size_sec=3,
    # normalised-histogram bin mass below which update_var_th treats a bin as sparse
    ecdf_diff_th=0.01,
    # variance threshold; update_var_th never returns more than this
    var_th=0.05,
    # length of the abrupt-movement rolling mean; converted to samples like win_size_sec
    abrupt_filt_time_const=10,
    # rolling-mean level that filter_abrupt_movements requires to keep a stay
    abrupt_pctg_th=0.2,
    # not read by the code in this part of the notebook; units to be confirmed
    min_stay_duration=4,
    # five sample periods at fs, in milliseconds; not read by the code shown here
    max_time_gap_msec=1e3 * 5 / fs,
    # gaps longer than this split the recording into sections (find_sections_idx)
    max_section_gap_minutes=7,
    # percentile of the inter-sample gaps used by avg_sample_rate to drop long gaps
    max_time_gap_pctl=60,
)

### methods

In [4]:
def avg_sample_rate(time_diffs, params):
    """Estimate the sampling rate in Hz from consecutive timestamp gaps.

    Gaps above the ``params["max_time_gap_pctl"]`` percentile are ignored, the
    remaining gaps are truncated to whole milliseconds, and the mean of the
    per-gap rates ``1000 / gap_ms`` is returned.

    Parameters
    ----------
    time_diffs : array-like of timedelta64
        Differences between consecutive timestamps.
    params : dict
        Must contain ``"max_time_gap_pctl"`` (a percentile in 0..100).

    Returns
    -------
    numpy.float64
        Mean sampling rate in Hz.

    Raises
    ------
    ValueError
        If ``time_diffs`` is empty, or no retained gap is at least 1 ms long.
    """
    diffs = np.asarray(time_diffs)
    if diffs.size == 0:
        raise ValueError("avg_sample_rate needs at least one time difference")

    # Take the percentile on plain float tick counts (in the array's own time
    # unit) so the result does not depend on how np.percentile treats
    # timedelta64 input. The +1 tick of slack is kept from the original code,
    # which added it as a workaround for np.percentile.
    ticks = diffs.astype("float64")
    cutoff = np.percentile(ticks, params["max_time_gap_pctl"]) + 1
    kept = diffs[ticks <= cutoff]

    period_ms = kept.astype("timedelta64[ms]").astype("float64")
    # A gap that truncates to 0 ms (duplicate or sub-millisecond timestamps)
    # would give 1e3 / 0 = inf and turn the whole mean into inf.
    period_ms = period_ms[period_ms > 0]
    if period_ms.size == 0:
        raise ValueError("avg_sample_rate found no time difference of at least 1 ms")

    return np.mean(1e3 / period_ms)  # [Hz]

In [241]:
def find_sections_idx(time_diffs, params):
    """Locate the gaps that split a recording into sections.

    Parameters
    ----------
    time_diffs : numpy.ndarray of timedelta64
        Differences between consecutive timestamps.
    params : dict
        Must contain ``"max_section_gap_minutes"``.

    Returns
    -------
    numpy.ndarray
        ``[0, i_1, ..., i_k, len(time_diffs)]`` where each ``i_j`` is a
        position with ``time_diffs[i_j]`` strictly greater than the limit.
    """
    gap_limit = np.array(params["max_section_gap_minutes"], dtype='timedelta64[m]')
    # Positions of the over-long gaps; the 0 and len(time_diffs) book-ends
    # are added so callers can walk consecutive pairs.
    break_positions = np.flatnonzero(time_diffs > gap_limit)
    return np.concatenate(([0], break_positions, [len(time_diffs)]))

In [98]:
def update_var_th(acc_abs, win_size_smp, MAX_HIST_BINS, params):
    """Derive a variance threshold from the data, capped by ``params["var_th"]``.

    The rolling variance of ``acc_abs`` is histogrammed, the histogram is
    normalised to sum to 1, and the left edge of the last bin whose mass is
    below ``params["ecdf_diff_th"]`` is taken as the candidate threshold.

    Parameters
    ----------
    acc_abs : pandas.Series
        Signal whose rolling variance is analysed.
    win_size_smp : int
        Rolling window length in samples.
    MAX_HIST_BINS : int
        Number of histogram bins.
    params : dict
        Must contain ``"var_th"`` and ``"ecdf_diff_th"``.

    Returns
    -------
    float
        ``min(params["var_th"], candidate)``, or ``params["var_th"]`` when no
        candidate exists (no complete window, or no bin below the mass
        threshold).
    """
    fallback = params["var_th"]

    rollvar = acc_abs.rolling(win_size_smp).var().dropna()
    if rollvar.empty:
        # Fewer samples than one window: nothing to build a histogram from.
        return float(fallback)

    counts, bin_edges = np.histogram(rollvar, bins=MAX_HIST_BINS)
    bin_mass = counts / counts.sum()  # normalize to pdf

    sparse_bins = np.flatnonzero(bin_mass < params["ecdf_diff_th"])
    if sparse_bins.size == 0:
        # The original indexed [-1] before checking for emptiness, so this
        # case raised IndexError instead of falling back.
        return float(fallback)

    # NOTE(review): this picks the LAST sparse bin, i.e. the highest-variance
    # one; confirm that the last rather than the first sparse bin is intended.
    knee_th = bin_edges[sparse_bins[-1]]
    return float(min(fallback, knee_th))

In [234]:
def filter_abrupt_movements(is_stay_raw, abrupt_filt_size, params):
    """Smooth the raw stay flags with a rolling mean and re-threshold them.

    Parameters
    ----------
    is_stay_raw : pandas.Series
        Per-sample stay flags.
    abrupt_filt_size : int
        Rolling window length in samples.
    params : dict
        Must contain ``"abrupt_pctg_th"``.

    Returns
    -------
    pandas.Series of bool
        True where the rolling mean of the flags is strictly above
        ``params["abrupt_pctg_th"]``; positions without a complete window
        (rolling mean is NaN) come out False.
    """
    threshold = params["abrupt_pctg_th"]
    stay_fraction = is_stay_raw.rolling(window=abrupt_filt_size).mean()
    return stay_fraction.gt(threshold)

In [10]:
def convert_filters_size(time_diffs, params):
    """Convert the time-based filter lengths in ``params`` to sample counts.

    The sampling rate is estimated with ``avg_sample_rate`` and each length
    is multiplied by it and floored.

    Returns
    -------
    tuple
        ``(win_size_smp, abrupt_filt_size)`` as int32 values, derived from
        ``params["win_size_sec"]`` and ``params["abrupt_filt_time_const"]``.
    """
    rate_hz = avg_sample_rate(time_diffs, params)

    def to_samples(seconds):
        # floor so a filter never covers more time than was asked for
        return np.floor(seconds * rate_hz).astype('int32')

    return (
        to_samples(params["win_size_sec"]),
        to_samples(params["abrupt_filt_time_const"]),
    )

In [122]:
def check_stay_raw(df, win_size_smp, var_th, NUM_DIMS):
    """Flag samples whose recent acceleration variance is low on every signal.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``x``, ``y``, ``z`` and ``norm``; any other
        column is ignored.
    win_size_smp : int
        Rolling window length in samples.
    var_th : float
        Variance threshold for ``norm``.
    NUM_DIMS : int
        Divisor applied to ``var_th`` for the per-axis test.

    Returns
    -------
    pandas.Series of bool
        Named ``"is_stay"``. True where the rolling variance of each of
        ``x``, ``y``, ``z`` is below ``var_th / NUM_DIMS`` and that of
        ``norm`` is below ``var_th``. Rows without a complete window are
        False.
    """
    axes = ["x", "y", "z"]
    # Only roll over the columns that are read below: the frame may carry
    # extra columns (e.g. a boolean is_stay, or non-numeric ones) that are
    # wasteful or impossible to take a variance of.
    rollvar = df[axes + ["norm"]].rolling(win_size_smp).var()

    axes_quiet = (rollvar[axes] < var_th / NUM_DIMS).all(axis=1)
    norm_quiet = rollvar["norm"] < var_th

    is_stay = axes_quiet & norm_quiet
    is_stay.name = "is_stay"
    return is_stay

### run

In [248]:
# definitions
MAX_HIST_BINS = int(1e4)  # bin count handed to update_var_th
NUM_DIMS = 3  # {x y z}

# Load the recording, start with every sample marked as "not a stay",
# and order the rows by timestamp.
df = import_data().assign(is_stay=False).sort_index()

In [242]:
# calc params in sample (ctor)
time_diffs = np.diff(df.index)  # util array: time_diffs[i] = index[i + 1] - index[i]

win_size_smp, abrupt_filt_size = convert_filters_size(time_diffs, params)
var_th = update_var_th(df["norm"], win_size_smp, MAX_HIST_BINS, params)  # optionally update var_th

# split to sections (main)
# find sections in a large sequence of data separated by max_section_gap_minutes.
# sections_list_iloc is [0, i_1, ..., i_k, len(df) - 1], where each i_j is the
# last sample BEFORE an over-long gap.
sections_list_iloc = find_sections_idx(time_diffs, params)

# Each section ends on its boundary sample (inclusive) and the next one starts
# on the sample after it. Starting the next section on the boundary itself, as
# before, made neighbouring sections share a row and let the first rolling
# window of a section reach across the gap.
section_starts = [0] + [int(ix) + 1 for ix in sections_list_iloc[1:-1]]
section_stops = [int(ix) + 1 for ix in sections_list_iloc[1:]]
sections_range = [range(start, stop) for start, stop in zip(section_starts, section_stops)]

In [209]:
# go through each section and decide isStay
# Sections are evaluated separately so that each one gets its own rolling
# windows; the result is written back into df by timestamp label.
for sec, sec_rows in enumerate(sections_range):
    sec_df = df.iloc[sec_rows]
    is_stay = check_stay_raw(sec_df, win_size_smp, var_th, NUM_DIMS)
    df.loc[is_stay.index, "is_stay"] = is_stay

In [244]:
# filter abrupt movements
is_stay = filter_abrupt_movements(df["is_stay"], abrupt_filt_size, params)

# force sectioning: the sample right before every over-long gap, and the very
# last sample, are always marked as "not a stay"
is_stay.iloc[sections_list_iloc[1:]] = False

df.loc[is_stay.index, "is_stay"] = is_stay