Linear Relationships and Ground Detection (#15)

* Added class for linear regressions and simplified statistical relationship forming * Added the raw sensors to their cropped counter part to cope with indexing in radicl * Added point property for gis work * Added in ground detection w/ tests * Improved various methods by limiting the number of points needed to agreement on detection * Slight improvement to force start selection * Added resolution for agg by depth * Signal event returns none when not found. Other functions handle whether we want that in the context. * Improved handing of index errors that wont plot the data in radicl. * Split up read csv function to enable metadata read only from the profile class
AdventureData · Jan 23, 2024 · e6f0e33 · e6f0e33
1 parent a9e66c8
commit e6f0e33
Show file tree

Hide file tree

Showing 17 changed files with 71,481 additions and 97 deletions.
diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
     readme = readme_file.read()
 
 requirements = ["pandas > 1.2.0", "pandas< 2.0.0",
-                "scipy>=1.8.0", "scipy<2.0.0"]
+                "scipy>=1.8.0", "scipy<2.0.0", "shapely"]
 test_requirements = ['pytest>=3', ]
 
 setup(

diff --git a/study_lyte/adjustments.py b/study_lyte/adjustments.py
@@ -2,14 +2,19 @@
 import pandas as pd
 from scipy.signal import lfilter
 
-def get_points_from_fraction(n_samples, fraction):
+def get_points_from_fraction(n_samples, fraction, maximum=None):
     """
     Return the nearest whole int from a fraction of the
     number of samples. Never returns 0.
     """
     idx = int(fraction * n_samples) or 1
     if idx == n_samples:
         idx = n_samples - 1
+
+    if maximum is not None:
+        if idx > maximum:
+            idx = maximum
+
     return idx
 
 
@@ -45,6 +50,25 @@ def get_neutral_bias_at_border(series: pd.Series, fractional_basis: float = 0.00
     bias_adj = series - bias
     return bias_adj
 
+def get_neutral_bias_at_index(series: pd.Series, index, fractional_basis: float = 0.005):
+    """
+    Bias adjust the series data by using the XX % of the data centered on an provided index
+
+    Args:
+        series: pandas series of data with a known bias
+        fractional_basis: Fraction of data to use to estimate the bias on start
+    Returns:
+        bias_adj: bias adjusted data to near zero
+    """
+    n = get_points_from_fraction(len(series), fractional_basis)
+    start = index-n
+    start = start if start > 0 else 0
+    stop = index + n
+    stop = stop if stop < len(series) else len(series)-1
+
+    bias = series.values[start:stop].mean()
+    bias_adj = series - bias
+    return bias_adj
 
 def get_normalized_at_border(series: pd.Series, fractional_basis: float = 0.01, direction='forward'):
     """
@@ -155,23 +179,40 @@ def apply_calibration(series, coefficients, minimum=None, maximum=None):
     return result
 
 
-def aggregate_by_depth(df, new_depth, df_depth_col='depth', agg_method='mean'):
+def aggregate_by_depth(df, new_depth=None, resolution=None, df_depth_col='depth', agg_method='mean'):
     """
     Aggregate the dataframe by the new depth using whatever method
     provided. Data in the new depth is considered to be the bottom of
     the aggregation e.g. 10, 20 == 0-10, 11-20 etc
     Depth data must be monotonic.
     new_depth data much be coarser than current depth data
+
+    Args:
+        df: Dataframe containing at least depth as a columne
+        new_depth: Optional array of depth positions to aggregate to (useful for arbitrary delineations e.g. hand hardness)
+        resolution: Aggregate to a resolution in centimeter. Optional
+        df_depth_col: Column name for depth
+        agg_method: Method to aggregate
+
+
     """
+    # Determine new depth data
+    if new_depth is None:
+        resolution = -1 * abs(resolution)
+        new_depth = np.arange(resolution, df[df_depth_col].min() + resolution, resolution)
+
+    # Determine datum type
     if new_depth[-1] < 0:
         surface_datum = True
     else:
         surface_datum = False
+
     if df.index.name is not None:
         df = df.reset_index()
     dcol = df_depth_col
     cols = [c for c in df.columns if c not in [dcol, 'time']]
     new = []
+
     # is the user request specific aggregation by column
     agg_col_specific = True if type(agg_method) == dict else False
 
@@ -195,6 +236,7 @@ def aggregate_by_depth(df, new_depth, df_depth_col='depth', agg_method='mean'):
                 ind = ind & (df[dcol] >= d1)
             else:
                 ind = ind & (df[dcol] > d1)
+
         if agg_col_specific:
             for z,c in enumerate(cols):
                 nr = getattr(df[c][ind], agg_method[c])()

diff --git a/study_lyte/depth.py b/study_lyte/depth.py
@@ -123,13 +123,6 @@ def get_constrained_baro_depth(baro_depth, start, stop, method='nanmedian'):
     constrained = result.set_index('time')
     #assume_no_upward_motion(result[baro])
     constrained = constrained - constrained.iloc[0]
-
-    # from .plotting import plot_constrained_baro
-    # pos = get_depth_from_acceleration(df).mul(100)
-    # pos = pos.reset_index()
-    # plot_constrained_baro(df, result, const, pos, top, bottom, start, stop,
-    #                       baro=baro, acc_axis=acc_axis)
-
     return constrained
 
 

diff --git a/study_lyte/detect.py b/study_lyte/detect.py
@@ -1,9 +1,16 @@
 import numpy as np
 from scipy.signal import find_peaks, argrelextrema
 
-from .adjustments import get_neutral_bias_at_border, get_normalized_at_border, get_points_from_fraction
+from .adjustments import (get_neutral_bias_at_border, get_normalized_at_border, get_points_from_fraction, get_neutral_bias_at_index,zfilter)
 from .decorators import directional
 
+def find_nearest_value_index(search_value, series):
+    """
+    Given an array and a value, this function finds the index of the
+    value closest to the search value
+    """
+    idx = np.abs(search_value - series).argmin()
+    return idx
 
 def first_peak(arr, default_index=1, **find_peak_kwargs):
     """
@@ -95,14 +102,16 @@ def get_signal_event(signal_series, threshold=0.001, search_direction='forward',
 
     # If no results are found, return the first index the series
     if len(ind) == 0:
-        event_idx = 0
+        event_idx = None
     else:
         # Return the first value matching the conditions
         event_idx = ind[-1]
 
     # Invert the index
-    if 'backward' in search_direction:
+    if 'backward' in search_direction and event_idx is not None:
         event_idx = len(arr) - 1 - event_idx
+        # if event_idx == 0:
+        #     event_idx = None
 
     return event_idx
 
@@ -126,10 +135,12 @@ def get_acceleration_start(acceleration, threshold=-0.01, max_threshold=0.02):
     acceleration_start = get_signal_event(accel_neutral[0:max_ind+1], threshold=threshold, max_threshold=max_threshold,
                                           n_points=n_points,
                                           search_direction='forward')
+    if acceleration_start is None:
+        acceleration_start = 0
     return acceleration_start
 
 
-def get_acceleration_stop(acceleration, threshold=-0.05, max_threshold=0.05):
+def get_acceleration_stop(acceleration, threshold=-0.2, max_threshold=0.1):
     """
     Returns the index of the last value that has a relative change greater than the
     threshold of absolute normalized signal
@@ -141,9 +152,6 @@ def get_acceleration_stop(acceleration, threshold=-0.05, max_threshold=0.05):
         acceleration_start: Integer of index in array of the first value meeting the criteria
     """
     acceleration = acceleration.values
-    n = get_points_from_fraction(len(acceleration), 0.005)
-    if n > 40:
-        n = 20
 
     min_idx = np.argwhere(acceleration == acceleration.min())[0][0]
     max_idx = np.argwhere(acceleration == acceleration.max())[0][0]
@@ -156,11 +164,16 @@ def get_acceleration_stop(acceleration, threshold=-0.05, max_threshold=0.05):
         # Use the farthest deceleration
         search_start = min_idx
 
+    n = get_points_from_fraction(len(acceleration[search_start:]), 0.05, maximum=1000)
     acceleration_stop = get_signal_event(acceleration[search_start:], threshold=threshold,
                                          max_threshold=max_threshold,
                                          n_points=n,
                                          search_direction='backward')
-    acceleration_stop = acceleration_stop + search_start
+
+    if acceleration_stop is None:
+        acceleration_stop = len(acceleration) - 1
+    else:
+        acceleration_stop = acceleration_stop + search_start
     return acceleration_stop
 
 
@@ -209,7 +222,8 @@ def get_nir_stop(active, fractional_basis=0.05, max_threshold=0.008, threshold=-
     n_points = get_points_from_fraction(len(data), fractional_basis)
     stop = get_signal_event(data, search_direction='backward', threshold=threshold,
                             max_threshold=max_threshold, n_points=n_points)
-    stop += ind
+    if stop is not None:
+        stop += ind
 
     return stop
 
@@ -218,10 +232,41 @@ def get_sensor_start(signal, fractional_basis=0.05, max_threshold=0.05, threshol
     """
     Before entering the snow we don't see much dynamic signal. This detects the first change in the signal
     """
-    ind = np.where(signal == signal.max())[0][0]
+    ind = np.where(signal == signal.min())[0][0]
     n_points = get_points_from_fraction(len(signal), 0.01)
     data = signal[:ind]
     norm_signal = get_normalized_at_border(data, fractional_basis=fractional_basis, direction='forward') - 1
     first_change = get_signal_event(norm_signal, search_direction='forward', threshold=threshold,
                             max_threshold=max_threshold, n_points=n_points)
-    return first_change
+    return first_change
+
+
+def get_ground_strike(signal, stop_idx):
+    """
+    The probe hits ground somtimes before we detect stop.
+    """
+    buffer = get_points_from_fraction(len(signal), 0.05)
+    start = stop_idx - buffer
+    start = start if start > 0 else 0
+    end = stop_idx + buffer
+    end = end if end < len(signal) else len(signal)-1
+    rel_stop = stop_idx - start
+
+    sig_arr = signal[start:end]
+    norm1 = get_neutral_bias_at_index(sig_arr, rel_stop + buffer)
+
+    # norm1 = get_neutral_bias_at_border(signal[start:end], 0.1, 'backward')
+    diff = zfilter(norm1.diff(), 0.001)    # Large change in signal
+    impact = get_signal_event(diff, threshold=-1000, max_threshold=-70, n_points=1, search_direction='forward')
+
+    # Large chunk of data that's the same near the stop
+    n_points = get_points_from_fraction(len(norm1), 0.1)
+    long_press = get_signal_event(norm1, threshold=-150, max_threshold=150, n_points=n_points, search_direction='backward')
+    tol = get_points_from_fraction(len(norm1), 0.2)
+
+    ground = None
+    if impact is not None and long_press is not None:
+        if (long_press-tol) <= impact <= (long_press+tol):
+            ground = impact + start
+
+    return ground
diff --git a/study_lyte/io.py b/study_lyte/io.py
@@ -2,19 +2,9 @@
 import pandas as pd
 import numpy as np
 
+def find_metadata(f:str) -> [int, dict]:
+    """Read just the metadata from the probe files"""
 
-def read_csv(f: str) -> Tuple[pd.DataFrame, dict]:
-    """
-    Reads any Lyte probe CSV and returns a dataframe
-    and metadata dictionary from the header
-
-    Args:
-        f: Path to csv, or file buffer
-    Returns:
-        tuple:
-            **df**: pandas Dataframe
-            **header**: dictionary containing header info
-    """
     # Collect the header
     metadata = {}
 
@@ -32,17 +22,35 @@ def read_csv(f: str) -> Tuple[pd.DataFrame, dict]:
             else:
                 header_position = i
                 break
+    return header_position, metadata
 
-        df = pd.read_csv(f, header=header_position)
-        # Drop any columns written with the plain index
-        df.drop(df.filter(regex="Unname"), axis=1, inplace=True)
+def read_data(f:str, metadata:dict, header_position:int) -> Tuple[pd.DataFrame, dict]:
+    """Read just the csv to enable parsing metadata and header position separately"""
+    df = pd.read_csv(f, header=header_position)
+    # Drop any columns written with the plain index
+    df.drop(df.filter(regex="Unname"), axis=1, inplace=True)
 
-        if 'time' not in df and 'SAMPLE RATE' in metadata:
-            sr = int(metadata['SAMPLE RATE'])
-            n = len(df)
-            df['time'] = np.linspace(0, n/sr, n)
+    if 'time' not in df and 'SAMPLE RATE' in metadata:
+        sr = int(metadata['SAMPLE RATE'])
+        n = len(df)
+        df['time'] = np.linspace(0, n/sr, n)
+    return df, metadata
 
-        return df, metadata
+def read_csv(f: str) -> Tuple[pd.DataFrame, dict]:
+    """
+    Reads any Lyte probe CSV and returns a dataframe
+    and metadata dictionary from the header
+
+    Args:
+        f: Path to csv, or file buffer
+    Returns:
+        tuple:
+            **df**: pandas Dataframe
+            **header**: dictionary containing header info
+    """
+    header_position, metadata = find_metadata(f)
+    df, metadata = read_data(f, metadata, header_position)
+    return df, metadata
 
 
 def write_csv(df: pd.DataFrame, meta: dict, f: str) -> None:

diff --git a/study_lyte/plotting.py b/study_lyte/plotting.py
@@ -108,4 +108,19 @@ def plot_fused_depth(acc_depth, baro_depth, avg, scaled_baro=None, error=None):
         ax = plot_ts(scaled_baro, ax=ax, data_label='Scaled Baro', show=False)
 
     ax.legend()
-    plt.show()
+    plt.show()
+
+
+
+def plot_ground_strike(signal, search_start, stop_idx, impact, long_press, ground):
+    events = [('stop', stop_idx)]
+    if long_press is not None:
+        events.append(('long_press', long_press + search_start))
+    if impact is not None:
+        events.append(('impact', impact + search_start))
+    if ground is not None:
+        events.append(('ground', ground))
+    ax = plot_ts(signal, events=events,show=False)
+    ax.legend()
+    plt.show()
+