In [1]:
import os 
import pandas as pd
import numpy as np

Feature Engineering
- Peak value
- Peak-to-peak value
- Amplitude
- Energy
- Frequency
- Peak-to-peak
- ARV: Average rectified value
- Weighted-ARV: 
- FFT: Frequency spektrum, different frequency parts (maybe)
   
Clustering

Data Augmentation

# Feature Engineering

In [2]:
df_geophone = pd.read_csv('merged_geophone_falls.csv')
df_geophone.head()

Unnamed: 0,activity,fall_binary,distance_m,person_binary,weight,floor,value_1,value_2,value_3,value_4,...,value_491,value_492,value_493,value_494,value_495,value_496,value_497,value_498,value_499,value_500
0,FCS,1,1,1,75.0,H,59,-38,10,43,...,10,-8,-6,3,6,-5,10,10,-12,-6
1,FCS,1,1,1,75.0,H,104,-49,137,-46,...,-1,-15,-9,6,13,6,5,6,-4,-17
2,FCS,1,1,1,75.0,H,-11,48,-13,8,...,170,-107,-246,70,158,110,-52,-100,63,76
3,FCS,1,1,1,75.0,H,16,-11,22,4,...,2,-8,-13,0,13,1,-1,4,4,-9
4,FCS,1,1,1,75.0,H,5,-11,-4,2,...,36,12,2,13,8,7,9,-33,-11,-11


In [3]:
def add_max_min_avg_p2p_columns(df):
    """
    Adds four columns representing the maximum, minimum, average,
    and peak-to-peak value of signal data in each row.

    Parameters:
    - df (pd.DataFrame): The DataFrame containing signal data.

    Returns:
    - pd.DataFrame: The DataFrame with the new 'max_value', 'min_value',
      'avg_value', and 'p2p_value' columns.
    """
    # Signal data starts from column index 6
    signal_columns = df.columns[6:]

    # Compute values
    df['max_value'] = df[signal_columns].max(axis=1)
    df['min_value'] = df[signal_columns].min(axis=1)
    df['avg_value'] = df[signal_columns].mean(axis=1)
    df['p2p_value'] = df['max_value'] - df['min_value']

    return df

# Example usage:
df_geophone = add_max_min_avg_p2p_columns(df_geophone)
df_geophone.head()

Unnamed: 0,activity,fall_binary,distance_m,person_binary,weight,floor,value_1,value_2,value_3,value_4,...,value_495,value_496,value_497,value_498,value_499,value_500,max_value,min_value,avg_value,p2p_value
0,FCS,1,1,1,75.0,H,59,-38,10,43,...,6,-5,10,10,-12,-6,3335,-2904,-12.574,6239
1,FCS,1,1,1,75.0,H,104,-49,137,-46,...,13,6,5,6,-4,-17,1794,-5425,-15.622,7219
2,FCS,1,1,1,75.0,H,-11,48,-13,8,...,158,110,-52,-100,63,76,8664,-4533,22.256,13197
3,FCS,1,1,1,75.0,H,16,-11,22,4,...,13,1,-1,4,4,-9,8603,-1911,19.398,10514
4,FCS,1,1,1,75.0,H,5,-11,-4,2,...,8,7,9,-33,-11,-11,6241,-13943,-18.06,20184


## Add peak value

In [4]:
def add_peak_column(df):
    """
    Adds a column representing the peak value (maximum absolute value) 
    of signal data in each row.

    Parameters:
    - df (pd.DataFrame): The DataFrame containing signal data.

    Returns:
    - pd.DataFrame: The DataFrame with the new 'peak_value' column.
    """
    # Signal data starts from column index 6
    signal_columns = df.columns[6:]

    # Compute peak value (max absolute value)
    df['peak_value'] = df[signal_columns].abs().max(axis=1)

    return df

# Example usage:
df_geophone = add_peak_column(df_geophone)
df_geophone.head()

Unnamed: 0,activity,fall_binary,distance_m,person_binary,weight,floor,value_1,value_2,value_3,value_4,...,value_496,value_497,value_498,value_499,value_500,max_value,min_value,avg_value,p2p_value,peak_value
0,FCS,1,1,1,75.0,H,59,-38,10,43,...,-5,10,10,-12,-6,3335,-2904,-12.574,6239,6239.0
1,FCS,1,1,1,75.0,H,104,-49,137,-46,...,6,5,6,-4,-17,1794,-5425,-15.622,7219,7219.0
2,FCS,1,1,1,75.0,H,-11,48,-13,8,...,110,-52,-100,63,76,8664,-4533,22.256,13197,13197.0
3,FCS,1,1,1,75.0,H,16,-11,22,4,...,1,-1,4,4,-9,8603,-1911,19.398,10514,10514.0
4,FCS,1,1,1,75.0,H,5,-11,-4,2,...,7,9,-33,-11,-11,6241,-13943,-18.06,20184,20184.0


## Add energy value

In [5]:
def add_energy_column(df, sampling_interval=4.5/500):
    """
    Adds a column representing the energy (numerical integration of absolute signal values)
    to characterize the strength of the recorded signal.

    Parameters:
    - df (pd.DataFrame): The DataFrame containing signal data.
    - sampling_interval (float): The estimated time step between consecutive signal samples.

    Returns:
    - pd.DataFrame: The DataFrame with the new 'energy' column.
    """
    # Signal data starts from column index 6
    signal_columns = df.columns[6:]

    # Compute energy using numerical integration (Riemann sum approximation)
    df['energy'] = df['peak_value'] * sampling_interval

    return df

# Example usage:
df_geophone = add_energy_column(df_geophone)
df_geophone.head()

Unnamed: 0,activity,fall_binary,distance_m,person_binary,weight,floor,value_1,value_2,value_3,value_4,...,value_497,value_498,value_499,value_500,max_value,min_value,avg_value,p2p_value,peak_value,energy
0,FCS,1,1,1,75.0,H,59,-38,10,43,...,10,10,-12,-6,3335,-2904,-12.574,6239,6239.0,525.299166
1,FCS,1,1,1,75.0,H,104,-49,137,-46,...,5,6,-4,-17,1794,-5425,-15.622,7219,7219.0,522.986598
2,FCS,1,1,1,75.0,H,-11,48,-13,8,...,-52,-100,63,76,8664,-4533,22.256,13197,13197.0,849.935304
3,FCS,1,1,1,75.0,H,16,-11,22,4,...,-1,4,4,-9,8603,-1911,19.398,10514,10514.0,649.317582
4,FCS,1,1,1,75.0,H,5,-11,-4,2,...,9,-33,-11,-11,6241,-13943,-18.06,20184,20184.0,1130.09454
