In [1]:
import pandas as pd
import numpy as np 
import math
from datetime import datetime

In [2]:
# The DataPoint class holds an acceleration magnitude and its corresponding time value

# Define a class named 'DataPoint'
class DataPoint:
    """One sample of the signal: a magnitude value paired with its time value."""

    def __init__(self, mag, time):
        # Both attributes are public and are also reachable through the accessors below.
        self.mag, self.time = mag, time

    def __str__(self):
        """Return a human-readable description of the sample."""
        return f"Acceleration magnitude is {self.mag} and time is {self.time}"

    def getMagnitude(self):
        """Return the stored magnitude."""
        return self.mag

    def setMagnitude(self, mag):
        """Replace the stored magnitude."""
        self.mag = mag

    def getTime(self):
        """Return the stored time value."""
        return self.time

    def setTime(self, time):
        """Replace the stored time value."""
        self.time = time

In [3]:
# 1. Pre-processing stage

def PreProcessingStage(data, samplingPeriod = 60):
    '''
    First stage of step counting
    -----------------------------
        Computes the magnitude of the triaxial accelerometer signal for every
        sample and pairs it with the sample's timestamp.

        NOTE: no resampling / linear interpolation is performed by this
        implementation; the samples are passed through at their original
        timestamps.

    Parameters
    ----------
    data : DataFrame
        pandas dataframe (or any mapping of equal-length sequences) with the
        columns acc_x, acc_y, acc_z and time
    samplingPeriod : float, optional
        The default is 60.
        Sampling period in milliseconds. Currently unused; kept so existing
        callers that pass it keep working.

    Returns
    -------
    ppData : list
        one DataPoint (magnitude, time) per input row, in input order.
    '''
    # Read the columns positionally. Label-based lookups such as
    # data["acc_x"][i] fail (or pick the wrong row) when the DataFrame index
    # is not 0..n-1, e.g. after filtering or slicing.
    acc_x = np.asarray(data["acc_x"])
    acc_y = np.asarray(data["acc_y"])
    acc_z = np.asarray(data["acc_z"])
    times = np.asarray(data["time"])

    ppData = []
    for i in range(len(times)):
        # Euclidean norm of the three axes
        acc_mag = (acc_x[i] ** 2 + acc_y[i] ** 2 + acc_z[i] ** 2) ** 0.5
        ppData.append(DataPoint(acc_mag, times[i]))
    return ppData

In [4]:
# 2. Filter Stage 

def FilterStage(ppData, filterLength = 13, filterSTD = 0.35):
    '''
    Second stage of step counting
    -----------------------------
        In order to reduce the noise level, algorithm implements a
        finite impulse response (FIR) low-pass filter: every output sample is
        the coefficient-weighted average of filterLength consecutive inputs,
        stamped with the time of the window's middle sample.

    Parameters
    ----------
    ppData : list
        output of the preprocessing stage (objects exposing getMagnitude()
        and getTime()). The list is not modified.
    filterLength : int, optional
        The default is 13.
        length of window for a filter
    filterSTD : float, optional
        The default is 0.35.
        std dev for generating filter coefficients

    Returns
    -------
    smoothData : list
        smoothened DataPoints; len(ppData) - filterLength + 1 entries, or an
        empty list when the input is shorter than one window (including
        empty input).

    '''
    # Not enough samples to fill a single window: nothing to emit.
    if filterLength < 1 or len(ppData) < filterLength:
        return []

    midPoint = int(filterLength/2)
    filterVals = GenerateFilterCoef(filterLength, filterSTD) # Generate filter coeff
    filterSum = sum(filterVals)

    smoothData = [] # output of filter stage
    # Slide the window by index instead of repeatedly popping from the front
    # of a list copy, which is quadratic in the input length.
    for start in range(len(ppData) - filterLength + 1):
        window = ppData[start:start + filterLength]
        weighted = [coef * point.getMagnitude() for coef, point in zip(filterVals, window)]
        acc_new_mag = sum(weighted) / filterSum
        smoothData.append(DataPoint(acc_new_mag, window[midPoint].getTime()))
    return smoothData

def GenerateFilterCoef(filterLength = 13, filterSTD = 0.35):

    '''
    Generate Gaussian-shaped filter coefficients based on the filter length
    and std dev. Coefficient i is exp(-0.5 * ((i - c) / (filterSTD * c))**2)
    with c = (filterLength - 1) / 2, so the centre tap is 1.0 and the window
    is symmetric. The coefficients are not normalised.

    Parameters
    ----------
    filterLength : int, optional
        length of filter. The default is 13.
    filterSTD : float, optional
        std dev, relative to half the window width. The default is 0.35.
        Must be non-zero.

    Returns
    -------
    FIR_Vals : list
        filter coefficients (empty for filterLength <= 0).

    '''

    # A single-tap window has zero half-width, which would divide by zero
    # below; its only coefficient is the centre value.
    if filterLength == 1:
        return [1.0]

    center = (filterLength - 1) / 2
    sigma = filterSTD * (filterLength - 1) / 2
    FIR_Vals = [math.exp(-0.5 * ((i - center) / sigma) ** 2) for i in range(filterLength)]

    return FIR_Vals
    

In [5]:
# 3. Scoring Stage

def ScoringStage(smoothData, windowSize = 35):

    '''
    Third stage of step counting
    ----------------------------
        The function of the scoring stage is to evaluate the peakiness of a given
        sample. The score of a sample is the mean difference between it and
        the other samples of the window centred on it, so local peaks get a
        large positive score.

    Parameters
    ----------
    smoothData : list
        list containing smoothened datapoint values (objects exposing
        getMagnitude() and getTime()). The list is not modified.
    windowSize : int, optional
        window size for score peak calculation. The default is 35.
        Must be at least 2 (the score divides by windowSize - 1).

    Returns
    -------
    peakScoreData : list
        one DataPoint (score, time of the window's middle sample) per full
        window; empty when the input is shorter than one window (including
        empty input).

    '''

    # Not enough samples to fill a single window: nothing to score.
    if windowSize < 1 or len(smoothData) < windowSize:
        return []

    midPoint = int(windowSize/2) #Mid point of window
    peakScoreData = []

    # Slide the window by index instead of popping from the front of a copy.
    for start in range(len(smoothData) - windowSize + 1):
        window = smoothData[start:start + windowSize]
        centre = window[midPoint].getMagnitude()

        # Accumulate left-to-right with explicit loops so the floating point
        # result stays identical to the sequential summation.
        diffLeft = 0
        for point in window[:midPoint]:
            diffLeft += centre - point.getMagnitude()
        # The right-hand side includes the centre sample itself, which adds 0.
        diffRight = 0
        for point in window[midPoint:]:
            diffRight += centre - point.getMagnitude()

        score = (diffRight + diffLeft) / (windowSize - 1)
        peakScoreData.append(DataPoint(score, window[midPoint].getTime()))

    return peakScoreData
        

Detection Stage

In [6]:
# 4.Detection Stage

def DetectionStage(peakScoreData, threshold = 1.2):

    '''
    Fourth stage of step counting
    -----------------------------
        This stage identifies potential candidate peaks to be associated with a
        step by statistically detecting outliers.
        As the algorithm processes the signal, it keeps track of a running mean
        and standard deviation. A sample is reported as a candidate peak when
        it exceeds the running mean (updated with that sample) by more than
        threshold times the running standard deviation.

    Parameters
    ----------
    peakScoreData : list
        list containing peakiness values (objects exposing getMagnitude()).
        The list is not modified.
    threshold : float, optional
        detection threshold. The default is 1.2 assuming the sampling frequency is 100Hz.

    Returns
    -------
    outputQueue : list
        the input DataPoints that were classified as peaks, in input order;
        empty for empty input.

    '''

    outputQueue = []
    # running statistics
    count = 0
    acc_mean = 0
    acc_std = 0

    # Plain iteration handles empty input and avoids popping from the front
    # of a list copy.
    for dp in peakScoreData:
        count += 1
        mag = dp.getMagnitude()
        o_mean = acc_mean  # mean before this sample was added

        # Update calculations of mean and std deviation
        if count == 1:
            acc_mean = mag
            acc_std = 0
        elif count == 2:
            acc_mean = (acc_mean + mag)/2
            acc_std = (((mag - acc_mean)**2 + (o_mean - acc_mean)**2 ) ** 0.5)/2
        else:
            acc_mean = (mag + (count - 1)*acc_mean)/count
            acc_std = (((count - 2) * (acc_std**2)/(count-1)) + (o_mean - acc_mean)**2 + ((mag - acc_mean) ** 2)/count)**0.5

        # Detection starts from the very first sample (no warm-up period).
        if (mag - acc_mean) > acc_std * threshold:
            # This is peak
            outputQueue.append(dp)

    return outputQueue
  

In [7]:
# 5. Post-processing

def PostProcessStage(peakData, timeThreshold=200):
    '''
    Fifth Stage of Step Counting
    ----------------------------
        handles false positives from the detection stage: candidate peaks
        that lie within timeThreshold of the current reference peak are
        merged (the larger magnitude becomes the reference); a candidate
        further away than timeThreshold is counted as a step and becomes
        the new reference.

    Parameters
    ----------
    peakData : list
        this list is output of detection stage (objects exposing getTime()
        and getMagnitude()). The list is not modified.
    timeThreshold : float/int, optional
        The default is 200. Time in millisecond
        By considering human can walk max 5 steps in a sec.

    Returns
    -------
    steps : int
        number of steps detected by algorithm
    outputQueue : list
        list of datapoints for which step is detected.
        (0, []) is returned when peakData is empty.

    '''

    # No candidate peaks at all (e.g. a very short or flat recording):
    # there is no first element to seed the reference peak with.
    if len(peakData) == 0:
        return 0, []

    steps = 0 # number of steps detected
    outputQueue = []
    current = peakData[0]

    for dp in peakData:
        if (dp.getTime() - current.getTime()) > timeThreshold:
            # If the time difference exceeds the threshold, we have a confirmed step
            current = dp
            steps += 1
            outputQueue.append(dp)
        elif dp.getMagnitude() > current.getMagnitude():
            # Keep the point with the largest magnitude.
            current = dp

    return steps, outputQueue

In [8]:
# Read csv file

def readCSVFile(data_files):

    '''
    Run the step counter on every CSV file and print one result line per file.

    For each file the last eight columns are read as
    acc_x, acc_y, acc_z, a, b, c, constant, time. The sampling period is
    taken from the difference of the first two timestamps (milliseconds),
    the detection threshold is scaled from 1.2 at 100 Hz to the actual
    sampling frequency, and the filter stage is skipped when the first
    acc_x value is >= -3000.

    Each file needs at least two rows with different timestamps.

    Parameters
    ----------
    data_files : list
        paths of the CSV files to process

    Returns
    -------
    None
        Results are printed as: start time, end time, a label taken from the
        file name, ':' and the detected number of steps.

    '''
    for file in data_files:
        data = pd.read_csv(file, header=None)
        file_name = file.split('_')
        data = data.iloc[:,-8:]
        data.columns = ['acc_x', 'acc_y', 'acc_z','a','b','c','constant', 'time']
        times = data['time']
        sample_time = int(times.iloc[1] - times.iloc[0]) #calculate actual sampling time
        sample_frequency = float(1/(sample_time * 1e-3)) #calculate actual sampling frequency
        thresholdvalue = float((1.2 * sample_frequency) / 100) #threshold value considering 1.2 is std value for 100Hz
        # Assumed threshold value to compensate tightness or looseness of flexitail
        skipfilter = bool(data['acc_x'].iloc[0] >= -3000)
        steps, _ = RunAlgo(data,samplingPeriod = sample_time, SKIPFILTER = skipfilter, filterlength = 13, filterSTD = 0.35,  windowSize = 35, threshold = thresholdvalue, timeThreshold = 200)

        # Timestamps are in milliseconds; integer-divide to whole seconds
        # rather than slicing the last three characters off the string form,
        # which breaks for float-typed timestamps.
        start_time = datetime.fromtimestamp(int(times.iloc[0]) // 1000).strftime('%Y-%m-%d %H:%M:%S')
        end_time = datetime.fromtimestamp(int(times.iloc[-1]) // 1000).strftime('%H:%M:%S')

        # Pick the label from the '_'-separated file name; fall back to the
        # whole name for layouts that have neither 4 nor at least 7 parts
        # instead of raising IndexError.
        if len(file_name) == 4:
            label = (file_name[3],)
        elif len(file_name) >= 7:
            label = (file_name[4], file_name[6])
        else:
            label = (file,)
        print(start_time, end_time, *label, ':', steps)

In [9]:
# Run algorithm

def RunAlgo(data, samplingPeriod = 60,
            SKIPFILTER = False, filterlength = 13, filterSTD =0.35,
            windowSize = 35, threshold = 1.2, timeThreshold = 200) :

    '''
    Implement the oxford java step counter algorithm by chaining the five
    stages: pre-processing, (optional) filtering, scoring, detection and
    post-processing.

    Parameters
    ----------
    data : DataFrame
        input data required for preprocessing stage.
    samplingPeriod : float, optional
        Sampling period in milliseconds, forwarded to the preprocessing
        stage. The default is 60 millisecond.
    SKIPFILTER : bool, optional
        whether the filter stage should be skipped. The default is False.
    filterlength : int, optional
        length of filter window, forwarded to the filter stage. The default is 13.
    filterSTD : float, optional
        std dev for generating filter coefficients, forwarded to the filter
        stage. The default is 0.35.
    windowSize : int, optional
        length of window in scoring stage. The default is 35.
    threshold : float, optional
        threshold required for detection stage. The default is 1.2.
    timeThreshold : float/int, optional
        time in millisecond, used to detect steps. The default is 200.

    Returns
    -------
    steps : int
        number of steps.
    detectedStepsList : list
        datapoints for which step is detected.

    '''

    # Forward the caller's settings; previously samplingPeriod, filterlength
    # and filterSTD were accepted but the stages always ran with their own
    # defaults.
    ppData = PreProcessingStage(data, samplingPeriod)

    if SKIPFILTER:
        smoothData = ppData
    else:
        smoothData = FilterStage(ppData, filterlength, filterSTD)
    peakScoreData = ScoringStage(smoothData, windowSize)
    peakData = DetectionStage(peakScoreData, threshold)
    steps, detectedStepsList = PostProcessStage(peakData, timeThreshold)
    return steps, detectedStepsList

In [10]:
def main():
    """Run readCSVFile over the hard-coded list of validation recordings."""
    # Data sets for algorithm validation
    data_files = [
        '1663927235497_Max_walking_5hz-50steps.csv',
        '1666178020224_samsung_SM-A528B_FT26A_5Hz_walking_50-steps.csv',
        '1666177837406_samsung_SM-A528B_FT26A_5Hz_walking_100-steps.csv',
        '1663927276636_Max_walking_5hz-100steps.csv',
        '1663927357038_Max_walking_5hz-150steps-fast.csv',
        '1663926837642_Max_walking_15hz-50steps.csv',
        '1666177915040_samsung_SM-A528B_FT26A_15Hz_walking_50-steps.csv',
        '1666177951490_samsung_SM-A528B_FT26A_15Hz_walking_100-steps.csv',
        '1667398754709_samsung_SM-A528B_FT91A_5Hz_walking_20steps.csv',
        '1667398773370_samsung_SM-A528B_FT91A_5Hz_walking_10-steps.csv',
        '1667398789600_samsung_SM-A528B_FT91A_5Hz_walking_15-steps.csv',
        '1667398820924_samsung_SM-A528B_FT91A_5Hz_walking_25steps.csv',
        '1667398842757_samsung_SM-A528B_FT91A_5Hz_walking_20steps.csv',
        '1667398861596_samsung_SM-A528B_FT91A_5Hz_walking_5-steps.csv',
        '1667398875681_samsung_SM-A528B_FT91A_5Hz_walking_4-steps.csv',
        '1667398884877_samsung_SM-A528B_FT91A_5Hz_walking_25-steps.csv',
        '1667398919949_samsung_SM-A528B_FT91A_5Hz_walking_50-steps.csv',
        '1667398956068_samsung_SM-A528B_FT91A_5Hz_walking_100-steps.csv',
        '1667399029417_samsung_SM-A528B_FT91A_15Hz_walking_5-steps.csv',
        '1667399038177_samsung_SM-A528B_FT91A_15Hz_walking_15-steps.csv',
        '1667399051107_samsung_SM-A528B_FT91A_15Hz_walking_100-steps.csv',
        '1667399179387_samsung_SM-A528B_FT91A_15Hz_walking_66-steps.csv',
    ]
    # Process every recording and print its detected step count
    readCSVFile(data_files)

In [11]:
# Entry point: run main() when this code is executed directly rather than imported.
if __name__ == '__main__':
    main()

2022-09-23 15:30:35 15:31:13 5hz-50steps.csv : 38
2022-10-19 16:43:40 16:44:12 5Hz 50-steps.csv : 52
2022-10-19 16:40:37 16:41:41 5Hz 100-steps.csv : 111
2022-09-23 15:31:16 15:32:25 5hz-100steps.csv : 101
2022-09-23 15:32:37 15:34:20 5hz-150steps-fast.csv : 152
2022-09-23 15:23:57 15:24:47 15hz-50steps.csv : 53
2022-10-19 16:41:55 16:42:30 15Hz 50-steps.csv : 49
2022-10-19 16:42:31 16:43:30 15Hz 100-steps.csv : 100
2022-11-02 19:49:14 19:49:31 5Hz 20steps.csv : 20
2022-11-02 19:49:33 19:49:45 5Hz 10-steps.csv : 3
2022-11-02 19:49:49 19:50:03 5Hz 15-steps.csv : 18
2022-11-02 19:50:20 19:50:41 5Hz 25steps.csv : 26
2022-11-02 19:50:42 19:51:00 5Hz 20steps.csv : 20
2022-11-02 19:51:01 19:51:11 5Hz 5-steps.csv : 0
2022-11-02 19:51:15 19:51:23 5Hz 4-steps.csv : 2
2022-11-02 19:51:24 19:51:43 5Hz 25-steps.csv : 23
2022-11-02 19:51:59 19:52:35 5Hz 50-steps.csv : 53
2022-11-02 19:52:36 19:53:43 5Hz 100-steps.csv : 103
2022-11-02 19:53:49 19:53:57 15Hz 5-steps.csv : 4
2022-11-02 19:53:58 19:54: