# Faster Windowing Procedure

The windowing follows a certain procedure:

>•	Taking a 1 s block of the data

>•	Varying the block length from 1 s to 5 s with an increment of 200 ms (starting point remains the same for all blocks)

>•	Sectioning and feature generation for all blocks

>•	For each block class probabilities are calculated (ML classifier) 

>•	Sliding the starting point with an increment of 200 ms and starting again with a 1 s block varying to 5 s

*see animation below*

In [1]:
from IPython.display import HTML

# embed the animated illustration of the windowing procedure in the notebook
animation_tag = '<img src="windowing_procedure.gif" width=600 >'
HTML(animation_tag)

In [2]:
import numpy as np
import pandas as pd
import sqlite3
import os
import matplotlib.pyplot as plt
import functionsMasterProjectMeinhart as fmpm

## Definition of the test subject

In [3]:
# define the test subject
test_subject = 'subject01'

# path to the csv-file with the whole record of the test subject
# (raw string, so that the backslashes of the Windows path are never read as escape sequences)
test_subject_dir = r'E:\Physio_Data\Subject_' + test_subject[-2:] # last two characters of the test subject (e.g. '01')
test_subject_file = test_subject + '.csv'
test_subject_path = os.path.join(test_subject_dir, test_subject_file)

## Loading and filtering of test subject data

In [None]:
# sampling rate of the recorded signals
sampling_rate = 256 # [Hz]

# names of the signals to load
sig_names = ['Acc', 'Gyr']

# low-pass filter settings according to Crema
cutoff = 10 # [Hz]
order = 6 # butterworth order

# read the selected signals from the file of the test subject
sensor_data = fmpm.get_sensor_data(in_file=test_subject_path, signals=sig_names, sampling_rate=sampling_rate)

# apply the butterworth low-pass filter to every signal and keep the results in a separate dictionary
sensor_data_filt = {
    name: fmpm.butter_lowpass_filter(sensor_data[name], cutoff=cutoff, fs=sampling_rate, order=order)
    for name in sig_names
}

np.shape(sensor_data_filt['Acc'])

## Windowing procedure

### Definition of some parameters

In [None]:
# every window is split into this number of sections
number_sections = 10

# abbreviations for exercises / non-exercise
exercise_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE']

# increments of the window start and of the window stretch
win_start_inc = 0.2 # [s]
win_stretch_inc = 0.2 # [s]

# minimum and maximum window length
win_min_len = 1 # [s]
win_max_len = 5 # [s]

# signal length, taken from the first signal
# (all sensor data must have the same length --> Acc, Gyr, ...)
signal_len = len(sensor_data_filt[sig_names[0]])

# first window start time
win_start = 0 # [s]

# last window start time --> time where the minimum window length just fits into the sensor data
win_last_start = signal_len / sampling_rate - win_min_len

# all window lengths (+win_stretch_inc to include the end point) and all window start points
window_lengths = np.arange(win_min_len, win_max_len+win_stretch_inc, win_stretch_inc)
window_starts = np.arange(win_start, win_last_start, win_start_inc)

# number of different window sizes and of different window start points
num_win_sizes = len(window_lengths)
num_start_points = len(window_starts)

# one matrix per class to save the predicted values (rows: start points, columns: window sizes)
pred_matrix = {abbr: np.zeros((num_start_points, num_win_sizes)) for abbr in exercise_abbrs}

# matrix with all generated features (one row per combination of start point and window size)
feature_map = np.zeros((num_start_points * num_win_sizes, number_sections*6))

### Windowing the filtered signal

In [None]:
# current row of the feature map
count = 0
max_count = len(feature_map)
prev_progress = 0 # previous progress

# set when a feature row could not be generated --> stops the windowing
break_flag = False

# window start points and window lengths (+win_stretch_inc to include end point);
# both are built once instead of rebuilding the window lengths for every start point
win_positions = np.arange(win_start, win_last_start, win_start_inc)
win_lengths = np.arange(win_min_len, win_max_len+win_stretch_inc, win_stretch_inc)

# going through all window start points
for win_pos in win_positions:

    # going through all window lengths
    for win_len in win_lengths:

        # calculate start and stop index
        start_index = win_pos * sampling_rate
        stop_index = start_index + (win_len * sampling_rate)

        # check if stop index is out of range
        if stop_index >= signal_len:
            stop_index = signal_len-1 # set equal to last index

        # get indices of the sections
        section_indices, step = np.linspace(start_index, stop_index, number_sections, endpoint=False, retstep=True)

        #  + step/2 in order to get the indices in the middle of the sections
        section_indices = (section_indices + step/2).round().astype(int)

        try:
            # putting the feature map together: all columns of the first signal, then all
            # columns of the second signal, each with one value per section
            feature_map[count,:] = np.concatenate((sensor_data_filt[sig_names[0]][section_indices,:].transpose(),
                                                   sensor_data_filt[sig_names[1]][section_indices,:].transpose())).flatten()
        except (IndexError, ValueError) as err:
            # only indexing and shape problems are handled here; anything else
            # (e.g. a keyboard interrupt) is no longer swallowed
            print('Feature generation stopped at row', count, '-', repr(err))
            break_flag = True
            break
        count += 1

    prev_progress = fmpm.print_progress(count, max_count, prev_progress)

    # leave the outer loop as well if a row could not be generated
    if break_flag:
        break

print('\nShape of feature map:')
np.shape(feature_map)

## Function to automate the procedure

In [18]:
def signal_windowing_via_indices(test_subject_path,
                                 number_sections=10,
                                 sig_names=None,
                                 sampling_rate=256,
                                 cutoff=10,
                                 order=6,
                                 win_start_inc=0.2,
                                 win_stretch_inc=0.2,
                                 win_min_len=1,
                                 win_max_len=5,
                                 win_start=0,
                                 win_last_start=None):
    '''
    This function applies a defined windowing procedure in order to split a signal
    into different sections, which can be then taken as features for machine learning.

    For every window start point all window lengths from win_min_len to win_max_len
    are applied. Each window is split into number_sections sections and the section
    values are determined by taking the index in the middle of the corresponding section.
    A stop index beyond the end of the signal is set to the last index, so windows
    at the end of the record are shorter than requested.
    In order to avoid extreme outliers a butterworth filter is used before sectioning.

    Parameters
    ----------
    test_subject_path : str
        Path to the csv-file of the test subject data.

    number_sections: int
        Number of sections to split each window.

    sig_names : list of strings or None
        Signal names, used as keys for signal dictionaries.
        If None, ['Acc','Gyr'] is used.

    sampling_rate : int or float
        Sampling rate of the signals.

    cutoff : int or float
        Cutoff frequency of the butterworth filter.

    order : int
        Order of the butterworth filter.

    win_start_inc : int or float
        Start increment for the window [s].

    win_stretch_inc : int or float
        Stretch increment for the window [s].

    win_min_len : int or float
        Minimum window length [s].

    win_max_len : int or float
        Maximum window length [s].

    win_start : int or float
        Start time of the window [s].

    win_last_start : int or float or None
        Last start time of the window [s].
        If None, set to time where the minimum window length just fits into the sensor data.


    Returns
    -------
    numpy.ndarray
        Matrix with sectioned signal data.
            (Number of rows = number of start points * number of window lengths)
            (Number of columns = number_sections * total number of signal columns)
        One row holds, signal by signal (order of sig_names) and column by column,
        the number_sections section values of the corresponding window.
    '''

    # a fresh list per call instead of one mutable default shared between all calls
    if sig_names is None:
        sig_names = ['Acc','Gyr']

    # get data from selected file
    sensor_data = fmpm.get_sensor_data(in_file=test_subject_path,
                                       signals=sig_names,
                                       sampling_rate=sampling_rate)

    # filter data with butterworth filter and save to new dictionary
    sensor_data_filt = {}
    for signal in sig_names:
        sensor_data_filt[signal] = fmpm.butter_lowpass_filter(sensor_data[signal],
                                                              cutoff=cutoff,
                                                              fs=sampling_rate,
                                                              order=order)

    # signal length: all sensor data must have same length --> Acc, Gyr, ...
    # --> but to ensure that indices are not out of range in case of wrong input data
    # let's take the smallest length of the different signals
    signal_len = min(np.shape(sensor_data_filt[sig])[0] for sig in sig_names)

    # total number of signal columns (e.g. 3 axes of Acc + 3 axes of Gyr = 6)
    # --> the filtered signals are expected to be 2-D (samples x columns)
    num_channels = sum(np.shape(sensor_data_filt[sig])[1] for sig in sig_names)

    # last window start time --> time where the minimum window length just fits into the sensor data
    if win_last_start is None:
        win_last_start = signal_len/sampling_rate - win_min_len

    # all window lengths and all window start points (+increment to include the end point)
    # NOTE(review): np.arange with a non-integer step can differ by one element due to
    # floating point rounding, and the last start point can lie up to one increment
    # behind win_last_start --> kept as is in order not to change the number of rows
    win_lengths = np.arange(win_min_len, win_max_len+win_stretch_inc, win_stretch_inc)
    win_positions = np.arange(win_start, win_last_start+win_start_inc, win_start_inc)

    # matrix with all generated features (one row per start point and window length)
    feature_map = np.zeros([len(win_positions) * len(win_lengths), number_sections*num_channels])

    # count for current position in the feature map
    count = 0

    # variables for progress printing
    max_count = len(feature_map)
    prev_progress = 0 # previous progress

    # going through all window start points
    for win_pos in win_positions:

        # going through all window lengths
        for win_len in win_lengths:

            # calculate start and stop index
            start_index = win_pos * sampling_rate
            stop_index = start_index + (win_len * sampling_rate)

            # check if stop index is out of range
            if stop_index >= signal_len:
                stop_index = signal_len-1 # set equal to last index

            # get indices of the sections
            section_indices, step = np.linspace(start_index, stop_index, number_sections, endpoint=False, retstep=True)

            #  + step/2 in order to get the indices in the middle of the sections
            section_indices = (section_indices + step/2).round().astype(int)

            # putting the feature map together: for every signal all of its columns,
            # each column with one value per section
            feature_map[count,:] = np.concatenate(
                [sensor_data_filt[sig][section_indices,:].transpose() for sig in sig_names]).flatten()

            count += 1

        # print progress of feature map generation
        prev_progress = fmpm.print_progress(count, max_count, prev_progress)

    return feature_map

***Test the function:***

In [12]:
# run the windowing function for the selected test subject, all other parameters at their defaults
feature_map = signal_windowing_via_indices(test_subject_path)

Progress: 100%


In [13]:
# total number of values in the feature map (rows * columns)
feature_map.size

12656700

In [15]:
# shape of the feature map: (number of windows, number of features per window)
np.shape(feature_map)

(210945, 60)