# Split Non-Exercise Data

In [1]:
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
import functionsMasterProjectMeinhart as fmpm

## Functions to automate the procedure

In [2]:
def convert_time_format_to_index(min_sec, sampling_rate, time_offset=0, max_index=None):
    '''
    Function converts a string with the time format 'min:sec' (e.g. 5:17.2)
    to a corresponding index, considering the sampling rate.
    If the index would be negative, 0 is returned.
    
    Inputs
    ------
    min_sec : string
        Time data, defined format: 'min:sec'
    
    sampling_rate : float or int
        Sampling rate for the index calculation. [Hz]
        
    time_offset : float or int
        Time offset, added to the time before the index calculation. [s]
        
    max_index : int
        Maximum valid index.
        If provided and calculated index is out of range,
        max_index is returned instead.
     
    Returns
    -------
    int
        Corresponding index to parameter 'min_sec'.
        
    Raises
    ------
    ValueError
        If 'min_sec' does not consist of exactly two fields separated by ':'
        or if one of the fields cannot be converted to float.
    '''
    
    # split the time string only once and reject everything that is not 'min:sec';
    # otherwise e.g. 'h:min:sec' would silently be read as 'min:sec' with the rest dropped
    fields = min_sec.split(':')
    if len(fields) != 2:
        raise ValueError("Time has to be of the format 'min:sec', got: " + repr(min_sec))
    
    minutes = float(fields[0])
    seconds = float(fields[1])
    
    # time in seconds (including the offset)
    time_s = minutes*60 + seconds + time_offset
    
    # get corresponding index (int() guarantees a built-in int for any numeric input type)
    index = int(round(time_s * sampling_rate))
    
    # ensure that index is not below 0
    if index < 0:
        index = 0
    
    # ensure that index is in valid range if max index is given
    if max_index is not None and index > max_index:
        index = max_index
            
    return index

In [3]:
def _save_nonEx_section(signal_data, start_index, stop_index, subject, seq_num, save_dir):
    '''
    Helper: selects one non-exercise section of the signal data and writes it to a csv-file.
    
    The file name is put together from the subject number, the sequence number
    (with leading 0) and 'nonEx', e.g. subject01_01_nonEx.csv
    
    Inputs
    ------
    signal_data : Pandas DataFrame with the complete signal data
    
    start_index : first row of the section
    
    stop_index : last row of the section (inclusive); None selects all rows until the end
    
    subject : subject part of the file name (e.g. 'subject01')
    
    seq_num : sequence number of the non-exercise section
    
    save_dir : directory to save the csv-file
    
    
    Returns
    -------
    no returns
    '''
    
    # +1 to include the stop index; a slice end of None selects all data until the end
    slice_end = None if stop_index is None else stop_index + 1
    signal_data_selected = signal_data.iloc[start_index:slice_end]
    
    # put out-file name together (subject number + sequence number with leading 0 + 'nonEx')
    out_file_name = subject + '_' + str(seq_num).zfill(2) + '_nonEx.csv'
    
    # join save directory and out-file name
    out_file_path = os.path.join(save_dir, out_file_name)
    
    # save selected data as csv-file
    signal_data_selected.to_csv(out_file_path, sep=',')


def split_sensor_data_nonEx(time_file_dir = r'E:\Physio_Data\Exercise_time_tables',
                            time_file_name = 'Timetable_subject01.txt',
                            signal_file_dir  = r'E:\Physio_Data\Subject_01',
                            signal_file_name = 'subject01.csv',
                            save_dir  = r'E:\Physio_Data_Split_nonEx',
                            time_offset_before = 0,
                            time_offset_after = 0,
                            sampling_rate = 256):
    
    '''
    Function splits non-exercise signal data according to a txt-file with a timetable of predefined format
    (tab-separated, no header):
    
    Name of exercise, sequence of repetitions, start and stop times of exerc. (one pair for each number of repetitions)
    
    Example:
    
    Raises Oblique    15    5    10    01:18.6    01:58.3    02:22.1    02:37.1    02:54.8    03:23.3
    PNF Diagonal 2    10    5    15    04:27.1    04:54.3    05:24.5    05:38.9    06:25.8    07:05.1
    Triceps Curls     15    5    10    07:32.3    08:14.8    08:49.5    09:04.9    09:46.1    10:12.6
    Rotation Wrist    5     10   15    10:43.1    10:57.3    11:25.6    11:51.8    12:12.1    12:52.4
    ...
    ...

    
    For non-exercise data only the time ranges between the exercises are relevant:
    a non-exercise section stops where an exercise block starts (columns 4, 6, 8)
    and the next one starts where that exercise block stops (columns 5, 7, 9).
    The first section starts at index 0, the last section lasts until the end of the signal.
    
    Each split non-exercise section is then written to a csv-file, whose name contains:
    
    Subject number, sequence number, non-exercise info
    
    Example: subject01_01_nonEx.csv
    
    
    Inputs
    ------
    time_file_dir : directory of the timetable file 
    
    time_file_name : name of timetable file
                     (the part between the first '_' and the '.' is used as subject part of the out-file names)
    
    signal_file_dir : directory of signal file
    
    signal_file_name : name of signal file
    
    save_dir : directory to save split non-exercise data (is created if it does not exist yet)
    
    time_offset_before : opportunity to decrease start times [s]
                         (a positive value is converted to a negative one)
    
    time_offset_after : opportunity to increase stop times [s]
    
    sampling_rate : sampling rate of the measured signal data
    
    
    Returns
    -------
    no returns
    '''
    
    # remember the subject number
    subject = re.split('[_.]',time_file_name)[1]
    
    # read in time table
    time_data_path = os.path.join(time_file_dir, time_file_name)
    time_data = pd.read_csv(time_data_path, skiprows=0, sep='\t', header=None)
    num_exercises = time_data.shape[0] # number of exercises
    
    # convert the time table to an array only once instead of at every access in the loops
    time_table = time_data.values

    # read in signal data
    signal_data_path = os.path.join(signal_file_dir, signal_file_name)
    signal_data = pd.read_csv(signal_data_path, skiprows=0, sep=',')
    num_data_points = signal_data.shape[0] #  number of data points
    
    # make sure the save directory exists before the first file is written
    os.makedirs(save_dir, exist_ok=True)
    
    
    # column of the table with the stop times of the non-exercise data (= first exercise start time)
    stop_col = 4
    
    # column of the table with the start times of the non-exercise data (= first exercise stop time)
    start_col = 5
    
    # number of (start, stop) time pairs per exercise in the timetable
    num_sections = 3
    
    # first start index is always 0
    start_index = 0
    
    # sequence number for non-exercise data
    seq_num = 0
    
    # give time_offset_before a negative sign if it isn't already there
    if time_offset_before > 0:
        time_offset_before = -time_offset_before
    
    # split data according to the timetable and save non-exercise data to a corresponding csv-file:

    # go through all exercises
    for num_ex in range(num_exercises):
        
        # for loop for different sections between the exercise data
        for jj in range(num_sections): 
            
            # get stop index (start index is already known here)
            stop_index = convert_time_format_to_index(min_sec = time_table[num_ex,stop_col+2*jj], 
                                                      sampling_rate = sampling_rate, 
                                                      time_offset = time_offset_after, 
                                                      max_index = num_data_points)

            # select the corresponding signal data and save it as csv-file
            _save_nonEx_section(signal_data, start_index, stop_index, subject, seq_num, save_dir)
            
            # get the start index for the next run
            start_index = convert_time_format_to_index(min_sec = time_table[num_ex,start_col+2*jj], 
                                                       sampling_rate = sampling_rate, 
                                                       time_offset = time_offset_before, 
                                                       max_index = num_data_points)
            
            # increase sequence number
            seq_num += 1
            
    # after the two for loops one non-exercise data set is still left --> signal until the end
    _save_nonEx_section(signal_data, start_index, None, subject, seq_num, save_dir)

## Apply the function

In [23]:
# split the non-exercise data of every subject with a timetable
# (subjects 01 ... 09; for #10 there is no timetable)
subject_nums = [str(num).zfill(2) for num in range(1, 10)]

for subject_num in subject_nums:
    # all file and directory names only differ in the two-digit subject number
    split_sensor_data_nonEx(
        time_file_dir=r'E:\Physio_Data\Exercise_time_tables',
        time_file_name='Timetable_subject{}.txt'.format(subject_num),
        signal_file_dir=r'E:\Physio_Data\Subject_{}'.format(subject_num),
        signal_file_name='subject{}.csv'.format(subject_num),
        save_dir=r'E:\Physio_Data_Split_nonEx',
        time_offset_before=0,
        time_offset_after=0,
        sampling_rate=256,
    )

## Read and plot the split data

In [4]:
# subjects with a timetable (except #10 --> there is no timetable)
subject_nums = ['01','02','03','04','05','06','07','08','09']

# in total 31 sequence numbers per subject (00 ... 30)
sequence_nums = [str(ii).zfill(2) for ii in range(31)]

# directory with the split non-exercise csv-files
file_dir = r'E:\Physio_Data_Split_nonEx'

# create big nested dictionary for all data: allData[subject number][sequence number]
allData = {}

for sub_num in subject_nums:
    allData[sub_num] = {}
    
    for seq_num in sequence_nums:

        # put file name together (e.g. subject01_00_nonEx.csv)
        file_name = 'subject' + sub_num + '_' + seq_num + '_nonEx.csv'

        # join directory and file name
        file_path = os.path.join(file_dir, file_name)

        # load the sensor data of this section directly into the dictionary
        # (no empty placeholder needed, the entry is assigned exactly once)
        allData[sub_num][seq_num] = fmpm.get_sensor_data(in_file=file_path, 
                                                         signals=['Acc','Gyr','Mag'], 
                                                         sampling_rate=256, 
                                                         start_time=None, 
                                                         stop_time=None, 
                                                         skip_rows=0, 
                                                         sep=',',
                                                         add_info=file_name)

In [5]:
%matplotlib auto

Using matplotlib backend: TkAgg


In [7]:
# figure with two stacked axes sharing the x-axis
fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(18, 9))

# make space for the buttons below the plots
plt.subplots_adjust(bottom=0.2, hspace=0.4)


class Index(object):
    '''
    Callbacks for the navigation buttons of the figure.
    
    Keeps track of the currently shown subject and non-exercise sequence
    (as positions in 'subject_nums' and 'sequence_nums') and redraws both
    axes of 'fig' whenever one of them changes.
    Stepping beyond the last entry wraps around to the first one and vice versa.
    '''
    
    # position of the shown subject in subject_nums
    ind_sub = 0
    # position of the shown non-exercise sequence in sequence_nums
    ind_nonEx = 0

    def _shift_subject(self, step):
        # move the subject position by 'step' (with wrap-around) and redraw
        self.ind_sub = (self.ind_sub + step) % len(subject_nums)
        self.plot_signals()

    def _shift_nonEx(self, step):
        # move the sequence position by 'step' (with wrap-around) and redraw
        self.ind_nonEx = (self.ind_nonEx + step) % len(sequence_nums)
        self.plot_signals()

    def next_subject(self, event):
        self._shift_subject(1)

    def prev_subject(self, event):
        self._shift_subject(-1)
        
    def next_nonEx(self, event):
        self._shift_nonEx(1)

    def prev_nonEx(self, event):
        self._shift_nonEx(-1)
        
    def plot_signals(self):
        '''
        Update the figure title and plot acceleration (upper axes) and
        angular velocity (lower axes) of the currently selected data.
        '''
        
        # currently selected subject / sequence and the corresponding data
        sub_num = subject_nums[self.ind_sub]
        seq_num = sequence_nums[self.ind_nonEx]
        data = allData[sub_num][seq_num]
        
        title = 'Subject: ' + sub_num + '     Sequence #' + seq_num
        fig.suptitle(title, fontsize=22, fontweight='bold')
       
        ax_acc = axs[0]
        ax_gyr = axs[1]
       
        # plot linear acceleration
        # (axes is made the current one first; presumably fmpm.plot_signal draws on the current axes)
        plt.sca(ax_acc)
        ax_acc.clear()
        fmpm.plot_signal(data['Acc'], data['time'], Title='Acceleration')

        # plot angular velocity
        plt.sca(ax_gyr)
        ax_gyr.clear()
        fmpm.plot_signal(data['Gyr'], data['time'],
                         Title='Angular Velocity',
                         yLabel=r'$vel \enspace [\frac{deg}{s}]$')
        
        plt.draw()


# callback object holding the current selection; draw the first data set right away
callback = Index()
callback.plot_signals()

# axes for the four buttons in the free space at the bottom of the figure
# ([left, bottom, width, height] in figure coordinates)
ax_prev_sub = plt.axes([0.57, 0.05, 0.15, 0.075])
ax_next_sub = plt.axes([0.74, 0.05, 0.15, 0.075])
ax_prev_nonEx = plt.axes([0.15, 0.05, 0.15, 0.075])
ax_next_nonEx = plt.axes([0.32, 0.05, 0.15, 0.075])

# buttons to switch the subject
# (the Button objects are kept in variables so they stay referenced)
b_next_sub = Button(ax_next_sub, 'Next Subject')
b_prev_sub = Button(ax_prev_sub, 'Previous Subject')
b_next_sub.on_clicked(callback.next_subject)
b_prev_sub.on_clicked(callback.prev_subject)

# buttons to switch the non-exercise sequence
b_next_nonEx = Button(ax_next_nonEx, 'Next nonEx')
b_prev_nonEx = Button(ax_prev_nonEx, 'Previous nonEx')
b_next_nonEx.on_clicked(callback.next_nonEx)
b_prev_nonEx.on_clicked(callback.prev_nonEx)

plt.show()