# Split Data from CSV File

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.widgets import Button
import os
import re
import functionsMasterProjectMeinhart as fmpm

In [2]:
# Location of the timetable of subject 01 (a tab-separated text file).
time_file_name = 'Timetable_subject01.txt'
time_file_dir = r'E:\Physio_Data\Exercise_time_tables'
time_data_path = os.path.join(time_file_dir, time_file_name)

In [7]:
# Load the timetable; the file has no header line, so columns are numbered 0..n.
time_data = pd.read_csv(
    time_data_path,
    sep='\t',
    header=None,
    skiprows=0,
)
num_exercises = len(time_data.index)  # one timetable row per exercise
time_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Raises Oblique,15,5,10,01:18.6,01:58.3,02:22.1,02:37.1,02:54.8,03:23.3
1,PNF Diagonal 2,10,5,15,04:27.1,04:54.3,05:24.5,05:38.9,06:25.8,07:05.1
2,Triceps Curls,15,5,10,07:32.3,08:14.8,08:49.5,09:04.9,09:46.1,10:12.6
3,Rotation Wrist,5,10,15,10:43.1,10:57.3,11:25.6,11:51.8,12:12.1,12:52.4
4,Raises Front,10,15,5,13:44.8,14:14,14:40.6,15:20,16:09.5,16:23.1
5,Biceps Curls,15,10,5,16:50.8,17:30.2,18:01.3,18:32.2,18:58.6,19:12
6,Raises Side,15,5,10,20:13,21:00.2,21:24.9,21:40.2,22:09.6,22:38.4
7,PNF Diagonal 1,10,15,5,23:43.9,24:15.9,24:42,25:25.1,25:48.7,26:04.6
8,Shoulder Adduct.,5,15,10,26:48.5,27:05.3,27:25.4,28:16.5,28:41.2,29:10.9
9,Military Press,15,10,5,29:45.6,30:37.4,31:14.6,31:47.3,32:13.6,32:28.1


In [6]:
# Signal recording that belongs to the timetable loaded above.
signal_file_name = 'subject01.csv'
signal_file_dir = r'E:\Physio_Data\Subject_01'
signal_data_path = os.path.join(signal_file_dir, signal_file_name)

# Comma-separated file whose first line holds the column names.
signal_data = pd.read_csv(
    signal_data_path,
    sep=',',
    skiprows=0,
)
signal_data.head()

Unnamed: 0,Packet number,Gyroscope X (deg/s),Gyroscope Y (deg/s),Gyroscope Z (deg/s),Accelerometer X (g),Accelerometer Y (g),Accelerometer Z (g),Magnetometer X (G),Magnetometer Y (G),Magnetometer Z (G)
0,113,-0.0625,-0.0625,-0.1875,0.019043,0.008789,1.011719,0.346191,-0.486328,-0.728516
1,114,-0.0625,0.0,-0.125,0.01123,0.004883,1.015625,0.180664,-0.166992,-0.35791
2,115,0.0625,-0.0625,-0.1875,0.019043,0.004883,1.007813,0.179199,-0.167969,-0.35791
3,116,-0.0625,-0.0625,0.0,0.015137,0.004883,1.003906,0.179199,-0.167969,-0.35791
4,117,0.0,0.125,-0.0625,0.01123,0.012695,1.007813,0.180664,-0.167969,-0.35791


## Function to automate the procedure

In [3]:
def _time_string_to_seconds(time_string):
    '''
    Convert a colon-separated time string to seconds (float).

    Accepts 'mm:ss', 'mm:ss.f' and 'hh:mm:ss[.f]': every field is weighted by
    a factor of 60 relative to the field to its right.
    '''
    seconds = 0.0
    for field in str(time_string).strip().split(':'):
        seconds = seconds * 60 + float(field)
    return seconds


def split_sensor_data(time_file_dir = r'E:\Jupyter_Notebooks\Master_Project_Meinhart\Exercise_time_tables',
                      time_file_name = 'Timetable_subject01.txt',
                      signal_file_dir  = r'E:\Physio_Data\Subject_01',
                      signal_file_name = 'subject01.csv',
                      save_dir  = r'E:\Physio_Data_Split',
                      time_offset_before = 0,
                      time_offset_after = 0,
                      sampling_rate = 256):

    '''
    Function splits signal data according to a txt-file with a timetable of predefined format
    (tab-separated columns, no header line):

    Name of exercise, sequence of repetitions, start and stop times (one pair for each number of repetitions)

    Example:

    Raises Oblique   15   5    10   01:18.6   01:58.3   02:22.1   02:37.1   02:54.8   03:23.3
    PNF Diagonal 2   10   5    15   04:27.1   04:54.3   05:24.5   05:38.9   06:25.8   07:05.1
    Triceps Curls    15   5    10   07:32.3   08:14.8   08:49.5   09:04.9   09:46.1   10:12.6
    Rotation Wrist   5    10   15   10:43.1   10:57.3   11:25.6   11:51.8   12:12.1   12:52.4
    ...
    ...

    The number of repetition sets per exercise is derived from the number of columns
    (1 name column + k repetition columns + 2*k time columns), so the example above has k = 3.


    Each split section is then written to a csv-file, whose name contains:

    Subject number, abbreviation of exercise, number of repetitions

    Example: subject01_RF_05.csv

    The row index of the signal file is written as the first column of every output file.


    Inputs
    ------
    time_file_dir: directory of the timetable file

    time_file_name: name of the timetable file; the part between the first '_' and the
                    following '_' or '.' is used as subject tag (e.g. 'subject01')

    signal_file_dir: directory of signal file

    signal_file_name: name of signal file

    save_dir: directory to save split data (created if it does not exist yet)

    time_offset_before: opportunity to decrease start times [s]

    time_offset_after: opportunity to increase stop times [s]

    sampling_rate: sampling rate of the measured signal data; times are converted to
                   row positions via round(time * sampling_rate)


    Returns
    -------
    None


    Raises
    ------
    KeyError: if the timetable contains an exercise name without a known abbreviation

    ValueError: if the timetable has too few columns to hold one repetition set
    '''

    # dictionary for exercise abbreviations
    exercise_abbr = {
        'Raises Front': 'RF',
        'Raises Oblique': 'RO',
        'Raises Side': 'RS',
        'Rotation Wrist': 'LR',
        'Biceps Curls': 'BC',
        'Triceps Curls': 'TC',
        'Military Press': 'MP',
        'Shoulder Adduct.': 'SA',
        'PNF Diagonal 1': 'P1',
        'PNF Diagonal 2': 'P2',
    }

    # remember the subject number ('Timetable_subject01.txt' --> 'subject01')
    subject = re.split('[_.]',time_file_name)[1]

    # read in time table
    time_data_path = os.path.join(time_file_dir, time_file_name)
    time_data = pd.read_csv(time_data_path, skiprows=0, sep='\t', header=None)

    # completely empty columns (e.g. caused by trailing tabs) carry no information
    time_data = time_data.dropna(axis=1, how='all')

    # convert once to an array instead of rebuilding it for every single cell access
    time_table = time_data.values

    # layout: 1 name column + k repetition columns + 2*k time columns
    num_rep_sets = (time_table.shape[1] - 1) // 3
    if num_rep_sets < 1:
        raise ValueError('timetable {!r} needs at least 4 columns '
                         '(name, repetitions, start, stop)'.format(time_data_path))
    first_time_column = 1 + num_rep_sets

    # read in signal data
    signal_data_path = os.path.join(signal_file_dir, signal_file_name)
    signal_data = pd.read_csv(signal_data_path, skiprows=0, sep=',')
    num_data_points = signal_data.shape[0] #  number of data points

    # make sure the output directory exists, otherwise writing the csv-files fails
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)


    # split data according to the timetable and save each exercise to a corresponding csv-file:

    # go through all exercises
    for row in time_table:

        exercise_name = row[0]
        try:
            abbreviation = exercise_abbr[exercise_name]
        except KeyError:
            raise KeyError('unknown exercise name in timetable: {!r}'.format(exercise_name))

        # for loop for different numbers of repetitions
        for jj in range(num_rep_sets):

            # start and stop time in seconds (times are stored pairwise: start, stop, start, stop, ...)
            start_time = _time_string_to_seconds(row[first_time_column + 2*jj]) - time_offset_before # [s]
            stop_time = _time_string_to_seconds(row[first_time_column + 2*jj + 1]) + time_offset_after # [s]

            # get corresponding start and stop indices, limited to the valid range
            start_index = max(int(round(start_time * sampling_rate)), 0)
            stop_index = min(int(round(stop_time * sampling_rate)), num_data_points-1)

            # select corresponding signal data (from Pandas DataFrame)
            signal_data_selected = signal_data.iloc[start_index:stop_index+1] # +1 to include stop index

            # put out-file name together (subject number + abbreviation of exercise + number of rep. with leading 0)
            out_file_name = subject \
                             + '_' + abbreviation \
                             + '_' + str(row[jj+1]).zfill(2) \
                             + '.csv'

            # join save directory and out-file name
            out_file_path = os.path.join(save_dir, out_file_name)

            # save selected data as csv-file
            signal_data_selected.to_csv(out_file_path, sep=',')
    


## Apply the function

In [2]:
# Split the recordings of subjects 01 to 09 (subject 10 has no timetable).
subject_nums = ['{:02d}'.format(number) for number in range(1, 10)]

for subject_num in subject_nums:
    subject_tag = 'subject' + subject_num
    fmpm.split_sensor_data(
        time_file_dir=r'E:\Physio_Data\Exercise_time_tables',
        time_file_name='Timetable_' + subject_tag + '.txt',
        signal_file_dir=r'E:\Physio_Data\Subject_' + subject_num,
        signal_file_name=subject_tag + '.csv',
        save_dir=r'E:\Physio_Data_Split_Exercise',
        time_offset_before=2,  # [s] start each section 2 s earlier
        time_offset_after=2,   # [s] end each section 2 s later
        sampling_rate=256,
    )

## Read and plot the split data

In [3]:
subject_nums = ['01','02','03','04','05','07','08','09'] # subjects 06 and 10: no valid timetables
exercise_abbrs  = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2']
repetitions = ['05','10','15']

# NOTE(review): this directory differs from the save_dir used when splitting
# ('E:\Physio_Data_Split_Exercise'); presumably the split files were checked and
# moved here by hand -- confirm.
file_dir = r'E:\Physio_Data_Split_Exercise_done'

# big nested dictionary for all data: allData[subject][exercise][repetitions]
# Each leaf is the result of fmpm.get_sensor_data for one split file; the plotting
# cell reads its 'Acc', 'Gyr' and 'time' entries.
allData = {}

for sub_num in subject_nums:
    subject_data = allData[sub_num] = {}

    for ex_abb in exercise_abbrs:
        exercise_data = subject_data[ex_abb] = {}

        for rep in repetitions:
            # put file name together, e.g. 'subject01_RF_05.csv'
            file_name = 'subject' + sub_num + '_' + ex_abb + '_' + rep + '.csv'

            # join directory and file name
            file_path = os.path.join(file_dir, file_name)

            # read the complete file (no start/stop time restriction)
            exercise_data[rep] = fmpm.get_sensor_data(in_file=file_path,
                                                      signals=['Acc','Gyr','Mag'],
                                                      sampling_rate=256,
                                                      start_time=None,
                                                      stop_time=None,
                                                      skip_rows=0,
                                                      sep=',',
                                                      add_info=file_name)
            

In [4]:
# Let IPython pick a GUI backend automatically (the selected backend is reported below).
# NOTE(review): presumably required so that the Button widgets created in the next
# cell respond to clicks -- confirm.
%matplotlib auto

Using matplotlib backend: TkAgg


In [5]:
# Lookup table: exercise abbreviation --> full exercise name (used for the figure title).
exercise = {
    'RF': 'Raises Front',
    'RO': 'Raises Oblique',
    'RS': 'Raises Side',
    'LR': 'Rotation Wrist',
    'BC': 'Biceps Curls',
    'TC': 'Triceps Curls',
    'MP': 'Military Press',
    'SA': 'Shoulder Adduct.',
    'P1': 'PNF Diagonal 1',
    'P2': 'PNF Diagonal 2',
}


# Keys of the nested data dictionary that can be browsed in the figure.
subject_nums = ['01', '02', '03', '04', '05', '07', '08', '09']  # subjects 06 and 10: no valid timetables
exercise_abbrs = ['RF', 'RO', 'RS', 'LR', 'BC', 'TC', 'MP', 'SA', 'P1', 'P2']
repetitions = ['05', '10', '15']


# 3 x 2 grid of panels: one row per number of repetitions, one column per signal type.
fig, axs = plt.subplots(nrows=3, ncols=2, sharex=True, figsize=(18, 9))
# Reserve the bottom 20 % of the figure for the navigation buttons.
plt.subplots_adjust(bottom=0.2, hspace=0.4)


class Index(object):
    '''
    Keeps track of the subject and exercise currently shown in the figure and
    redraws all panels whenever one of the navigation callbacks is triggered.

    The class works on names defined at notebook level: subject_nums,
    exercise_abbrs, repetitions, exercise, allData, fig, axs, plt and fmpm.
    The axes grid axs needs one row per entry of repetitions and two columns
    (left: acceleration, right: angular velocity).
    '''

    ind_sub = 0  # position of the displayed subject in subject_nums
    ind_ex = 0   # position of the displayed exercise in exercise_abbrs

    # One entry per figure column:
    # (key of the signal in the data dictionary, title prefix, extra keyword arguments for fmpm.plot_signal)
    _columns = (('Acc', 'Acceleration', {}),
                ('Gyr', 'Angular Velocity', {'yLabel': r'$vel \enspace [\frac{deg}{s}]$'}))

    def next_subject(self, event):
        '''Button callback: show the next subject (wraps around after the last one).'''
        self.ind_sub = (self.ind_sub + 1) % len(subject_nums)
        self.plot_signals()

    def prev_subject(self, event):
        '''Button callback: show the previous subject (wraps around before the first one).'''
        self.ind_sub = (self.ind_sub - 1) % len(subject_nums)
        self.plot_signals()

    def next_exercise(self, event):
        '''Button callback: show the next exercise (wraps around after the last one).'''
        self.ind_ex = (self.ind_ex + 1) % len(exercise_abbrs)
        self.plot_signals()

    def prev_exercise(self, event):
        '''Button callback: show the previous exercise (wraps around before the first one).'''
        self.ind_ex = (self.ind_ex - 1) % len(exercise_abbrs)
        self.plot_signals()

    def plot_signals(self):
        '''
        Redraw every panel for the currently selected subject and exercise.

        Panels are drawn column by column (acceleration first, then angular
        velocity) and, within a column, from the first to the last entry of
        repetitions.
        '''
        subject = subject_nums[self.ind_sub]
        abbr = exercise_abbrs[self.ind_ex]

        fig.suptitle('Subject: ' + subject + \
                     '     Exercise: ' + exercise[abbr], fontsize=22, fontweight='bold')

        recordings = allData[subject][abbr]
        last_row = len(repetitions) - 1

        for col, (signal_key, title, extra_kwargs) in enumerate(self._columns):
            for row, rep in enumerate(repetitions):

                kwargs = dict(extra_kwargs)
                if row != last_row:
                    # only the bottom panel keeps the default x-axis label of fmpm.plot_signal
                    kwargs['xLabel'] = ''

                # make the panel current and remove the previous curves before plotting into it
                plt.sca(axs[row,col])
                axs[row,col].clear()
                fmpm.plot_signal(recordings[rep][signal_key],
                                 recordings[rep]['time'],
                                 Title=title + ' (' + rep + ' rep.)',
                                 **kwargs)

        plt.draw()


# Draw the initial selection once before the navigation buttons are wired up.
callback = Index()
callback.plot_signals()

# Button areas as [left, bottom, width, height]; all buttons share one row
# below the panels: exercise navigation on the left, subject navigation on the right.
BTN_BOTTOM, BTN_WIDTH, BTN_HEIGHT = 0.05, 0.15, 0.075
ax_prev_sub = plt.axes([0.57, BTN_BOTTOM, BTN_WIDTH, BTN_HEIGHT])
ax_next_sub = plt.axes([0.74, BTN_BOTTOM, BTN_WIDTH, BTN_HEIGHT])
ax_prev_ex = plt.axes([0.15, BTN_BOTTOM, BTN_WIDTH, BTN_HEIGHT])
ax_next_ex = plt.axes([0.32, BTN_BOTTOM, BTN_WIDTH, BTN_HEIGHT])

# Subject navigation (the Button objects stay referenced by their notebook-level names).
b_next_sub = Button(ax_next_sub, 'Next Subject')
b_next_sub.on_clicked(callback.next_subject)
b_prev_sub = Button(ax_prev_sub, 'Previous Subject')
b_prev_sub.on_clicked(callback.prev_subject)

# Exercise navigation.
b_next_ex = Button(ax_next_ex, 'Next Exercise')
b_next_ex.on_clicked(callback.next_exercise)
b_prev_ex = Button(ax_prev_ex, 'Previous Exercise')
b_prev_ex.on_clicked(callback.prev_exercise)

plt.show()

In [54]:
# Spot check of the loaded data: 'Acc' entry of subject 01, exercise 'RF' (Raises Front), 5 repetitions.
allData['01']['RF']['05']['Acc']

array([[-0.9716797,  1.121094 ,  0.2299805],
       [-0.9755859,  1.105469 ,  0.2182617],
       [-0.9794922,  1.082031 ,  0.206543 ],
       ...,
       [-0.1523438,  0.1884766,  0.8417969],
       [-0.1601563,  0.2001953,  0.8496094],
       [-0.1640625,  0.2158203,  0.8496094]])