# Class for Physio Data - Section Features

In [81]:
from IPython.display import clear_output
from pivottablejs import pivot_ui
import skinematics as skin
import pandas as pd
import numpy as np
import sqlite3
import os

In [82]:
def load_data_from_database(data_base_path='DataBase_Physio_with_nonEx.db'):
    '''
    Function to load the following data from data base:
        - subject IDs
        - exercise abbreviations
        - number of repetitions
        - sequence numbers
        - start times
        - stop times
        - csv-file name
    
    One row is returned per entry of the "repetitions" table, joined with its
    exercise, paradigm and subject.
    
    
    Parameters
    ----------
    data_base_path : string
        Path to data base (SQLite file).
    
    
    Returns
    -------
    DataFrame
        DataFrame with the listed information (see above). Columns:
        subject_id, abbreviation, num_rep, sequence_num, start_time,
        stop_time, csv_file.
    '''
    # sql command to extract data
    query_sql = """
        SELECT e.subject_id,
        p.abbreviation,
        e.num_rep,
        r.sequence_num,
        r.start_time, r.stop_time,
        e.csv_file
        FROM subjects s
        INNER JOIN exercises e
        ON s.id = e.subject_id
        INNER JOIN paradigms p
        ON p.id = e.paradigm_id
        INNER JOIN repetitions r
        ON e.id = r.exercise_id
        """
    
    # Connect to an existing database
    conn = sqlite3.connect(data_base_path)
    
    # get data from data base; close the connection even if the query fails
    try:
        all_data_points_df = pd.read_sql_query(query_sql, conn)
    finally:
        conn.close()
    
    return all_data_points_df

In [83]:
def select_data_points_from_df(all_data_points_df,
                               subject_ids=-1,
                               subject_ids_complementary=[],
                               reps=-1,
                               abbrs=-1,
                               with_non_Ex=True,
                               sub_id_key='subject_id',
                               num_rep_key='num_rep',
                               abbreviation_key='abbreviation'):
    '''
    Function to select data points from a DataFrame based on subject IDs,
    number of repetitions and exercise abbreviations.
    
    The input DataFrame and the passed lists are not modified.
    
    
    Parameters
    ----------
    all_data_points_df : pandas DataFrame
        DataFrame with all data points.
    
    subject_ids : int or list
        Subject IDs to select (e.g. [1, 2, 3]).
        --> default -1: Select all subjects not in subject_ids_complementary.
        --> if subject_ids is an empty list: empty DataFrame is returned
        
    subject_ids_complementary : int or list
        If subject_ids is -1 --> select all subjects not in subject_ids_complementary.
        
    reps : int or list
        Repetition numbers to select (e.g. [5, 10]).
        --> default -1: Select all repetitions.
        
    abbrs : int or list
        Exercise abbreviations to select (e.g. ['RF', 'SA']).
        --> default -1: Select all exercise abbreviations.
    
    with_non_Ex : boolean
        If False --> omit non exercise data (data points with zero repetitions).
        
    sub_id_key : string
        Key of the DataFrame for subject IDs.
        
    num_rep_key : string
        Key of the DataFrame for repetition numbers.
        
    abbreviation_key : string
        Key of the DataFrame for exercise abbreviations.
    
    
    Returns
    -------
    DataFrame
        DataFrame with selected data points.
    '''
    
    def _selects_all(value):
        # -1 is the "select everything" sentinel. Compare by value, not by
        # identity: "is -1" depends on interpreter int caching and fails for
        # e.g. numpy integers.
        return isinstance(value, (int, np.integer)) and value == -1
    
    def _as_list(value):
        # always hand back a NEW list so caller-owned lists are never mutated
        return list(value) if isinstance(value, list) else [value]
    
    # check if subject_ids is an empty list --> return an empty DataFrame in this case
    if isinstance(subject_ids, list) and not subject_ids:
        return pd.DataFrame()
    
    data_points_df = all_data_points_df.copy()
    
    # select the subject IDs
    # if subject_ids is -1 --> select data from all subjects that are NOT in subject_ids_complementary
    if _selects_all(subject_ids):
        excluded_ids = _as_list(subject_ids_complementary)
        data_points_df = data_points_df.loc[~data_points_df[sub_id_key].isin(excluded_ids)]
    else:
        data_points_df = data_points_df.loc[data_points_df[sub_id_key].isin(_as_list(subject_ids))]

    # select the repetition numbers
    if not _selects_all(reps):
        selected_reps = _as_list(reps)
        if with_non_Ex:
            selected_reps.append(0) # zero repetitions correspond to non exercise data
        data_points_df = data_points_df.loc[data_points_df[num_rep_key].isin(selected_reps)]

    elif not with_non_Ex:
        data_points_df = data_points_df.loc[data_points_df[num_rep_key] != 0]
        
    # select the exercise abbreviations
    if not _selects_all(abbrs):
        data_points_df = data_points_df.loc[data_points_df[abbreviation_key].isin(_as_list(abbrs))]

    return data_points_df

In [84]:
def split_range_into_sections(signal_data, num_sec=10, signals=['Acc','Gyr'], start_index=0, stop_index=None):
    '''
    This function splits a selected range of the input signals into a defined number 
    of equally distributed sections. For each signal and section the mean is calculated,
    and afterwards returned by means of a dictionary.
    
    
    Parameters
    ----------
    signal_data : dict
        Dictionary with the signals in the 'signals' argument as keys.
        The signal arrays must be two-dimensional (samples x channels)
        and must have same length.
    
    num_sec : int
        Number of sections to split the signals.
        
    signals : list
        Keys to select signals in the signal_data dictionary.
        If no keys are provided (empty list or None), all keys of the
        signal_data dictionary are taken.
        
    start_index : int
        Start index of selected range (default=0).
    
    stop_index : int
        Stop index of selected range.
        If not given --> take length of signal data.
    
    
    Returns
    -------
    Dictionary with section means for each signal --> keys are same as the selected
    in the list "signals". Each value is a float array of shape
    (num_sec, number of channels).
    A section without any sample (e.g. range shorter than num_sec) yields NaN.
    '''
    
    # if no signals are given as keys, select all keys of the input dictionary
    if not signals:
        signals = [*signal_data]
    
    # check if stop index is given, otherwise use the number of data points
    # (signals have to be of the same length --> take index 0)
    if stop_index is None:
        stop_index = np.shape(signal_data[signals[0]])[0]
    
    # get indices of the sections (+1 due to start and end index of each section)
    sec_ind = np.linspace(start_index, stop_index, num_sec+1).round().astype(int)
    num_bounds = len(sec_ind) - 1
    
    # dictionary to save section means for each signal
    section_means = {}

    for signal in signals:
        values = signal_data[signal]
        
        # preallocate the result instead of growing it with np.vstack
        means = np.zeros([num_bounds, np.shape(values)[1]])

        # write the mean of each section into its row
        for row, (lower, upper) in enumerate(zip(sec_ind[:-1], sec_ind[1:])):
            means[row] = np.mean(values[lower:upper], axis=0)

        section_means[signal] = means
        
    return section_means

In [85]:
def print_progress_func(current_num, max_num, prev_prog, add_info=None):
    '''
    Print the progress of a loop in percent, but only when it has increased.
    
    
    Parameters
    ----------
    current_num : int
        Number of the current run in a loop.
        
    max_num : int
        Maximum number of runs in a loop.
        
    prev_prog : int
        Progress value returned by the previous call; used to decide
        whether anything has to be printed.
        
    add_info : str
        Text to print in front of the percentage instead of "Progress:".
        Anything that is not a string is ignored.
    
    
    Returns
    -------
    int
        Current progress in percent (truncated); pass it as prev_prog
        to the next call.
    '''
    percent = int(current_num/max_num*100)
    
    # no increase --> leave the displayed output untouched
    if not percent > prev_prog:
        return percent
    
    # replace the previously displayed output with the new progress line
    clear_output(wait=True)
    
    if isinstance(add_info, str):
        message = add_info + ' {:3d}%'.format(percent)
    else:
        message = 'Progress: {:3d}%'.format(percent)
    print(message)
    
    return percent

In [86]:
def rotate_signal(signal_data, axis=0, rot_angle=90, signals=['Acc','Gyr']):
    '''
    Rotate the selected signals around the x, y or z-axis.
    
    
    Parameters
    ----------
    signal_data : dict
        Dictionary with the signals in the 'signals' argument as keys.
        The signal arrays must have three columns (x, y, z).
        
    axis : int
        Axis for rotation:
        0, 1 or 2 --> x, y or z
        
    rot_angle : int or float
        Rotation angle in degree.
        
    signals : list of strings
        Names of the signals, which shall be considered for rotation (e.g. ['Acc','Gyr']).
        If None --> all keys of the signal_data dictionary are taken.


    Returns
    -------
    Dictionary with rotated selected signals.
    (Same structure as input signal dictionary; signals that were not
    selected are not part of the result.)

    '''
    # None --> fall back to every key of the input dictionary
    selected = [*signal_data] if signals is None else signals
    
    # rotation matrix for the requested axis and angle
    rotation = skin.rotmat.R(axis=axis, angle=rot_angle)
    
    # every row of a signal is one (x, y, z) sample --> rotate the transposed
    # array and transpose back to samples x 3
    return {name: (rotation @ signal_data[name].T).T for name in selected}

In [87]:
def add_noise_to_signal(signal_data, target_snr_db=20, signals=['Acc','Gyr'], signal_orientations=['x','y','z']):
    '''
    Function to add Additive White Gaussian Noise (AWGN) to all signals with a defined SNR.
    
    Used formulas:
    SNR = P_signal / P_noise
    SNR_db = 10 * log10(P_signal / P_noise)
    --> P_noise = P_signal / 10**(SNR_db / 10)
    
    The noise power is determined separately for each column from the mean
    power of that column. The input arrays are not modified.
    
    
    Parameters
    ----------
    signal_data : dict
        Dictionary with the signals in the 'signals' argument as keys.
        
    target_snr_db : int or float
        Target signal to noise ratio in db.
        
    signals : list of strings
        Names of the signals, which shall be considered for adding noise (e.g. ['Acc','Gyr']).
        If None --> all keys of the signal_data dictionary are taken.
        
    signal_orientations : list of strings
        Orientations of the signals (e.g. ['x','y','z']).
        Noise is added to the first len(signal_orientations) columns;
        any further columns are returned unchanged.
    
    
    Returns
    -------
    Dictionary with noisy signals (float arrays).
    (Same structure as input signal dictionary.)

    '''
    # if no signals are given as keys, select all keys of the input dictionary
    if signals is None:
        signals = [*signal_data]
    
    # linear SNR; dividing by it avoids the detour via log10, which would
    # emit divide-by-zero warnings for all-zero channels
    snr_linear = 10 ** (target_snr_db / 10)
    
    # dictionary for noisy data
    noisy_signal_data = {}
    
    # adding noise using target SNR
    for sig in signals:
        
        clean = np.asarray(signal_data[sig], dtype=float)
        
        # start from a copy of the original values, so that columns which
        # are not processed below keep their data
        noisy = np.array(clean)
        
        for ii in range(len(signal_orientations)):
            
            P_signal_mean_watts = np.mean(clean[:,ii] ** 2) # mean power of the signal
            P_noise_mean_watts = P_signal_mean_watts / snr_linear # corresponding noise power
            noise_mean_std = np.sqrt(P_noise_mean_watts) # std of noise (P_noise_mean_watts is variance)
            
            # generate sample of white noise (power = variance = P_noise_mean_watts)
            noise = np.random.normal(0, noise_mean_std, clean.shape[0])
            
            # add noise to original signal
            noisy[:,ii] = clean[:,ii] + noise
        
        noisy_signal_data[sig] = noisy
    
    return noisy_signal_data

In [88]:
def generate_section_features_from_separate_repetitions(data_points_df,
            num_sections=10,
            csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
            csv_skiprows=0,
            csv_separator=',',
            signal_abbrs=['Acc','Gyr'],
            rot_axis=0,
            rot_angle=0,
            add_noise=False,
            target_snr_db=20,
            signal_orientations=['x','y','z'],
            labels_abbr2num_dict={'RF':0,'RO':1,'RS':2,'LR':3,'BC':4,'TC':5,'MP':6,'SA':7,'P1':8,'P2':9,'NE':10},
            sampling_rate=256,
            abbreviation_key='abbreviation',
            start_time_key='start_time',
            stop_time_key='stop_time',
            csv_file_key='csv_file',
            print_progress=True,
            progress_info='Generate features...'):
    '''
    Function to generate section mean features from separate repetitions, 
    which are given by the input DataFrame.
    
    Every csv-file is read only once; all repetitions belonging to it are
    processed before the next file is loaded. The rows of the returned
    DataFrames are therefore ordered by csv-file (order of first appearance)
    and, within a file, by the row order of data_points_df.
    
    
    Parameters
    ----------
    data_points_df : DataFrame
        DataFrame with information about data points (see load_data_from_database()).
        
    num_sections : int
        Number of sections to split the signals.
        
    csv_data_dir : string
        Directory of signal data csv-files.
        
    csv_skiprows : int
        Number of rows to skip for signal data csv-files.
        
    csv_separator : string
        Separator for signal data csv-files.
        
    signal_abbrs : list of strings
        Abbreviations of the signals (e.g. ['Acc','Gyr']).
        All csv columns whose name contains the abbreviation belong to the signal.
    
    rot_axis : int
        Axis for rotation:
        0, 1 or 2 --> x, y or z
        
    rot_angle : int or float
        Rotation angle in degree (0 --> no rotation).
        
    add_noise : boolean
        If True --> noise is added to signals.
        
    target_snr_db : int or float
        Signal to noise ratio in db for the generated noisy signals.
    
    signal_orientations : list of strings
        Orientations of the signals (e.g. ['x','y','z']).
        
    labels_abbr2num_dict : dict
        Dictionary to convert exercise abbreviations to number (e.g. ={'RF':0,'RO':1,'RS':2, ... }).
        
    sampling_rate : int or float
        Sampling rate of the signals in Hz.
    
    abbreviation_key : strings
        Exercise abbreviation key for DataFrame which contains data base entries.
        
    start_time_key : strings
        Start time key for DataFrame which contains data base entries.
        
    stop_time_key : strings
        Stop time key for DataFrame which contains data base entries.
        
    csv_file_key : strings
        csv-file key for DataFrame which contains data base entries.
        
    print_progress : boolean
        If True --> print progress at feature generation.
        
    progress_info : strings
        Additional information to print with progress.
        
    
    Returns
    -------
    X_df, y_df
        
        X_df ... DataFrame with section means of each signal
               e.g.  Acc_x_01    Acc_x_02    Acc_x_03  ...
               0    -0.939115   -0.851133   -0.074181  ...
               1    -0.928223   -1.003425   -0.495449  ...
               2    -0.896511   -0.949733   -0.381539  ...
               ...  ...         ...         ...
        
        y_df ... DataFrame with labels
               e.g.        ex_abbr    ex_num
                        0    RF         0
                        1    RF         0
                        2    RO         1
    
    
    Raises
    ------
    KeyError
        If an exercise abbreviation is not contained in labels_abbr2num_dict.
    '''
    
    # generate the column names of the feature matrix (depending on number of section means)
    X_columns = [sig + '_' + xyz + '_{:02d}'.format(sec_num+1)
                 for sig in signal_abbrs
                 for xyz in signal_orientations
                 for sec_num in range(num_sections)]

    # collect features and labels row by row; the DataFrames are built once at the end
    feature_rows = []
    ex_abbrs = []
    ex_nums = []

    # unique csv-files (via the configurable key, not a hard-coded column name)
    csv_files = data_points_df[csv_file_key].unique()
    
    # variables for progress printing
    max_count = len(csv_files)
    prev_progress = 0 # previous progress

    # going through all csv-files (unique --> only once for each file)
    for prog_count, current_csv_file in enumerate(csv_files, start=1):

        # join file path
        file_path = os.path.join(csv_data_dir, current_csv_file)

        # load the signal data of the current file
        selected_data_df = pd.read_csv(file_path, skiprows=csv_skiprows, sep=csv_separator)
        
        # write data with selected signals to dict
        # (substring match on the full abbreviation; a regex like 'Acc*'
        # would already match every column containing 'Ac')
        selected_data = {sig: selected_data_df.filter(like=sig).values for sig in signal_abbrs}
            
        # rotate the signals for any non-zero angle (negative angles included)
        if rot_angle != 0:
            selected_data = rotate_signal(selected_data, 
                                          axis=rot_axis, 
                                          rot_angle=rot_angle, 
                                          signals=signal_abbrs)
            
        # add noise to signal if corresponding parameter is set
        if add_noise:
            selected_data = add_noise_to_signal(selected_data,
                                                target_snr_db=target_snr_db, 
                                                signals=signal_abbrs, 
                                                signal_orientations=signal_orientations)
    
        # data frame with all repetitions of the current file
        current_data_points = data_points_df.loc[data_points_df[csv_file_key] == current_csv_file]

        # going through all repetitions of the current file and calculating the section means
        repetitions = zip(current_data_points[start_time_key],
                          current_data_points[stop_time_key],
                          current_data_points[abbreviation_key])
        
        for start_time, stop_time, current_ex_abbr in repetitions:

            # get start and stop indices via sampling rate
            start_idx = int(float(start_time) * sampling_rate)
            stop_idx = int(float(stop_time) * sampling_rate)

            # calculate the corresponding section means of the current repetition    
            section_means = split_range_into_sections(signal_data = selected_data,
                                                      num_sec = num_sections,
                                                      signals = signal_abbrs,
                                                      start_index = start_idx,
                                                      stop_index = stop_idx)
            
            # one feature row: for each signal all sections of x, then y, then z
            feature_rows.append(np.concatenate([section_means[sig].transpose().flatten() 
                                                for sig in signal_abbrs]))

            # current label (string + integer)
            ex_abbrs.append(current_ex_abbr)
            ex_nums.append(labels_abbr2num_dict[current_ex_abbr])

        # print progress of feature generation
        if print_progress:
            prev_progress = print_progress_func(prog_count, max_count, prev_progress, add_info=progress_info)
    
    clear_output()
    
    # create DataFrames for features and labels
    X_df = pd.DataFrame(feature_rows, columns=X_columns, dtype=float)
    y_df = pd.DataFrame({'ex_abbr': ex_abbrs, 'ex_num': ex_nums}, columns=['ex_abbr', 'ex_num'])
    
    return X_df, y_df

In [89]:
class PhysioData_SectionFeatures():
    '''
    Class for feature generation using section means.
    There are various selectable options --> see Parameters. 
    
    Parameters
    ----------
    num_sections : int
        Number of equally partitioned sections to split the single repetitions of the signals.
        
    test_subject_ids : int or list (of int)
        Subject IDs to select for testing (e.g. [1, 2, 3]).
        --> default -1: Select all subjects.
        --> if test_subject_ids is an empty list: empty DataFrame is returned by corresponding method.
        
    train_subject_ids : int or list
        Subject IDs to select for training (e.g. [1, 2, 3]).
        --> default -1: Select all subjects not in test_subject_ids.
        --> if train_subject_ids is an empty list: empty DataFrame is returned by corresponding method.
        
    test_rep_nums : int or list
        Repetition numbers to select for testing (e.g. [5, 10]).
        --> default -1: Select all repetitions.
        
    train_rep_nums : int or list
        Repetition numbers to select for training (e.g. [5, 10]).
        --> default -1: Select all repetitions.
        
    test_ex_abbrs : int or list
        Exercise abbreviations to select for testing (e.g. ['RF', 'SA']).
        --> default -1: Select all exercise abbreviations.
        
    train_ex_abbrs : int or list
        Exercise abbreviations to select for training (e.g. ['RF', 'SA']).
        --> default -1: Select all exercise abbreviations.
    
    with_non_Ex : boolean
        If False --> omit non exercise data (data points with zero repetitions).
        
    rot_axis_test_data : int
        Axis for rotation:
        0, 1 or 2 --> x, y or z
    
    rot_angle_test_data : int or float
        Rotation angle in degree.
    
    add_noise_test_data : boolean
        If True --> Additive White Gaussian Noise (AWGN) is added to signals of data for testing.
    
    snr_db : int or float
        Desired signal to noise ratio in db for the generated noisy test signals.
    
    csv_data_dir : string
        Directory of signal data csv-files.
        
    csv_skiprows : int
        Number of rows to skip for signal data csv-files.
        
    csv_separator : string
        Separator for signal data csv-files.
    
    data_base_path : string
        Path to data base (containing at least the following):
            - subject IDs
            - exercise abbreviations
            - number of repetitions
            - sequence numbers
            - start times
            - stop times
            - csv-file name
        
    print_progress : boolean
        If True --> print progress at feature generation.
    
    signal_abbrs : list of strings
        Abbreviations of the signals (e.g. ['Acc','Gyr']).
    
    signal_orientations : list of strings
        Orientations of the signals (e.g. ['x','y','z']).
        
    labels_abbr2num_dict : dict
        Dictionary to convert exercise abbreviations to number (e.g. ={'RF':0,'RO':1,'RS':2, ... }).
    
    sub_id_key : string
        Key of the DataFrame for subject IDs.
        
    num_rep_key : string
        Key of the DataFrame for repetition numbers.
        
    abbreviation_key : string
        Key of the DataFrame for exercise abbreviations.
        
    start_time_key : strings
        Start time key for DataFrame which contains data base entries.
        
    stop_time_key : strings
        Stop time key for DataFrame which contains data base entries.
        
    csv_file_key : strings
        csv-file key for DataFrame which contains data base entries.
        
    sampling_rate : int or float
        Sampling rate of the signals in Hz.
        

    Attributes
    ----------
    X_test_df : DataFrame
        Features for testing.
    
    y_test_df : DataFrame
        Labels for testing.
    
    X_train_df : DataFrame
        Features for training.
    
    y_train_df : DataFrame
        Labels for testing.
    
    
    test_data_points_df : DataFrame
        Data points for testing from data base.
    
    train_data_points_df : DataFrame
        Data points for training from data base.
    
    all_data_points_df : DataFrame
        All data points from data base.


    Methods
    -------
    get_X_test_df()
        Returns features for testing as DataFrame.
    
    get_y_test_df() :
        Returns labels for testing as DataFrame.
    
    get_X_train_df() :
        Returns features for training as DataFrame.
    
    get_y_train_df() :
        Returns labels for testing as DataFrame.
    
    
    X_test():
        Returns feature matrix for testing as np.array.
    
    y_test():
        Returns numeric labels for testing as np.array.
    
    X_train():
        Returns feature matrix for training as np.array.
    
    y_train():
        Returns numeric labels for training as np.array.
    
    
    get_test_data_points()
        Returns data points for testing from data base as DataFrame.
    
    get_train_data_points()
        Returns data points for training from data base as DataFrame.
    
    get_all_data_points()
        Returns all data points from data base as DataFrame.
    '''
    def __init__(self,
                 num_sections=10,
                 test_subject_ids=-1,
                 train_subject_ids=-1,
                 test_rep_nums=-1,
                 train_rep_nums=-1,
                 test_ex_abbrs=-1,
                 train_ex_abbrs=-1,
                 with_non_Ex=True,
                 rot_axis_test_data=0,
                 rot_angle_test_data=0,
                 add_noise_test_data=False,
                 add_noise_train_data=False,
                 snr_db=20,
                 csv_data_dir='E:\Physio_Data_Split_Ex_and_NonEx',
                 csv_skiprows=0,
                 csv_separator=',',
                 data_base_path='E:\Physio_Data\DataBase_Physio_with_nonEx.db',
                 print_progress=True,
                 signal_abbrs=['Acc','Gyr'],
                 signal_orientations=['x','y','z'],
                 labels_abbr2num_dict={'RF':0,'RO':1,'RS':2,'LR':3,'BC':4,'TC':5,'MP':6,'SA':7,'P1':8,'P2':9,'NE':10},
                 sub_id_key='subject_id',
                 num_rep_key='num_rep',
                 abbreviation_key='abbreviation',
                 start_time_key='start_time',
                 stop_time_key='stop_time',
                 csv_file_key='csv_file',
                 sampling_rate=256):
        """
        Parameters
        ----------
        --> See class docstring.
        """
        
        # load all data from data points
        self.all_data_points_df = load_data_from_database(data_base_path)
        
        # load data points for testing if list is not empty
        self.test_data_points_df =  select_data_points_from_df(self.all_data_points_df,
                                                               subject_ids=test_subject_ids,
                                                               subject_ids_complementary=[],
                                                               reps=test_rep_nums,
                                                               abbrs=test_ex_abbrs,
                                                               with_non_Ex=with_non_Ex,
                                                               sub_id_key=sub_id_key,
                                                               num_rep_key=num_rep_key,
                                                               abbreviation_key=abbreviation_key)
        
        # load data points for training
        self.train_data_points_df = select_data_points_from_df(self.all_data_points_df,
                                                               subject_ids=train_subject_ids,
                                                               subject_ids_complementary=test_subject_ids,
                                                               reps=train_rep_nums,
                                                               abbrs=train_ex_abbrs,
                                                               with_non_Ex=with_non_Ex,
                                                               sub_id_key=sub_id_key,
                                                               num_rep_key=num_rep_key,
                                                               abbreviation_key=abbreviation_key)
        
        # generate features for testing if corresponding DataFrame is not empty
        if not self.test_data_points_df.empty:
            self.X_test_df, self.y_test_df =   generate_section_features_from_separate_repetitions(
                                               data_points_df=self.test_data_points_df,
                                               num_sections=num_sections,
                                               csv_data_dir=csv_data_dir,
                                               csv_skiprows=csv_skiprows,
                                               csv_separator=csv_separator,
                                               signal_abbrs=signal_abbrs,
                                               rot_axis=rot_axis_test_data,
                                               rot_angle=rot_angle_test_data,
                                               add_noise=add_noise_test_data,
                                               target_snr_db=snr_db,
                                               signal_orientations=signal_orientations,
                                               labels_abbr2num_dict=labels_abbr2num_dict,
                                               sampling_rate=sampling_rate,
                                               abbreviation_key=abbreviation_key,
                                               start_time_key=start_time_key,
                                               stop_time_key=stop_time_key,
                                               csv_file_key=csv_file_key,
                                               print_progress=print_progress,
                                               progress_info='Generate features for testing...')
            
        # otherwise create empty DataFrames for test features and labels
        else:
            self.X_test_df = pd.DataFrame()
            self.y_test_df = pd.DataFrame()
        
        # generate features for training if corresponding DataFrame is not empty
        if not self.train_data_points_df.empty:
            self.X_train_df, self.y_train_df = generate_section_features_from_separate_repetitions(
                                               data_points_df=self.train_data_points_df,
                                               num_sections=num_sections,
                                               csv_data_dir=csv_data_dir,
                                               csv_skiprows=csv_skiprows,
                                               csv_separator=csv_separator,
                                               signal_abbrs=signal_abbrs,
                                               add_noise=add_noise_train_data,
                                               target_snr_db=snr_db,
                                               signal_orientations=signal_orientations,
                                               labels_abbr2num_dict=labels_abbr2num_dict,
                                               sampling_rate=sampling_rate,
                                               abbreviation_key=abbreviation_key,
                                               start_time_key=start_time_key,
                                               stop_time_key=stop_time_key,
                                               csv_file_key=csv_file_key,
                                               print_progress=print_progress,
                                               progress_info='Generate features for training...')
            
        # otherwise create empty DataFrames for train features and labels
        else:
            self.X_train_df = pd.DataFrame()
            self.y_train_df = pd.DataFrame()
    
    
    # methods to get features
    def get_X_test_df(self):
        return self.X_test_df
    
    def get_y_test_df(self):
        '''
        Get the label DataFrame of the test data.


        Returns
        -------
        DataFrame
            The object stored in ``self.y_test_df`` (no copy is made).
        '''
        test_labels_df = self.y_test_df
        return test_labels_df
    
    def get_X_train_df(self):
        '''
        Get the feature DataFrame of the training data.


        Returns
        -------
        DataFrame
            The object stored in ``self.X_train_df`` (no copy is made).
        '''
        train_features_df = self.X_train_df
        return train_features_df
    
    def get_y_train_df(self):
        '''
        Get the label DataFrame of the training data.


        Returns
        -------
        DataFrame
            The object stored in ``self.y_train_df`` (no copy is made).
        '''
        train_labels_df = self.y_train_df
        return train_labels_df
    
    
    # methods to get feature values only
    def X_test(self):
        '''
        Get the test features as plain values (without index and column names).


        Returns
        -------
        ndarray
            The ``values`` attribute of ``self.X_test_df``.
        '''
        test_features_df = self.X_test_df
        return test_features_df.values
    
    def y_test(self):
        '''
        Get the numeric test labels as a one-dimensional integer array.

        The labels are taken from the second column (position 1) of
        ``self.y_test_df``. If that DataFrame is empty (e.g. the plain
        ``pd.DataFrame()`` that is assigned when there are no test data
        points), an empty integer array is returned instead of failing
        with an ``IndexError`` on the missing column.


        Returns
        -------
        ndarray
            1-D integer array with one label per row of ``self.y_test_df``.
        '''
        labels_df = self.y_test_df

        # an empty DataFrame may have no columns at all --> nothing to index
        if labels_df.empty:
            return np.array([], dtype='int')

        return labels_df.values[:, 1].flatten().astype('int')
    
    def X_train(self):
        '''
        Get the training features as plain values (without index and column names).


        Returns
        -------
        ndarray
            The ``values`` attribute of ``self.X_train_df``.
        '''
        train_features_df = self.X_train_df
        return train_features_df.values
    
    def y_train(self):
        '''
        Get the numeric training labels as a one-dimensional integer array.

        The labels are taken from the second column (position 1) of
        ``self.y_train_df``. If that DataFrame is empty (e.g. the plain
        ``pd.DataFrame()`` that is assigned when there are no training data
        points), an empty integer array is returned instead of failing
        with an ``IndexError`` on the missing column.


        Returns
        -------
        ndarray
            1-D integer array with one label per row of ``self.y_train_df``.
        '''
        labels_df = self.y_train_df

        # an empty DataFrame may have no columns at all --> nothing to index
        if labels_df.empty:
            return np.array([], dtype='int')

        return labels_df.values[:, 1].flatten().astype('int')
    
    
    # methods to get data points (DataFrames)
    def get_test_data_points(self):
        '''
        Get the data points selected for testing.


        Returns
        -------
        DataFrame
            The object stored in ``self.test_data_points_df`` (no copy is made).
        '''
        test_points_df = self.test_data_points_df
        return test_points_df
    
    def get_train_data_points(self):
        '''
        Get the data points selected for training.


        Returns
        -------
        DataFrame
            The object stored in ``self.train_data_points_df`` (no copy is made).
        '''
        train_points_df = self.train_data_points_df
        return train_points_df
    
    def get_all_data_points(self):
        '''
        Get the complete set of data points held by the instance.


        Returns
        -------
        DataFrame
            The object stored in ``self.all_data_points_df`` (no copy is made).
        '''
        all_points_df = self.all_data_points_df
        return all_points_df


## Create instance of the class *PhysioData_SectionFeatures*

In [90]:
# subject 1 is used for testing, subjects 2, 3 and 4 for training;
# all remaining parameters keep their default values
PD1 = PhysioData_SectionFeatures(
    num_sections=10,
    test_subject_ids=1,
    train_subject_ids=[2,3,4],
)

### Get feature DataFrame

In [91]:
# display the test feature DataFrame of PD1 (last expression of the cell)
PD1.get_X_test_df()

Unnamed: 0,Acc_x_01,Acc_x_02,Acc_x_03,Acc_x_04,Acc_x_05,Acc_x_06,Acc_x_07,Acc_x_08,Acc_x_09,Acc_x_10,...,Gyr_z_01,Gyr_z_02,Gyr_z_03,Gyr_z_04,Gyr_z_05,Gyr_z_06,Gyr_z_07,Gyr_z_08,Gyr_z_09,Gyr_z_10
0,-0.939115,-0.851133,-0.074181,0.516587,0.583770,0.231318,-0.423175,-0.986674,-0.975231,-0.919216,...,101.202885,174.671402,125.190385,39.502885,-11.670455,-99.800000,-154.894231,-135.348077,-64.978220,-5.134615
1,-0.928223,-1.003425,-0.495449,0.210023,0.572557,0.367981,-0.302287,-0.946587,-0.933463,-0.910428,...,37.361111,138.007042,153.595486,91.167254,11.751736,-56.511285,-155.176937,-146.173611,-67.409331,3.184028
2,-0.896511,-0.949733,-0.381539,0.288838,0.534470,0.327676,-0.230033,-0.866844,-0.950143,-0.927022,...,46.017736,132.607264,144.309122,60.873311,9.449324,-60.369932,-131.244088,-142.970439,-68.365709,3.009291
3,-0.920389,-0.979775,-0.607063,0.117471,0.497325,0.381730,-0.176354,-0.728860,-0.956564,-0.907595,...,33.522645,126.680254,153.138787,101.720109,14.466486,-64.136775,-122.122283,-140.272059,-93.033514,-3.357790
4,-0.935854,-0.871847,-0.278697,0.262681,0.546331,0.394518,-0.083008,-0.618541,-0.902532,-0.909180,...,61.901786,138.570536,131.164286,64.818750,6.863393,-46.659331,-110.113393,-116.171429,-95.745536,-13.732143
5,-0.982135,-0.958350,-0.090957,0.464355,0.556547,0.134514,-0.480993,-0.877879,-0.942412,-0.927677,...,77.729779,181.550373,127.208640,32.466418,-11.859375,-100.484375,-136.397388,-116.132353,-59.947761,-8.415441
6,-0.931104,-0.979980,-0.765009,0.035779,0.479407,0.425504,-0.064612,-0.728522,-0.952246,-0.943359,...,4.285156,86.618750,150.970313,90.615625,26.372656,-32.627315,-103.882812,-135.796094,-89.035938,0.826562
7,-0.925398,-0.950846,-0.576444,0.099315,0.493115,0.397362,-0.143373,-0.718262,-0.939678,-0.926980,...,30.662975,96.931090,138.069620,91.614583,18.207278,-46.072785,-115.727564,-120.603639,-72.330929,-8.629747
8,-0.925960,-0.995340,-0.550014,0.044282,0.491527,0.294155,-0.132966,-0.617359,-0.923154,-0.916099,...,19.910211,128.681250,167.218750,84.916373,0.285211,-63.336607,-103.136607,-118.556338,-92.839789,-20.725000
9,-0.919057,-1.016490,-0.502703,0.123926,0.481480,0.343248,-0.225572,-0.770890,-0.935965,-0.909068,...,24.557143,140.980357,140.106884,82.936607,18.852679,-41.758036,-139.189286,-132.621377,-78.433036,-10.722321


### Get label DataFrame

In [92]:
# display the test label DataFrame of PD1 (last expression of the cell)
PD1.get_y_test_df()

Unnamed: 0,ex_abbr,ex_num
0,RF,0
1,RF,0
2,RF,0
3,RF,0
4,RF,0
5,RF,0
6,RF,0
7,RF,0
8,RF,0
9,RF,0


### Get data points and show via *pivot_ui()*

In [93]:
# pivot table of the training data points of PD1:
# exercise abbreviations as rows, subject ID and number of repetitions as columns
pivot_ui(
    PD1.get_train_data_points(),
    rows=['abbreviation'],
    cols=['subject_id', 'num_rep'],
)

## Create instance with different parameters

In [94]:
# Instance with explicitly chosen parameters.
# The Windows path is written as a raw string so that its backslash is never
# interpreted as the start of an escape sequence ('\P' is an invalid escape).
PD2 = PhysioData_SectionFeatures(num_sections=15,
                                 test_subject_ids=1,
                                 train_subject_ids=2,
                                 test_rep_nums=5,
                                 train_rep_nums=[10,15],
                                 test_ex_abbrs=['RF','RO','NE'],
                                 train_ex_abbrs=['RF','RO','NE'],
                                 with_non_Ex=True,
                                 rot_axis_test_data=0,
                                 rot_angle_test_data=180,
                                 add_noise_test_data=True,
                                 add_noise_train_data=False,
                                 snr_db=10,
                                 csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
                                 csv_skiprows=0,
                                 csv_separator=',',
                                 data_base_path='DataBase_Physio_with_nonEx.db',
                                 print_progress=True)

In [95]:
# pivot table of the test data points of PD2:
# exercise abbreviations as rows, subject ID and number of repetitions as columns
pivot_ui(
    PD2.get_test_data_points(),
    rows=['abbreviation'],
    cols=['subject_id', 'num_rep'],
)

In [96]:
# pivot table of the training data points of PD2:
# exercise abbreviations as rows, subject ID and number of repetitions as columns
pivot_ui(
    PD2.get_train_data_points(),
    rows=['abbreviation'],
    cols=['subject_id', 'num_rep'],
)

In [97]:
# display the first rows of the test feature DataFrame of PD2
PD2.get_X_test_df().head()

Unnamed: 0,Acc_x_01,Acc_x_02,Acc_x_03,Acc_x_04,Acc_x_05,Acc_x_06,Acc_x_07,Acc_x_08,Acc_x_09,Acc_x_10,...,Gyr_z_06,Gyr_z_07,Gyr_z_08,Gyr_z_09,Gyr_z_10,Gyr_z_11,Gyr_z_12,Gyr_z_13,Gyr_z_14,Gyr_z_15
0,-0.857676,-1.033115,-0.669387,-0.28725,0.232313,0.571545,0.594165,0.41687,0.168735,-0.308227,...,-28.954581,4.802698,43.109784,121.708344,135.567449,162.616767,124.074004,88.251828,18.212624,-1.054605
1,-0.867739,-0.965264,-1.028191,-0.598242,-0.085659,0.205939,0.582183,0.57378,0.248658,-0.211793,...,-80.268447,-21.188373,3.556291,78.338119,150.549867,174.857062,133.011299,82.070274,21.909536,-6.41444
2,-0.975364,-0.959977,-0.919252,-0.571539,0.086229,0.362144,0.500562,0.417965,0.237323,-0.184238,...,-43.400611,-15.886073,11.723126,80.514382,117.708781,153.002645,144.464851,85.454747,23.354495,-2.460826
3,-0.966882,-0.909885,-1.0039,-0.737059,-0.133936,0.257653,0.456448,0.565398,0.297803,-0.075649,...,-93.284842,-32.015597,23.654314,79.72625,111.919325,143.011677,137.309853,109.776289,41.311845,-15.912801
4,-0.957541,-0.945354,-0.831891,-0.380368,0.010999,0.291409,0.539755,0.558289,0.36072,0.028078,...,-49.156614,-17.242181,15.604161,65.04722,99.768834,134.590869,113.396374,105.701653,65.770264,1.352139


In [101]:
# Same parameters as before, but with an empty list of training subject IDs
# and an absolute path to the data base.
# The Windows paths are written as raw strings so that their backslashes are
# never interpreted as the start of an escape sequence ('\P' and '\D' are
# invalid escapes).
PD2 = PhysioData_SectionFeatures(num_sections=15,
                                 test_subject_ids=1,
                                 train_subject_ids=[],
                                 test_rep_nums=5,
                                 train_rep_nums=[10,15],
                                 test_ex_abbrs=['RF','RO','NE'],
                                 train_ex_abbrs=['RF','RO','NE'],
                                 with_non_Ex=True,
                                 rot_axis_test_data=0,
                                 rot_angle_test_data=180,
                                 add_noise_test_data=True,
                                 add_noise_train_data=False,
                                 snr_db=10,
                                 csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
                                 csv_skiprows=0,
                                 csv_separator=',',
                                 data_base_path=r'E:\Physio_Data\DataBase_Physio_with_nonEx.db',
                                 print_progress=True)

In [99]:
# display the first rows of the training feature DataFrame of PD2
PD2.get_X_train_df().head()

In [100]:
# display the first rows of the test feature DataFrame of PD2
PD2.get_X_test_df().head()

Unnamed: 0,Acc_x_01,Acc_x_02,Acc_x_03,Acc_x_04,Acc_x_05,Acc_x_06,Acc_x_07,Acc_x_08,Acc_x_09,Acc_x_10,...,Gyr_z_06,Gyr_z_07,Gyr_z_08,Gyr_z_09,Gyr_z_10,Gyr_z_11,Gyr_z_12,Gyr_z_13,Gyr_z_14,Gyr_z_15
0,-0.848464,-1.134152,-0.779393,-0.239363,0.318976,0.608676,0.604253,0.438368,0.147345,-0.351869,...,-11.091429,6.782749,43.664962,119.836008,145.237225,167.291356,118.553257,82.477242,25.581931,-0.725724
1,-0.907887,-0.987134,-0.968495,-0.594876,-0.082958,0.341845,0.575664,0.57566,0.275103,-0.153795,...,-74.507867,-15.215605,3.578184,74.46932,151.274524,164.593928,130.608135,78.870797,24.278115,-6.621698
2,-0.919884,-0.958872,-0.875329,-0.569291,0.080634,0.321845,0.526832,0.440039,0.259229,-0.140391,...,-44.580617,-14.391387,21.570234,76.528352,125.086249,145.089398,140.226553,84.330651,22.541825,-3.066551
3,-0.968742,-0.920778,-1.052502,-0.765661,-0.127699,0.262531,0.481593,0.504732,0.305495,-0.108049,...,-93.236564,-26.651592,30.831643,76.917668,122.264006,146.316318,137.271061,107.792391,33.470396,-6.439163
4,-0.938268,-0.946809,-0.863781,-0.419798,0.061544,0.329336,0.60035,0.547726,0.306862,0.022311,...,-54.083251,-22.374582,18.985301,61.742799,98.387111,131.397544,117.328914,102.673897,64.622929,2.332624
