# First Machine Learning Approach with Exercise and Non-Exercise Data

In [60]:
import numpy as np
import pandas as pd
import sqlite3
import time
import itertools
import os
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, TextBox, Button
from IPython.display import clear_output
import functionsMasterProjectMeinhart as fmpm

## Training Data

In [61]:
# Load the repetition time ranges of all subjects except one.
# The excluded subject is used as test data later on.
test_data_subject = 1

db_name='DataBase_Physio_with_nonEx.db' # database name
exercise_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE'] # exercise abbreviations

# SQL command to extract the repetitions of one exercise.
# The values are bound as parameters (sqlite3 "qmark" style) instead of
# being formatted into the SQL string.
query_sql = """
    SELECT r.start_time, r.stop_time, e.csv_file
    FROM subjects s
    INNER JOIN exercises e
    ON s.id = e.subject_id
    INNER JOIN paradigms p
    ON p.id = e.paradigm_id
    INNER JOIN repetitions r
    ON e.id = r.exercise_id
    WHERE p.abbreviation = ?
    AND NOT s.id = ?
    """

# Connect to an existing database
conn = sqlite3.connect(db_name)
try:
    train_data_points = {} # dictionary with the exercise abbreviation as key
    for key in exercise_abbrs:
        # one data frame (start_time, stop_time, csv_file) per exercise
        train_data_points[key] = pd.read_sql_query(query_sql, conn,
                                                   params=(key, test_data_subject))
finally:
    # close the connection even if one of the queries fails
    conn.close()

In [62]:
# Preview the first rows of one loaded data frame (here: key 'NE') as an example.
# Each row holds one repetition: start_time, stop_time and the csv file name.
train_data_points['NE'].head()

Unnamed: 0,start_time,stop_time,csv_file
0,0.0,3.6097522701321,subject02_00_nonEx.csv
1,3.6097522701321,5.98056861437206,subject02_00_nonEx.csv
2,5.98056861437206,7.84471642992804,subject02_00_nonEx.csv
3,7.84471642992804,12.3377339822144,subject02_00_nonEx.csv
4,12.3377339822144,15.5979262935134,subject02_00_nonEx.csv


In [63]:
# Report the number of repetitions (rows) per exercise and the overall sum.
print('Length of the individual data frames:')
count = 0
for key in exercise_abbrs:
    num_rows = len(train_data_points[key].index)
    print('{}:\t{}'.format(key, num_rows))
    count += num_rows
print('total:\t{}'.format(count))

Length of the individual data frames:
RF:	239
RO:	240
RS:	240
LR:	241
BC:	242
TC:	243
MP:	242
SA:	242
P1:	240
P2:	239
NE:	3712
total:	6120


## Generate and save features for training

In [64]:
# number of sections to split the signals
number_sections = 10

# directory of the csv files
# (raw strings: the backslashes are path separators, not escape sequences)
csv_dir = r'E:\Physio_Data_Split_Ex_and_NonEx'

# csv-file to save the features; the name encodes the excluded (test) subject
# and the number of sections
save_dir  = r'E:\Physio_Features'
save_file_name = 'features_without_subject{0:02}_Ex_nonEx_sections{1:02}.csv'.format(
    test_data_subject, number_sections)
feature_csv_file = os.path.join(save_dir, save_file_name)
print(feature_csv_file)

sampling_rate = 256 # [Hz]
sig_names = ['Acc','Gyr'] # signals which shall be considered for the mean calculation

E:\Physio_Features\features_without_subject01_Ex_nonEx_sections10.csv


***Cell below is only executed if feature csv-file does not already exist***

In [65]:
# this cell shall only be executed if the feature file does not already exist
if not os.path.isfile(feature_csv_file):

    # header of the feature-file: the first column contains the labels,
    # followed by one column per signal / axis / section (e.g. 'Acc_x_01')
    header_columns = ['label'] + [sig + ax + '_{:02}'.format(sec + 1)
                                  for sig in sig_names
                                  for ax in ['_x','_y','_z']
                                  for sec in range(number_sections)]

    # Write to a temporary file first and rename it at the very end.
    # If the generation is interrupted, no incomplete feature file is left behind
    # that a later run would mistake for a finished one.
    tmp_feature_csv_file = feature_csv_file + '.tmp'

    # the file is opened only once for the header and all data points
    with open(tmp_feature_csv_file, 'w') as feature_file:
        feature_file.write(';'.join(header_columns) + '\n')

        # go through all exercises
        for ex in exercise_abbrs:
            repetitions = train_data_points[ex]

            # go through all repetitions (data points) of the current exercise
            for csv_name, rep_start, rep_stop in zip(repetitions['csv_file'],
                                                     repetitions['start_time'],
                                                     repetitions['stop_time']):

                # join file path
                file_path = os.path.join(csv_dir, csv_name)

                # load the signal data of the corresponding time range of the current repetition
                selected_data = fmpm.get_sensor_data(in_file = file_path,
                                                     signals = sig_names,
                                                     sampling_rate = sampling_rate,
                                                     start_time = float(rep_start),
                                                     stop_time = float(rep_stop))

                # calculate the corresponding section means of the current repetition
                section_means = fmpm.split_range_into_sections(signal_data = selected_data,
                                                               num_sec = number_sections,
                                                               signals = sig_names)

                # section mean values as strings (5 decimals), in the same order as the header:
                # signal --> x, y, z component (column 0, 1, 2) --> section
                mean_strings = ["{:.5f}".format(section_means[sig][sec, comp])
                                for sig in sig_names
                                for comp in [0,1,2]
                                for sec in range(number_sections)]

                # first column contains the label
                feature_file.write(';'.join([ex] + mean_strings) + '\n')

    # all data points written --> make the file available under its final name
    os.replace(tmp_feature_csv_file, feature_csv_file)

## Load the generated features

In [66]:
# read the generated feature file (semicolon separated, first row is the header)
feature_train_data = pd.read_csv(feature_csv_file, skiprows=0, sep=';')
# overview of columns, dtypes and non-null counts
feature_train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6120 entries, 0 to 6119
Data columns (total 61 columns):
label       6120 non-null object
Acc_x_01    6120 non-null float64
Acc_x_02    6120 non-null float64
Acc_x_03    6120 non-null float64
Acc_x_04    6120 non-null float64
Acc_x_05    6120 non-null float64
Acc_x_06    6120 non-null float64
Acc_x_07    6120 non-null float64
Acc_x_08    6120 non-null float64
Acc_x_09    6120 non-null float64
Acc_x_10    6120 non-null float64
Acc_y_01    6120 non-null float64
Acc_y_02    6120 non-null float64
Acc_y_03    6120 non-null float64
Acc_y_04    6120 non-null float64
Acc_y_05    6120 non-null float64
Acc_y_06    6120 non-null float64
Acc_y_07    6120 non-null float64
Acc_y_08    6120 non-null float64
Acc_y_09    6120 non-null float64
Acc_y_10    6120 non-null float64
Acc_z_01    6120 non-null float64
Acc_z_02    6120 non-null float64
Acc_z_03    6120 non-null float64
Acc_z_04    6120 non-null float64
Acc_z_05    6120 non-null float64
Acc_z_06  

In [67]:
# first rows of the feature table: label column followed by the section means
feature_train_data.head()

Unnamed: 0,label,Acc_x_01,Acc_x_02,Acc_x_03,Acc_x_04,Acc_x_05,Acc_x_06,Acc_x_07,Acc_x_08,Acc_x_09,...,Gyr_z_01,Gyr_z_02,Gyr_z_03,Gyr_z_04,Gyr_z_05,Gyr_z_06,Gyr_z_07,Gyr_z_08,Gyr_z_09,Gyr_z_10
0,RF,-0.94862,-0.96133,-0.39374,0.35776,0.44864,0.05871,-0.50909,-0.86553,-0.91736,...,33.96461,119.09226,134.11672,45.16518,-20.69654,-83.85617,-101.04688,-79.37651,-28.57366,1.50226
1,RF,-0.91875,-0.90737,-0.56951,0.10319,0.40801,0.21426,-0.3308,-0.83806,-0.93589,...,21.89922,90.21641,118.35156,74.62266,-0.4875,-49.51187,-109.20234,-101.00547,-50.23281,-6.03594
2,RF,-0.93875,-0.93639,-0.60812,0.12386,0.46588,0.39421,-0.02835,-0.61382,-0.91854,...,15.96221,83.7311,123.72845,78.51526,9.25073,-30.06541,-83.66424,-102.5546,-72.67878,-8.51526
3,RF,-0.91961,-0.87408,-0.46247,0.17987,0.45833,0.28772,-0.27465,-0.78274,-0.88386,...,30.38491,94.18287,116.64787,70.11052,2.69985,-46.96799,-106.49924,-99.39024,-43.10108,-3.91768
4,RF,-0.89435,-0.92486,-0.64687,0.03317,0.46411,0.44731,0.05423,-0.60571,-0.92647,...,13.04983,87.60167,130.50169,99.33333,20.78632,-28.67314,-86.2025,-126.16892,-94.7425,-11.61824


### Generate feature matrix of training data

In [68]:
# get feature matrix: all columns except the first one (label).
# The label column is excluded BEFORE converting to an array. Otherwise the
# string labels force the whole array (including the features) to dtype object.
X_train = feature_train_data.iloc[:, 1:].values.astype(float)
np.shape(X_train)

(6120, 60)

### Generate label array of training data

In [69]:
# mapping from exercise abbreviation to integer class label (0 ... 10)
label_ex = dict(RF=0, RO=1, RS=2, LR=3, BC=4, TC=5, MP=6, SA=7, P1=8, P2=9, NE=10)

# label strings are stored in the first column of the feature table
labels_str = feature_train_data.values[:,0]

# translate every label string into its integer class
y_train = [label_ex[abbr] for abbr in labels_str]
np.shape(y_train)

(6120,)

## Training of ML model

In [70]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

***Voting Classifier***

In [71]:
#LogReg_clf = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, n_jobs=-1, random_state=42)
#RF_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=40, n_jobs=-1, random_state=42)

#voting_clf = VotingClassifier(estimators=[('lr', LogReg_clf), ('rf', RF_clf)], voting='soft')
#voting_clf.fit(X_train, y_train)

***Random Forest Classifier***

In [72]:
# create random forest classifier model
# (500 trees, at most 50 leaf nodes per tree, all CPU cores, fixed seed for reproducibility)
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=50, n_jobs=-1, random_state=42)

# train the model on the section-mean features and the integer class labels
rnd_clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=50,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

## Test Data

Loading all sensor data from the test subject,
without knowing the start and stop times of the exercises.

In [73]:
# csv file containing the complete recording of the test subject
file_dir  = r'E:\Physio_Data\Subject_{:02}'.format(test_data_subject)
file_name = 'subject{:02}.csv'.format(test_data_subject)
data_path = os.path.join(file_dir, file_name)

# The whole recording is loaded, hence no time range is selected (None).
# To load only a certain range instead, set start_time and stop_time in seconds,
# e.g. '01:36.0' [min:sec] --> 1*60 + 36.0 = 96.0 s.
start_time = None
stop_time = None

# get data from selected file
sensor_data = fmpm.get_sensor_data(in_file=data_path,
                                   sampling_rate=sampling_rate,
                                   start_time=start_time,
                                   stop_time=stop_time)


# Low-pass filtering of the data
# (not strictly necessary, because the filtered data are not used so far)
cutoff = 10 # [Hz]
order = 6 # butterworth order

# one filtered copy per signal, stored in a new dictionary with the same keys
signal_keys = ['Acc', 'Gyr']
sensor_data_filt = {
    signal: fmpm.butter_lowpass_filter(sensor_data[signal],
                                       cutoff=cutoff,
                                       fs=sampling_rate,
                                       order=order)
    for signal in signal_keys
}

## Windowing the test data

The windowing follows a certain procedure:

>•	Taking a 1 s block of the data

>•	Varying the block length from 1 s to 5 s with an increment of 200 ms (starting point remains the same for all blocks)

>•	Sectioning and feature generation for all blocks

>•	For each block class probabilities are calculated (ML classifier) 

>•	Sliding the starting point with an increment of 200 ms and starting again with a 1 s block varying to 5 s

*see animation below*

In [74]:
from IPython.display import HTML
# show the animation of the windowing procedure (gif file is expected next to the notebook)
HTML('<img src="windowing_procedure.gif" width=600 >')

In [75]:
# The following parameters are already defined in previous cells and are reused here:
#   sampling_rate   = 256 [Hz]          (sampling rate of the signals)
#   sig_names       = ['Acc','Gyr']     (signal names)
#   number_sections = 10                (number of sections to split the signal)
#   exercise_abbrs  = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE']


# --- window settings, all in seconds ---
win_start_inc = 0.2    # increment of the window start point
win_stretch_inc = 0.2  # increment of the window length
win_min_len = 1        # minimum window length
win_max_len = 5        # maximum window length
win_start = 0          # start time of the first window

# signal length in samples (all sensor data must have same length --> Acc, Gyr, ...)
signal_len = len(sensor_data[sig_names[0]])

# last window start time --> time where the minimum window length just fits into the sensor data
win_last_start = signal_len/sampling_rate - win_min_len

# number of different window sizes (+win_stretch_inc to include the maximum length)
num_win_sizes = np.arange(win_min_len, win_max_len+win_stretch_inc, win_stretch_inc).size

# number of different window start points
num_start_points = np.arange(win_start, win_last_start, win_start_inc).size

# one matrix (start points x window sizes) per class to save the predicted values
pred_matrix = {ex: np.zeros([num_start_points, num_win_sizes]) for ex in exercise_abbrs}

# matrix with all generated features: one row per window,
# number_sections*6 columns per row
feature_map = np.zeros([num_start_points * num_win_sizes, number_sections*6])

In [76]:
 def print_progress(current_num, max_num, prev_prog):
    '''
    Print the progress of a loop in percent, but only when it has increased.

    Parameters
    ----------
    current_num : int
        Number of the current run in the loop.

    max_num : int
        Total number of runs in the loop.

    prev_prog : int
        Progress [%] returned by the previous call.

    Returns
    -------
    int
        Current progress [%] (truncated to an integer); pass it as
        prev_prog to the next call.
    '''
    percent_done = int(current_num/max_num*100)

    # nothing new to show --> leave the displayed output untouched
    if percent_done <= prev_prog:
        return percent_done

    # replace the previously shown progress with the new one
    clear_output(wait=True)
    print('Progress: {:3d}%'.format(percent_done))
    return percent_done

### Generating the feature map

In [77]:
# Path of the cached feature map of the test subject; the next cell checks
# whether this file already exists.
# (raw string: the backslash is a path separator, not an escape sequence)
feature_map_dir  = r'E:\Physio_Features'
feature_map_name = 'feature_map_subject{0:02}_sections{1:02}.csv'.format(
                    test_data_subject, number_sections)
feature_map_path = os.path.join(feature_map_dir, feature_map_name)

***Feature map is only generated if it does not already exist $\rightarrow$ otherwise it is loaded***

In [78]:
count = 0 # row of the feature map which is filled next
max_count = len(feature_map)
prev_progress = 0 # previous progress

# only generate the feature map if it does not already exist
if not os.path.isfile(feature_map_path):

    # going through all window start points
    for ii, win_pos in enumerate(np.arange(win_start, win_last_start, win_start_inc)):

        # going through all window lengths  (+win_stretch_inc to include end point)
        for jj, win_len in enumerate(np.arange(win_min_len, win_max_len+win_stretch_inc, win_stretch_inc)):

            # get the corresponding sample indices of the selected range
            [idx_start, idx_stop] = fmpm.get_window_indices(signal_len,
                                                            window_length = win_len,
                                                            start_time = win_pos,
                                                            sampling_rate = sampling_rate,
                                                            auto_end = True)

            # split the selected range into sections and get the section means
            section_means = fmpm.split_range_into_sections(sensor_data,
                                                           num_sec = number_sections,
                                                           signals = sig_names,
                                                           start_index = idx_start,
                                                           stop_index = idx_stop)

            # putting together the feature map: the transposed section means of
            # both signals are stacked and flattened into one row
            feature_map[count,:] = np.concatenate((section_means[sig_names[0]].transpose(),
                                                   section_means[sig_names[1]].transpose())).flatten().reshape(1, -1)

            count += 1

        # progress is updated once per window start point
        prev_progress = print_progress(count, max_count, prev_progress)

    clear_output(wait=True)
    print('Saving the feature map...')

    # save feature map of test subject
    np.savetxt(feature_map_path, feature_map, delimiter=";")

    clear_output(wait=True)
    print('Feature map saved.')

    print('\nShape:')
    print(np.shape(feature_map))

# else --> feature map already exists --> load it
else:
    print('Loading the feature map...')

    # pass the path itself: np.loadtxt then opens AND closes the file
    # (a handle created with open(...) in the call would never be closed)
    feature_map = np.loadtxt(feature_map_path, delimiter=";")

    clear_output(wait=True)
    print('Feature map loaded.')

    print('\nShape:')
    print(np.shape(feature_map))

Feature map loaded.

Shape:
(210945, 60)


### Making predictions with the trained ML model

In [79]:
# class probabilities for every row (window) of the feature map;
# alternative with the voting classifier (not trained in this notebook run):
#pred_probs = voting_clf.predict_proba(feature_map)
pred_probs = rnd_clf.predict_proba(feature_map)
# one row per window, one column per class
np.shape(pred_probs)

(210945, 11)

### Filling the prediction matrices (images) with probability values

In [80]:
# The feature map was filled row by row: outer loop over the window start points,
# inner loop over the window lengths. Row number `count` of pred_probs therefore
# belongs to start point count // num_win_sizes and window length
# count % num_win_sizes, so one reshape restores the
# (start point, window length, class) layout without a Python loop
# over every single window.
prob_cube = pred_probs.reshape(num_start_points, num_win_sizes, len(exercise_abbrs))

# column kk of pred_probs holds the probabilities of class exercise_abbrs[kk]
for kk, ex in enumerate(exercise_abbrs):
    pred_matrix[ex][:, :] = prob_cube[:, :, kk]

Progress: 100%


### Find peaks of prediction matrices and evaluate them

In [81]:
# import from the public scipy.ndimage namespace
# (the scipy.ndimage.filters sub-module is deprecated)
from scipy.ndimage import maximum_filter

In [82]:
def detect_prob_map_peaks(prob_map, threshold_prob=0.5, footprint_length=1,
                          start_inc=None, n_win_sizes=None):
    '''
    Function to detect the local peaks of a probability map.

    Parameters
    ----------
    prob_map : 2d-array
        Matrix with predicted probabilities
        (rows: window start points, columns: window lengths).

    threshold_prob : float
        Only peaks with at least this probability are returned.

    footprint_length : float
        Length [s] of the neighbourhood (along the time axis) in which a
        peak has to be the maximum.

    start_inc : float or None
        Time [s] between two rows of prob_map.
        None --> the global variable win_start_inc is used.

    n_win_sizes : int or None
        Number of different window sizes.
        None --> the global variable num_win_sizes is used.

    Returns
    -------
    array
        array[0] ... peak time indices
        array[1] ... peak window length indices
        e.g. ([[ 390, 723, 1331, ...], [4, 4, 10, ...]], dtype=int64)
    '''

    # fall back to the notebook-level settings if nothing else is given
    if start_inc is None:
        start_inc = win_start_inc
    if n_win_sizes is None:
        n_win_sizes = num_win_sizes

    # length and height of the footprint for the maximum_filter (see below);
    # round() instead of int(): a float ratio like 0.6/0.2 = 2.9999999999999996
    # would otherwise be truncated to 2. The footprint needs at least one row.
    footprint_length_indices = max(1, int(round(footprint_length / start_inc)))
    footprint_height = n_win_sizes * 2  # twice the number of window sizes

    footprint = np.ones((footprint_length_indices, footprint_height))

    # applying a maximum filter and generating a boolean map for local maxima
    local_max = maximum_filter(prob_map, footprint=footprint) == prob_map

    # removing all maxima below the threshold
    local_max = (prob_map >= threshold_prob) & local_max

    # check if there are several points with the same probability at one local maximum
    #   --> remove them, otherwise we get more than one local maximum
    # (consecutive candidates are compared; the earlier one of an equal pair is dropped)
    peak_indices_check = np.argwhere(local_max)
    for (row_ind, col_ind), (row_ind_next, col_ind_next) in zip(peak_indices_check[:-1],
                                                                peak_indices_check[1:]):
        if prob_map[row_ind, col_ind] == prob_map[row_ind_next, col_ind_next] and \
        row_ind_next - row_ind < footprint_length_indices:
            local_max[row_ind, col_ind] = False

    # get the maxima indices of the probability map
    peak_indices = np.argwhere(local_max).transpose()

    return peak_indices

In [83]:
def evaluate_peaks(peak_ind, pred_matrix, max_time_between_peaks=10, start_inc=None):
    '''
    Function to evaluate the detected peaks.
    (see function detect_prob_map_peaks(prob_map))

    The peaks of each exercise are grouped into repetition blocks. If blocks of
    two different exercises overlap in time, only the block with the higher
    sum of peak probabilities is kept.

    Parameters
    ----------
    peak_ind : dict
        Dictionary with exercise abbreviations as keys; each value is a 2d-array
        array[0] ... peak time indices (ascending)
        array[1] ... peak window length indices
        e.g. ([[ 390, 723, 1331, ...], [4, 4, 10, ...]], dtype=int64)

    pred_matrix : dict
        Dictionary with exercise abbreviations as keys; each value is the
        2d-array with the predicted probabilities of this exercise.

    max_time_between_peaks : float
        Maximum time [s] between two consecutive peaks of the same block.

    start_inc : float or None
        Time [s] between two time indices.
        None --> the global variable win_start_inc is used.

    Returns
    -------
    dict
        Dictionary with exercise abbreviations as keys --> list of repetition
        blocks; each block is a list of [time index, window length index]
        pairs with at least two entries.
    '''

    # fall back to the notebook-level setting if nothing else is given
    if start_inc is None:
        start_inc = win_start_inc

    # maximum distance between two peaks of a block, expressed in time indices
    # (round() instead of int() to be robust against float ratios like 2.9999999999999996)
    max_ind_between_peaks = int(round(max_time_between_peaks / start_inc))

    exercise_only_abbrs = [*peak_ind]

    # Assign peaks to repetition blocks: a block is a run of consecutive peaks
    # whose distances do not exceed the maximum; runs with only one peak are
    # no block (min two repetitions).
    rep_blocks = {}
    for ex in exercise_only_abbrs:
        rep_blocks[ex] = []
        current_block = []

        for time_index, win_len_index in zip(peak_ind[ex][0], peak_ind[ex][1]):

            # gap to the previous peak too large --> the current run ends here
            if current_block and time_index - current_block[-1][0] > max_ind_between_peaks:
                if len(current_block) >= 2:
                    rep_blocks[ex].append(current_block)
                current_block = []

            current_block.append([time_index, win_len_index])

        # the last run is not closed inside the loop
        if len(current_block) >= 2:
            rep_blocks[ex].append(current_block)


    # if blocks are overlapping --> keep only the block with the highest predicted probabilities (sum)
    #    --> the more peaks in the block, the higher the sum of probabilities
    blocks_to_remove = {ex: set() for ex in exercise_only_abbrs} # block indices per exercise

    for ex1, ex2 in itertools.combinations(exercise_only_abbrs, 2):
        for ii, block_1 in enumerate(rep_blocks[ex1]):
            for jj, block_2 in enumerate(rep_blocks[ex2]):
                start_1, stop_1 = block_1[0][0], block_1[-1][0] # first / last peak time index of block 1
                start_2, stop_2 = block_2[0][0], block_2[-1][0] # first / last peak time index of block 2

                # two ranges (start <= stop) overlap if each one starts
                # before the other one ends
                if start_1 <= stop_2 and start_2 <= stop_1:

                    # select the corresponding probability values of pred_matrix and sum them up
                    peaks_1 = np.array(block_1)
                    peaks_2 = np.array(block_2)
                    sum_prob_block_1 = pred_matrix[ex1][peaks_1[:,0], peaks_1[:,1]].sum()
                    sum_prob_block_2 = pred_matrix[ex2][peaks_2[:,0], peaks_2[:,1]].sum()

                    # the block with the smaller sum is removed (block 2 if both are equal)
                    if sum_prob_block_1 < sum_prob_block_2:
                        blocks_to_remove[ex1].add(ii)
                    else:
                        blocks_to_remove[ex2].add(jj)

    # keep only the blocks which are not marked for removal
    for ex in exercise_only_abbrs:
        rep_blocks[ex] = [block for block_ind, block in enumerate(rep_blocks[ex])
                          if block_ind not in blocks_to_remove[ex]]

    return rep_blocks
        

In [84]:
# function to convert indices to time strings
def indices_to_time(start_index, stop_index, time_per_index=None):
    '''
    Function to convert a start and a stop index to a time string.

    Parameters
    ----------
    start_index : int or float
        Index of the start point.

    stop_index : int or float
        Index of the stop point.

    time_per_index : float or None
        Time [s] between two indices.
        None --> the global variable win_start_inc is used.

    Returns
    -------
    str
        String with start and stop time as min:sec with one decimal
        (e.g. '14:39.6 - 15:19.4').
    '''
    # fall back to the notebook-level setting if nothing else is given
    if time_per_index is None:
        time_per_index = win_start_inc

    def _format_time(index):
        # Round to tenths of a second BEFORE splitting into minutes and seconds.
        # Otherwise e.g. 59.98 s is formatted as '00:60.0' instead of '01:00.0'.
        total_tenths = int(round(index * time_per_index * 10))
        minutes, tenths = divmod(total_tenths, 600)
        return '{0:02}:{1:04.1f}'.format(minutes, tenths / 10)

    return _format_time(start_index) + ' - ' + _format_time(stop_index)

In [85]:
def print_rep_blocks(rep_blocks):
    '''
    Function to print the repetition blocks of all exercises
    (number of blocks, repetitions per block and time range of each block).
    !! Global variables are used !!
    (win_start_inc, win_stretch_inc, win_min_len)

    Parameters
    ----------
    rep_blocks : dict
        Dictionary with exercise abbreviations as keys --> nested list of
        repetition blocks; each block is a list of
        [time index, window length index] pairs.

    Returns
    -------
    no returns
    '''
    exercise_only_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2']

    for ex in exercise_only_abbrs:
        print('\nExercise: ' + ex)
        print('Number of blocks: {}\n'.format(len(rep_blocks[ex])))
        for block_num, block in enumerate(rep_blocks[ex]):
            print('\tBlock #{}:'.format(block_num+1))
            print('\t\tRepetitions: {}'.format(np.shape(np.array(block))[0]))
            start_index = block[0][0]
            stop_index = block[-1][0]
            # for the stop index we have to consider the length of the last repetition;
            # round() instead of int(): e.g. (2*0.2 + 1) / 0.2 = 6.999999999999999
            # would be truncated to 6 and the stop time would be one increment too early
            last_rep_length = block[-1][1]*win_stretch_inc + win_min_len # [s]
            stop_index += int(round(last_rep_length / win_start_inc))
            print('\t\tTime range: ' + indices_to_time(start_index, stop_index))

In [86]:
# Peak evaluation for the exercise classes only (without non-exercise 'NE'):
# 1) detect the peaks of each probability map
# 2) group them into repetition blocks
# 3) print the resulting blocks
exercise_only_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2']

peak_ind = {ex: detect_prob_map_peaks(pred_matrix[ex]) for ex in exercise_only_abbrs}

rep_blocks = evaluate_peaks(peak_ind, pred_matrix)

print_rep_blocks(rep_blocks)


Exercise: RF
Number of blocks: 3

	Block #1:
		Repetitions: 10
		Time range: 13:43.8 - 14:13.6
	Block #2:
		Repetitions: 15
		Time range: 14:39.6 - 15:19.4
	Block #3:
		Repetitions: 5
		Time range: 16:08.2 - 16:22.6

Exercise: RO
Number of blocks: 3

	Block #1:
		Repetitions: 15
		Time range: 01:17.4 - 01:57.0
	Block #2:
		Repetitions: 5
		Time range: 02:20.8 - 02:36.2
	Block #3:
		Repetitions: 10
		Time range: 02:54.0 - 03:22.4

Exercise: RS
Number of blocks: 3

	Block #1:
		Repetitions: 15
		Time range: 20:12.2 - 20:59.4
	Block #2:
		Repetitions: 5
		Time range: 21:24.0 - 21:39.6
	Block #3:
		Repetitions: 10
		Time range: 22:08.8 - 22:37.4

Exercise: LR
Number of blocks: 3

	Block #1:
		Repetitions: 5
		Time range: 10:42.4 - 10:56.0
	Block #2:
		Repetitions: 10
		Time range: 11:24.4 - 11:51.0
	Block #3:
		Repetitions: 15
		Time range: 12:11.2 - 12:51.4

Exercise: BC
Number of blocks: 3

	Block #1:
		Repetitions: 15
		Time range: 16:49.8 - 17:29.2
	Block #2:
		Repetitions: 11
		Time 

## Comparing predicted probabilities with actual classes

### First we have to load the actual time ranges of the exercises of the test data

In [87]:
# The actual time ranges of the exercises of the test subject are needed for
# the subsequent plot, where they are compared with the predicted values.

# timetable file (tab separated text file without header row) of the test subject
timetable_file_dir = r'E:\Physio_Data\Exercise_time_tables'
timetable_file_name = 'Timetable_subject{:02}.txt'.format(test_data_subject)
timetable_data_path = os.path.join(timetable_file_dir, timetable_file_name)

# read in time table: one row per exercise
timetable_data = pd.read_csv(timetable_data_path, sep='\t', header=None, skiprows=0)
num_exercises = len(timetable_data.index) # number of exercises
timetable_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Raises Oblique,15,5,10,01:18.6,01:58.3,02:22.1,02:37.1,02:54.8,03:23.3
1,PNF Diagonal 2,10,5,15,04:27.1,04:54.3,05:24.5,05:38.9,06:25.8,07:05.1
2,Triceps Curls,15,5,10,07:32.3,08:14.8,08:49.5,09:04.9,09:46.1,10:12.6
3,Rotation Wrist,5,10,15,10:43.1,10:57.3,11:25.6,11:51.8,12:12.1,12:52.4
4,Raises Front,10,15,5,13:44.8,14:14,14:40.6,15:20,16:09.5,16:23.1
5,Biceps Curls,15,10,5,16:50.8,17:30.2,18:01.3,18:32.2,18:58.6,19:12
6,Raises Side,15,5,10,20:13,21:00.2,21:24.9,21:40.2,22:09.6,22:38.4
7,PNF Diagonal 1,10,15,5,23:43.9,24:15.9,24:42,25:25.1,25:48.7,26:04.6
8,Shoulder Adduct.,5,15,10,26:48.5,27:05.3,27:25.4,28:16.5,28:41.2,29:10.9
9,Military Press,15,10,5,29:45.6,30:37.4,31:14.6,31:47.3,32:13.6,32:28.1


### Plotting of the predicted probabilities and actual classes

In [88]:
%matplotlib auto

# text for current subject
sub_text = 'Subject {}\n'.format(test_data_subject)

# y ticks every 2 s of window length, expressed as row indices of the transposed
# prediction matrices; the labels show the corresponding window length [s]
yticks = np.arange(0, win_max_len-win_min_len+win_stretch_inc, 2) / win_stretch_inc
ylabels = ['{}'.format(yticks[ii] * win_stretch_inc + win_min_len) for ii in range(len(yticks))]

# 12 stacked axes with a shared x axis: one per class in exercise_abbrs (11)
# plus the last axis for the actual classes
fig, axis = plt.subplots(12,1,figsize=(18,9), sharex=True)

# image color settings for RFC probabilities
cmap = plt.cm.seismic
vmin=0
vmax=1

# one probability image per class (x: window start index, y: window length index)
for ax, ex in zip(axis, exercise_abbrs):
    s = ax.imshow(pred_matrix[ex].transpose(), interpolation='nearest', 
                  aspect='auto', cmap=cmap, vmin=vmin, vmax=vmax)
    ax.invert_yaxis()
    ax.set_yticks(yticks)
    ax.set_yticklabels(ylabels, fontsize=7)
    ax.set_ylabel(ex, rotation=0, fontsize=13)
    ax.yaxis.labelpad = 32
    ax.xaxis.set_ticklabels([])
    
# plot crosses for image peaks
# (only the peaks which belong to a repetition block are marked)
for ax, ex in zip(axis, exercise_only_abbrs):
    #ax.plot(peak_ind[ex][0], peak_ind[ex][1], '+g', markersize=8, markeredgewidth=1.5)
    for ii in range(len(rep_blocks[ex])):
        x_peak = np.array(rep_blocks[ex][ii])[:,0]
        y_peak = np.array(rep_blocks[ex][ii])[:,1]
        ax.plot(x_peak, y_peak, '+g', markersize=8, markeredgewidth=1.5)
    
plt.gcf().text(0.1, 0.6, r'window length $[s]$', fontsize=10, rotation=90)

# invisible line (alpha=0) on the last axis, spanning all window start indices
axis[-1].plot(range(num_start_points), np.zeros(num_start_points), 'k', alpha=0.0)
# tick formatter: first argument (i) is the tick value (window start index),
# second argument (x) is the tick position, which is not used
formatter = matplotlib.ticker.FuncFormatter(lambda i, x: time.strftime('%M:%S', time.gmtime(i*win_start_inc+win_start)))
axis[-1].xaxis.set_major_formatter(formatter)
axis[-1].set_xlabel(r'time $[min:sec]$', fontsize=13)
axis[-1].set_yticks([])
axis[-1].set_ylim([0,1])

fig.subplots_adjust(bottom=0.2, right=0.9) # make space for buttons and color bar
cbar_ax = fig.add_axes([0.93, 0.255, 0.01, 0.625])
# color bar of the image which was drawn last (all images share vmin / vmax)
fig.colorbar(s, cax=cbar_ax)

# add slider for selections on the x axis
Slider_shiftX_ax = plt.axes([0.125, 0.07, 0.775, 0.025])
Slider_zoomX_ax = plt.axes([0.125, 0.035, 0.775, 0.025])

axcolor = 'cornflowerblue'
Slider_shiftX = Slider(Slider_shiftX_ax, 'time shift [%]', 0.0, 100.0, valinit=0, facecolor=axcolor)
Slider_zoomX = Slider(Slider_zoomX_ax, 'time scale [%]', 0.1, 100.0, valinit=100, facecolor=axcolor)
Slider_zoomX_ax.xaxis.set_visible(True)
Slider_zoomX_ax.set_xticks(np.arange(0,105,5)) 

def updateX(val):
    '''Slider callback: set the visible x range and update the title.

    The shift slider gives the start of the range and the zoom slider its
    width, both in percent of num_start_points. The argument val is not used.
    '''
    start_index = int(Slider_shiftX.val / 100 * num_start_points)
    stop_index = start_index + Slider_zoomX.val / 100 * num_start_points
    axis[-1].set_xlim((start_index, stop_index))
    fig.suptitle('Predicted Probabilities ' + sub_text + indices_to_time(start_index, stop_index), fontsize=20)
    plt.draw()

Slider_shiftX.on_changed(updateX)
Slider_zoomX.on_changed(updateX)

# add button to reset view
def resetX(val):
    '''Button callback: show the full x range again and reset both sliders.

    The argument val is not used.
    '''
    start_index = 0
    stop_index = num_start_points
    axis[-1].set_xlim((start_index, stop_index))
    Slider_shiftX.reset()
    Slider_zoomX.reset()
    fig.suptitle('Predicted Probabilities ' + sub_text + indices_to_time(start_index, stop_index), fontsize=20)
    plt.draw()

Button_resetX_ax = plt.axes([0.85, 0.12, 0.05, 0.03])
Button_resetX = Button(Button_resetX_ax, 'Reset view')
Button_resetX.on_clicked(resetX)

# initial view: the complete x range
start_index = 0
stop_index = num_start_points

fig.suptitle('Predicted Probabilities ' + sub_text + indices_to_time(start_index, stop_index), fontsize=20)

axis[-1].set_xlim(0, num_start_points)


# Plotting the actual classes (exercises) on the last axis:

# dictionary to get exercise abbreviations from exercise names in timetable
exercise_names = {'Raises Front':'RF',
                  'Raises Oblique':'RO',
                  'Raises Side':'RS',
                  'Rotation Wrist':'LR',
                  'Biceps Curls':'BC',
                  'Triceps Curls':'TC',
                  'Military Press':'MP',
                  'Shoulder Adduct.':'SA',
                  'PNF Diagonal 1':'P1',
                  'PNF Diagonal 2':'P2'}

# going through all exercises in the timetable
for ii, ex_name in enumerate(timetable_data.values[:,0]):
    
    # going through all repetition blocks in the timetable (5, 10 and 15 rep. blocks)
    for rep_col, start_col, stop_col in zip([1,2,3],[4,6,8],[5,7,9]): # corresponding columns
        rep_num = timetable_data.values[ii,rep_col]
        # 1/win_start_inc is passed as sampling rate
        # NOTE(review): presumably this converts the time strings to indices on the
        # window start grid of the x axis; confirm in functionsMasterProjectMeinhart
        left_border = fmpm.convert_time_format_to_index(timetable_data.values[ii,start_col], 
                                                        sampling_rate=1/win_start_inc)
        right_border = fmpm.convert_time_format_to_index(timetable_data.values[ii,stop_col], 
                                                         sampling_rate=1/win_start_inc)
        # mark the corresponding area
        axis[-1].axvspan(left_border, right_border, color='y', alpha=0.3, lw=0)
        # write text to the corresponding area
        x_center = left_border + (right_border-left_border)/2 # x center of marked area
        axis[-1].text(x_center, 0.5, str(rep_num) + '\n' + exercise_names[ex_name], 
                      horizontalalignment='center', verticalalignment='center', fontsize=10, clip_on=True)
        
axis[-1].set_ylabel('Actual classes', rotation=0, fontsize=11)
axis[-1].yaxis.labelpad = 50

plt.show()

Using matplotlib backend: TkAgg
