# Same Subject for Training and Testing

In [1]:
import numpy as np
import pandas as pd
import sqlite3
import os
import skinematics as skin
import matplotlib.pyplot as plt
import functionsMasterProjectMeinhart as fmpm

## Training Data

In [2]:
# Training set for one subject: every repetition whose sequence_num is greater
# than num_test_repetitions. The first num_test_repetitions repetitions of each
# block are held out for testing; the same threshold is applied to the
# non-exercise ('NE') data.
eval_subject = 1

num_test_repetitions = 5

db_name = 'DataBase_Physio_with_nonEx.db' # database name
exercise_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE'] # exercise abbreviations

# The values are bound through '?' placeholders (sqlite3 qmark style) instead of
# being formatted into the SQL text, so the statement is the same for every exercise.
query_sql = """
    SELECT r.start_time, r.stop_time, e.csv_file
    FROM subjects s
    INNER JOIN exercises e
    ON s.id = e.subject_id
    INNER JOIN paradigms p
    ON p.id = e.paradigm_id
    INNER JOIN repetitions r
    ON e.id = r.exercise_id
    WHERE p.abbreviation = ?
    AND s.id = ?
    AND sequence_num > ?
    """

train_data_points = {} # dictionary with the exercise abbreviation as key

# connect to an existing database; the connection is closed even if a query fails
conn = sqlite3.connect(db_name)
try:
    for key in exercise_abbrs:
        train_data_points[key] = pd.read_sql_query(
            query_sql, conn, params=(key, eval_subject, num_test_repetitions))
finally:
    conn.close()

# report the number of repetitions (rows) per exercise and in total
print('Length of the individual data frames:')
num_data_points_train = 0
for key in exercise_abbrs:
    num_rows = len(train_data_points[key])
    print(key + ':\t' + str(num_rows))
    num_data_points_train += num_rows
print('total:\t' + str(num_data_points_train))

Length of the individual data frames:
RF:	15
RO:	15
RS:	15
LR:	15
BC:	16
TC:	15
MP:	15
SA:	16
P1:	15
P2:	15
NE:	252
total:	404


In [3]:
# number of sections to split the signals
number_sections = 10

sampling_rate = 256 # [Hz]
sig_names = ['Acc','Gyr'] # signals which shall be considered for the mean calculation

# Raw string: '\P' is not a valid escape sequence, so the plain literal only
# worked by accident and triggers a warning on newer Python versions.
csv_dir = r'E:\Physio_Data_Split_Ex_and_NonEx' # directory of csv file

exercise_abbrs = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE']

# map each exercise abbreviation to its integer class label (its position in the list)
exercise_dict = {ex: ii for ii, ex in enumerate(exercise_abbrs)}

In [4]:
# Build the training feature matrix: one row per repetition, holding the section
# means of every axis of every signal in sig_names, plus the matching label vector.
axes_per_signal = 3 # x, y, z comp. of each signal
num_features = number_sections * axes_per_signal * len(sig_names) # derived, so it follows sig_names

X_train = np.zeros((num_data_points_train, num_features))
y_train = np.zeros(num_data_points_train, dtype=np.int8)

count = 0 # index of the row which is filled next
prev_prog = 0
max_num = num_data_points_train

# go through all exercises
for ex in exercise_abbrs:

    label = exercise_dict[ex]

    # go through all repetitions (data points) of the current exercise
    for rep in train_data_points[ex].itertuples(index=False):

        # join file path
        file_path = os.path.join(csv_dir, rep.csv_file)

        # load the signal data of the corresponding time range of the current repetition
        selected_data = fmpm.get_sensor_data(in_file = file_path,
                                             signals = sig_names,
                                             sampling_rate = sampling_rate,
                                             start_time = float(rep.start_time),
                                             stop_time = float(rep.stop_time))

        # calculate the corresponding section means of the current repetition
        # (assumes one array with number_sections rows and one column per axis
        # for each signal -- TODO confirm against fmpm.split_range_into_sections)
        section_means = fmpm.split_range_into_sections(signal_data = selected_data,
                                                       num_sec = number_sections,
                                                       signals = sig_names)

        # generate features: the section means of each axis are written side by side
        col = 0
        for sig in sig_names:
            for jj in range(axes_per_signal):
                X_train[count, col:col+number_sections] = section_means[sig][:,jj]
                col += number_sections

        # the label is stored together with its feature row, so both always stay aligned
        y_train[count] = label

        count += 1
        prev_prog = fmpm.print_progress(count, max_num, prev_prog)

# every pre-allocated row has to be filled exactly once
assert count == num_data_points_train

Progress: 100%


## Test Data

In [5]:
# Test set: the held-out repetitions of the same subject, i.e. all repetitions
# whose sequence_num is less than or equal to num_test_repetitions.

# The values are bound through '?' placeholders (sqlite3 qmark style) instead of
# being formatted into the SQL text, so the statement is the same for every exercise.
query_sql = """
    SELECT r.start_time, r.stop_time, e.csv_file
    FROM subjects s
    INNER JOIN exercises e
    ON s.id = e.subject_id
    INNER JOIN paradigms p
    ON p.id = e.paradigm_id
    INNER JOIN repetitions r
    ON e.id = r.exercise_id
    WHERE p.abbreviation = ?
    AND s.id = ?
    AND sequence_num <= ?
    """

test_data_points = {} # dictionary with the exercise abbreviation as key

# connect to an existing database; the connection is closed even if a query fails
conn = sqlite3.connect(db_name)
try:
    for key in exercise_abbrs:
        test_data_points[key] = pd.read_sql_query(
            query_sql, conn, params=(key, eval_subject, num_test_repetitions))
finally:
    conn.close()

# report the number of repetitions (rows) per exercise and in total
print('Length of the individual data frames:')
num_data_points_test = 0
for key in exercise_abbrs:
    num_rows = len(test_data_points[key])
    print(key + ':\t' + str(num_rows))
    num_data_points_test += num_rows
print('total:\t' + str(num_data_points_test))

Length of the individual data frames:
RF:	15
RO:	15
RS:	15
LR:	15
BC:	15
TC:	15
MP:	15
SA:	15
P1:	15
P2:	15
NE:	155
total:	305


In [6]:
def rotate_signal(signal_data, axis=0, rot_angle=90, signals=['Acc','Gyr']):
    """
    Rotate selected signals of a signal dictionary around one axis.

    The result is always a new dictionary which contains every key of the
    input: the selected signals are replaced by their rotated version, all
    other entries are passed through unchanged. The input dictionary itself
    is never modified.

    Parameters
    ----------
    signal_data : dict
        Signal data with the signal names as keys.
        (assumes each selected entry is an array with one sample per row and
        the x, y, z components as columns -- TODO confirm against the loader)
    axis : int
        Rotation axis, handed to skin.rotmat.R (0, 1, 2 --> x, y, z).
    rot_angle : int or float
        Rotation angle, handed to skin.rotmat.R (used in degrees by the
        calling cell). With 0 no rotation is performed.
    signals : list of str or None
        Keys of the signals which shall be rotated.
        If None, all keys of the input dictionary are rotated.

    Returns
    -------
    dict
        Shallow copy of signal_data with the selected signals rotated.
    """

    # shallow copy: keeps signals which are not rotated and avoids aliasing the input
    rot_signal_data = dict(signal_data)

    # nothing to rotate
    if rot_angle == 0:
        return rot_signal_data

    # if no signals are given as keys, select all keys of the input dictionary
    if signals is None:
        signals = [*signal_data]

    # create rotation matrix
    R = skin.rotmat.R(axis=axis, angle=rot_angle)

    # rotate the signals: transpose so that the matrix acts on every sample (row)
    for sig in signals:
        rot_signal_data[sig] = (R @ signal_data[sig].T).T

    return rot_signal_data

In [7]:
# Build the test feature matrix in the same way as the training features, with an
# optional rotation of the signals before the section means are calculated.
axes_per_signal = 3 # x, y, z comp. of each signal
num_features = number_sections * axes_per_signal * len(sig_names) # derived, so it follows sig_names

X_test = np.zeros((num_data_points_test, num_features))
y_test = np.zeros(num_data_points_test, dtype=np.int8)

rot_axis = 0 # 0,1,2 --> x,y,z
rot_angle = 0 # deg (0 --> signals are not rotated)

count = 0 # index of the row which is filled next
prev_prog = 0
max_num = num_data_points_test

# go through all exercises
for ex in exercise_abbrs:

    label = exercise_dict[ex]

    # go through all repetitions (data points) of the current exercise
    for rep in test_data_points[ex].itertuples(index=False):

        # join file path
        file_path = os.path.join(csv_dir, rep.csv_file)

        # load the signal data of the corresponding time range of the current repetition
        selected_data = fmpm.get_sensor_data(in_file = file_path,
                                             signals = sig_names,
                                             sampling_rate = sampling_rate,
                                             start_time = float(rep.start_time),
                                             stop_time = float(rep.stop_time))

        # rotate exactly the signals which are used for the features
        rot_data = rotate_signal(selected_data, axis=rot_axis, rot_angle=rot_angle, signals=sig_names)

        # calculate the corresponding section means of the current repetition
        # (assumes one array with number_sections rows and one column per axis
        # for each signal -- TODO confirm against fmpm.split_range_into_sections)
        section_means = fmpm.split_range_into_sections(signal_data = rot_data,
                                                       num_sec = number_sections,
                                                       signals = sig_names)

        # generate features: the section means of each axis are written side by side
        col = 0
        for sig in sig_names:
            for jj in range(axes_per_signal):
                X_test[count, col:col+number_sections] = section_means[sig][:,jj]
                col += number_sections

        # the label is stored together with its feature row, so both always stay aligned
        y_test[count] = label

        count += 1
        prev_prog = fmpm.print_progress(count, max_num, prev_prog)

# every pre-allocated row has to be filled exactly once
assert count == num_data_points_test

Progress: 100%


## Classification

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [9]:
# settings of the random forest classifier (fixed random_state for repeatable runs)
forest_settings = dict(n_estimators=500,
                       max_leaf_nodes=40,
                       n_jobs=-1,
                       random_state=42)
ML_model = RandomForestClassifier(**forest_settings)

# fit the classifier to the training features and labels
# (kept as last expression, so the fitted model is shown as cell output)
ML_model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=40,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

In [10]:
# classify the test data points with the trained model
y_pred = ML_model.predict(X_test)

# values shown in the summary
model_name = type(ML_model).__name__
num_features = number_sections*6
total_accuracy = accuracy_score(y_test, y_pred)

# summary: model, feature description and overall accuracy
print('Model: ' + model_name + '\n')
print('Features: Means of {} sections per signal (3 x Acc + 3 x Gyr) --> {} features\n'.format(number_sections,
                                                                                              num_features))
print('Total Accuracy: {:.5f}'.format(total_accuracy))

# detailed reports (per-exercise metrics, then the misclassified data points),
# each one preceded by blank lines
for print_report in (fmpm.print_precision_recall_accuracy,
                     fmpm.print_misclassified_data_points):
    print('\n')
    print_report(y_pred, y_test)

Model: RandomForestClassifier

Features: Means of 10 sections per signal (3 x Acc + 3 x Gyr) --> 60 features

Total Accuracy: 0.98033


Exercise	Precision [%]	Recall [%]	Accuracy[%]
  RF		   83.33	  100.00	   99.02
  RO		  100.00	   66.67	   98.36
  RS		  100.00	  100.00	  100.00
  LR		  100.00	   93.33	   99.67
  BC		  100.00	  100.00	  100.00
  TC		  100.00	  100.00	  100.00
  MP		  100.00	  100.00	  100.00
  SA		  100.00	  100.00	  100.00
  P1		  100.00	  100.00	  100.00
  P2		  100.00	  100.00	  100.00
  NE		   98.10	  100.00	   99.02


6 misclassified (305 test data points):
RO classified as NE
RO classified as RF
RO classified as RF
RO classified as NE
RO classified as RF
LR classified as NE
