# Classification with both Physio Data Classes

In [1]:
import pandas as pd
import numpy as np
from pivottablejs import pivot_ui

import sys
sys.path.append('..')  # in order to import modules from my own package
from packageMeinhart import PhysioDataHandler as PDH
from packageMeinhart.functionsMasterProjectMeinhart import print_precision_recall_accuracy
from packageMeinhart.functionsMasterProjectMeinhart import print_misclassified_data_points

# ML modules
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
# Build the section-feature data set used for training and testing the classifier.
# Subject 1 provides the test data.
# NOTE(review): -1 presumably means "all" / "all remaining" for the subject,
# repetition and exercise selectors -- confirm in PhysioDataHandler.
PD1 = PDH.PhysioData_SectionFeatures(num_sections=10,
                                     test_subject_ids=1,
                                     train_subject_ids=-1,
                                     test_rep_nums=-1,
                                     train_rep_nums=-1,
                                     test_ex_abbrs=-1,
                                     train_ex_abbrs=-1,
                                     with_non_Ex=True,
                                     rot_axis_test_data=0,
                                     rot_angle_test_data=0,
                                     add_noise_test_data=False,
                                     add_noise_train_data=False,
                                     snr_db=20,
                                     # Raw strings: '\P' and '\D' are invalid escape sequences in
                                     # ordinary literals (warning today, error in future Python).
                                     csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
                                     csv_skiprows=0,
                                     csv_separator=',',
                                     data_base_path=r'E:\Physio_Data\DataBase_Physio_with_nonEx.db',
                                     print_progress=True,
                                     signal_abbrs=['Acc','Gyr'],
                                     signal_orientations=['x','y','z'],
                                     # Mapping from exercise abbreviation to numeric class label
                                     # (11 classes, 'NE' = label 10).
                                     labels_abbr2num_dict={'RF':0,'RO':1,'RS':2,'LR':3,'BC':4,'TC':5,
                                                           'MP':6,'SA':7,'P1':8,'P2':9,'NE':10},
                                     sub_id_key='subject_id',
                                     num_rep_key='num_rep',
                                     abbreviation_key='abbreviation',
                                     start_time_key='start_time',
                                     stop_time_key='stop_time',
                                     csv_file_key='csv_file',
                                     sampling_rate=256)

In [3]:
# Pivot table of the test data points: exercise abbreviation per row,
# subject / repetition number per column; output file is PD1_test.html.
test_data_points = PD1.get_test_data_points()
pivot_ui(test_data_points,
         outfile_path="PD1_test.html",
         rows=['abbreviation'],
         cols=['subject_id', 'num_rep'])

In [4]:
# Pivot table of the training data points: exercise abbreviation per row,
# subject / repetition number per column; output file is PD1_train.html.
train_data_points = PD1.get_train_data_points()
pivot_ui(train_data_points,
         outfile_path="PD1_train.html",
         rows=['abbreviation'],
         cols=['subject_id', 'num_rep'])

In [5]:
# Classifier under test: a random forest with a fixed seed.
# Alternative kept for reference (Support Vector Classifier with input scaling):
#ML_model = make_pipeline(StandardScaler(), SVC(random_state=42))
ML_model = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=40,
    n_jobs=-1,
    random_state=42,
)

# Fit on the training split of PD1 ...
ML_model.fit(PD1.X_train(), PD1.y_train())

# ... and predict the labels of its test split.
y_pred = ML_model.predict(PD1.X_test())

In [6]:
# Summarise the test-set performance of the fitted model.
class_names = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE']
class_ids = np.arange(0,11)  # numeric labels 0..10, same order as class_names

print('Model: ' + type(ML_model).__name__ + '\n')

test_accuracy = accuracy_score(PD1.y_test(), y_pred)
print('Total Accuracy: {:.2f}%\n'.format(test_accuracy*100))
#print_precision_recall_accuracy(y_pred, PD1.y_test())

# Per-class precision / recall / f1 / support as a DataFrame (one row per report entry).
report = classification_report(
    PD1.y_test(),
    y_pred,
    labels=class_ids,
    target_names=class_names,
    sample_weight=None,
    output_dict=True,
)
report_df = pd.DataFrame.from_dict(report, orient='index')
display(report_df)

print('')
print_misclassified_data_points(y_pred, PD1.y_test())

Model: RandomForestClassifier

Total Accuracy: 99.01%



Unnamed: 0,precision,recall,f1-score,support
BC,1.0,1.0,1.0,31
LR,1.0,1.0,1.0,30
MP,0.857143,1.0,0.923077,30
NE,0.997519,0.987715,0.992593,407
P1,1.0,0.966667,0.983051,30
P2,1.0,1.0,1.0,30
RF,1.0,0.966667,0.983051,30
RO,0.967742,1.0,0.983607,30
RS,1.0,1.0,1.0,30
SA,1.0,1.0,1.0,31



7 misclassified (709 test data points):
RF classified as RO
P1 classified as NE
NE classified as MP
NE classified as MP
NE classified as MP
NE classified as MP
NE classified as MP


In [7]:
# Windowing-procedure object for the recording of subject 1; constructing it
# prints the progress message 'Generate feature map...'.
# NOTE(review): units of the win_* / cutoff arguments are not visible here --
# confirm in PhysioDataHandler.
PD1_wp = PDH.PhysioData_WindowingProcedure(
    # input recording
    test_subject_dir=r'E:\Physio_Data\Subject_01',
    test_subject_file='subject01.csv',
    csv_skiprows=0,
    csv_separator=',',
    # signals and features
    number_sections=10,
    signal_abbrs=['Acc','Gyr'],
    signal_orientations=['x','y','z'],
    sampling_rate=256,
    # filter settings
    cutoff=10,
    order=6,
    # window settings
    win_start_inc=0.2,
    win_stretch_inc=0.2,
    win_min_len=1,
    win_max_len=5,
    win_start_min_sec='00:00.0',
    win_last_start_min_sec=None,
    # progress output
    print_progress=True,
    progress_info='Generate feature map...',
    # rotation / noise settings (rotation angle 0, noise disabled)
    rot_axis=0,
    rot_angle=0,
    add_noise=False,
    target_snr_db=20,
    # class abbreviations; the peak-evaluation list omits 'NE'
    exercise_abbrs=['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE'],
    exercise_abbrs_peak_eval=['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2'],
)

# Show the dimensions of the generated feature map.
np.shape(PD1_wp.get_feature_map())

Generate feature map... 100%


(210945, 60)

In [8]:
# Class probabilities predicted by the trained model for each row of the feature map.
feature_map = PD1_wp.get_feature_map()
pred_probs = ML_model.predict_proba(feature_map)
np.shape(pred_probs)

(210945, 11)

In [9]:
# Evaluate the predicted probabilities and print, per exercise, the detected
# blocks with repetition lengths and probabilities (print_rep_len_prob=True).
peak_eval_settings = {
    'max_time_between_peaks': 10,
    'min_peaks_per_block': 3,
    'threshold_prob': 0.5,
    'footprint_length': 1.5,
    'print_rep_len_prob': True,
}
PD1_wp.evaluate_probability_matrix(pred_probabilities=pred_probs, **peak_eval_settings)


Exercise: RF
Number of blocks: 3

	Block #1:
		Repetitions: 10
		Time range: 13:44.2 - 14:13.8
		Repetition lengths [s] and predicted prob.: 
			  1	2.80	(0.716)
			  2	3.20	(0.813)
			  3	3.20	(0.837)
			  4	2.80	(0.673)
			  5	2.80	(0.822)
			  6	3.00	(0.656)
			  7	3.00	(0.619)
			  8	3.40	(0.577)
			  9	3.60	(0.517)
			 10	3.60	(0.621)
	Block #2:
		Repetitions: 15
		Time range: 14:39.6 - 15:19.2
		Repetition lengths [s] and predicted prob.: 
			  1	2.80	(0.703)
			  2	3.20	(0.784)
			  3	2.80	(0.864)
			  4	3.00	(0.858)
			  5	2.80	(0.812)
			  6	2.60	(0.725)
			  7	3.00	(0.759)
			  8	3.00	(0.823)
			  9	2.80	(0.788)
			 10	2.80	(0.717)
			 11	2.60	(0.802)
			 12	2.80	(0.828)
			 13	3.00	(0.750)
			 14	3.00	(0.723)
			 15	2.80	(0.784)
	Block #3:
		Repetitions: 5
		Time range: 16:08.4 - 16:22.6
		Repetition lengths [s] and predicted prob.: 
			  1	3.00	(0.729)
			  2	3.20	(0.782)
			  3	3.00	(0.794)
			  4	2.80	(0.649)
			  5	3.20	(0.838)

Exercise: RO
Number of blocks: 3

	Block 

In [10]:
%matplotlib auto
PD1_wp.plot_probability_matrices_and_peaks(test_subject_id=1,
                                           figsize=(18,9),
                                           cross_size=10,
                                           plot_actual_classes=True,
                                           timetable_file_dir = r'E:\Physio_Data\Exercise_time_tables',
                                           timetable_file_name = 'Timetable_subject01.txt')

Using matplotlib backend: TkAgg
