# Classification with both Physio Data Classes

In [None]:
import pandas as pd
import numpy as np
from pivottablejs import pivot_ui

import sys
sys.path.append('..')  # in order to import modules from my own package
from packageMeinhart import PhysioDataHandler as PDH
from packageMeinhart.functionsMasterProjectMeinhart import print_precision_recall_accuracy
from packageMeinhart.functionsMasterProjectMeinhart import print_misclassified_data_points

# ML modules
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [None]:
# Build the section-feature data set: subject 1 supplies the test data,
# subject 2 supplies the training data.
# NOTE(review): -1 for the repetition / exercise filters presumably means
# "no restriction" -- confirm against PhysioDataHandler.
PD1 = PDH.PhysioData_SectionFeatures(num_sections=10,
                                     test_subject_ids=1,
                                     train_subject_ids=2,
                                     test_rep_nums=-1,
                                     train_rep_nums=-1,
                                     test_ex_abbrs=-1,
                                     train_ex_abbrs=-1,
                                     with_non_Ex=True,
                                     # no rotation and no additional noise for this run
                                     rot_axis_test_data=0,
                                     rot_angle_test_data=0,
                                     add_noise_test_data=False,
                                     add_noise_train_data=False,
                                     snr_db=20,
                                     # raw string: the backslash in a Windows path must not be
                                     # parsed as the start of an escape sequence ('\P' is invalid)
                                     csv_data_dir=r'E:\Physio_Data_Split_Ex_and_NonEx',
                                     csv_skiprows=0,
                                     csv_separator=',',
                                     # raw string for the same reason ('\P', '\D')
                                     data_base_path=r'E:\Physio_Data\DataBase_Physio_with_nonEx.db',
                                     print_progress=True,
                                     signal_abbrs=['Acc','Gyr'],
                                     signal_orientations=['x','y','z'],
                                     # exercise abbreviation -> numeric class label
                                     labels_abbr2num_dict={'RF':0,'RO':1,'RS':2,'LR':3,'BC':4,'TC':5,
                                                           'MP':6,'SA':7,'P1':8,'P2':9,'NE':10},
                                     # key / column names used to look up the records
                                     sub_id_key='subject_id',
                                     num_rep_key='num_rep',
                                     abbreviation_key='abbreviation',
                                     start_time_key='start_time',
                                     stop_time_key='stop_time',
                                     csv_file_key='csv_file',
                                     sampling_rate=256)

In [None]:
# Pivot overview of the test data points: exercise abbreviation per row,
# subject and repetition number per column (saved as PD1_test.html).
test_data_points = PD1.get_test_data_points()
pivot_ui(test_data_points,
         outfile_path="PD1_test.html",
         rows=['abbreviation'],
         cols=['subject_id', 'num_rep'])

In [None]:
# Pivot overview of the training data points: exercise abbreviation per row,
# subject and repetition number per column (saved as PD1_train.html).
train_data_points = PD1.get_train_data_points()
pivot_ui(train_data_points,
         outfile_path="PD1_train.html",
         rows=['abbreviation'],
         cols=['subject_id', 'num_rep'])

In [None]:
# --- Model definition ---------------------------------------------------------
# Active model: Support Vector Classifier with input scaling.
# probability=True is kept because predict_proba() is called on this model
# further down in the notebook.
feature_scaler = StandardScaler()
svc_classifier = SVC(probability=True, random_state=42)
ML_model = make_pipeline(feature_scaler, svc_classifier)

# Alternative models (uncomment one to replace the pipeline above):
#ML_model = RandomForestClassifier(n_estimators=500, max_leaf_nodes=40, n_jobs=-1, random_state=42)
#ML_model = make_pipeline(StandardScaler(), LDA()) # Linear Discriminant Analysis with input scaling
#ML_model = make_pipeline(StandardScaler(), LogisticRegression(random_state=42, 
#                                                              solver='lbfgs', 
#                                                              multi_class='multinomial',
#                                                              max_iter=1000,
#                                                              n_jobs=-1)) # Logistic Regression with input scaling
#ML_model = make_pipeline(StandardScaler(), MLPClassifier(hidden_layer_sizes=(100), 
#                                                         activation='relu', 
#                                                         solver='adam', # 'adam''lbfgs'
#                                                         alpha=0.2, 
#                                                         batch_size='auto', 
#                                                         learning_rate='constant', 
#                                                         learning_rate_init=0.001, 
#                                                         max_iter=500, 
#                                                         shuffle=True, 
#                                                         random_state=42))

# --- Training -----------------------------------------------------------------
train_features = PD1.X_train()
train_labels = PD1.y_train()
ML_model.fit(train_features, train_labels)

# --- Prediction on the test set -----------------------------------------------
y_pred = ML_model.predict(PD1.X_test())

In [12]:
# Report the test-set performance of the fitted model.
class_names = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE']
class_ids = np.arange(len(class_names))  # numeric labels 0..10, one per class name

model_name = type(ML_model).__name__
print('Model: {}\n'.format(model_name))

total_accuracy = accuracy_score(PD1.y_test(), y_pred) * 100
print('Total Accuracy: {:.2f}%\n'.format(total_accuracy))
#print_precision_recall_accuracy(y_pred, PD1.y_test())

# per-class precision / recall / f1-score / support as a table
report = classification_report(PD1.y_test(), y_pred,
                               labels=class_ids,
                               target_names=class_names,
                               sample_weight=None,
                               output_dict=True)
report_df = pd.DataFrame.from_dict(report, orient='index')
display(report_df)
print()

# list every misclassified test data point
print_misclassified_data_points(y_pred, PD1.y_test())

Model: Pipeline

Total Accuracy: 95.35%



Unnamed: 0,precision,recall,f1-score,support
BC,1.0,1.0,1.0,31
LR,1.0,1.0,1.0,30
MP,0.810811,1.0,0.895522,30
NE,0.995025,0.982801,0.988875,407
P1,1.0,1.0,1.0,30
P2,1.0,0.5,0.666667,30
RF,0.731707,1.0,0.84507,30
RO,1.0,0.633333,0.77551,30
RS,0.697674,1.0,0.821918,30
SA,1.0,1.0,1.0,31



33 misclassified (709 test data points):
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
RO classified as RF
P2 classified as RS
P2 classified as NE
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as RS
P2 classified as NE
NE classified as MP
NE classified as MP
NE classified as MP
NE classified as MP
NE classified as MP
NE classified as MP
NE classified as MP


In [None]:
# Classes passed to the windowing procedure; the peak evaluation gets every
# class except the last one ('NE').
exercise_classes = ['RF','RO','RS','LR','BC','TC','MP','SA','P1','P2','NE']
peak_eval_classes = exercise_classes[:-1]

PD1_wp = PDH.PhysioData_WindowingProcedure(
    # --- recording of the test subject ---
    test_subject_dir=r'E:\Physio_Data\Subject_01',
    test_subject_file='subject01.csv',
    csv_skiprows=0,
    csv_separator=',',
    # --- signals and sectioning ---
    signal_abbrs=['Acc','Gyr'],
    signal_orientations=['x','y','z'],
    sampling_rate=256,
    number_sections=10,
    # --- presumably signal filter settings (TODO confirm in PhysioDataHandler) ---
    cutoff=10,
    order=6,
    # --- window start / length settings ---
    win_start_inc=0.2,
    win_stretch_inc=0.2,
    win_min_len=1,
    win_max_len=5,
    win_start_min_sec='00:00.0', # '22:05.0'
    win_last_start_min_sec='05:00.0', #'22:44.8',
    # --- no rotation and no additional noise for this run ---
    rot_axis=0,
    rot_angle=0,
    add_noise=False,
    target_snr_db=20,
    # --- classes ---
    exercise_abbrs=exercise_classes,
    exercise_abbrs_peak_eval=peak_eval_classes,
    # --- progress output ---
    print_progress=True,
    progress_info='Generate feature map...',
)

# show the dimensions of the generated feature map
np.shape(PD1_wp.get_feature_map())

In [None]:
# Class probabilities of the trained model for every entry of the feature map
window_feature_map = PD1_wp.get_feature_map()
pred_probs = ML_model.predict_proba(window_feature_map)

# show the dimensions of the probability array
np.shape(pred_probs)

In [None]:
# Evaluate the predicted probabilities with the windowing object
# (peak / block settings are passed explicitly below).
PD1_wp.evaluate_probability_matrix(
    pred_probabilities=pred_probs,
    threshold_prob=0.6,
    footprint_length=3,
    max_time_between_peaks=6,
    min_peaks_per_block=3,
    print_rep_len_prob=True,
)

In [None]:
%matplotlib auto
PD1_wp.plot_probability_matrices_and_peaks(title_text='Predicted Probabilites Subject 1',
                                           figsize=(10,8),
                                           cross_size=10,
                                           fontsize_title=20,
                                           yticks_step_in_s=1.5,
                                           fontsize_yticks=12,
                                           fontsize_ylabels_ex=16,
                                           labelpad_ex=45,
                                           fontsize_actual_classes=12,
                                           fontsize_actual_classes_label=12,
                                           labelpad_actual_classes=50,
                                           fontsize_window_length=16,
                                           xpos_window_length=0.055,
                                           ypos_window_length=0.6,
                                           fontsize_time_xlabel=16,
                                           fontsize_time_xticks=16,
                                           colorbar_position_x_y_length_heigth=[0.93, 0.255, 0.01, 0.625],
                                           fontsize_colorbar_ticks=12,
                                           interactive_plot=False,
                                           plot_time_range=True,
                                           start_time='01:00.0',
                                           stop_time='03:00.0',
                                           plot_actual_classes=True,
                                           timetable_file_dir = r'E:\Physio_Data\Exercise_time_tables',
                                           timetable_file_name = 'Timetable_subject01.txt')

In [None]:
# Same plot with all other arguments left at their defaults
# (figure title spelling corrected: "Probabilities").
PD1_wp.plot_probability_matrices_and_peaks(title_text='Predicted Probabilities Subject 1')