In [1]:
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Reading the data

In [3]:
file_path_screwdriver = r"C:\Users\Daniel\OneDrive\Bachelorarbeit\Daten\Datenaufnahme\Schraubendreher\1\2021-02-08Griff_Schraubendreher_ml EULERWINKEL.json"
file_path_fist = r"C:\Users\Daniel\OneDrive\Bachelorarbeit\Daten\Datenaufnahme\Faust\1\2021-02-08Faust_ml EULERWINKEL.json"
file_path_list = [file_path_screwdriver,file_path_fist]
description_list = ["screwdriver","fist"]
df_list = []
for element in file_path_list:
    df_list.append(pd.read_json(element))

# Preprocessing

In [4]:
def func_df_preparation(df):
    df.drop(["userName","typeOfRecording","nameOfTask"],axis=1,inplace=True)
    df.sort_values(["timeStamp"],ignore_index=True,inplace=True)
    df.drop_duplicates(subset=["timeStamp"],keep="first",inplace=True,ignore_index=True)
    return df

In [5]:
for ind,element in enumerate(df_list):
    df_list[ind] = func_df_preparation(element)
    print("Number unique testing sequences {}:\t {}".format(description_list[ind],element["recordingNumber"].nunique()))

Number unique testing sequences screwdriver:	 9
Number unique testing sequences fist:	 10


## Rearranging the data + combine the datasets

columns = 6 parameters (3x acc and 3x angle) for each sensor
   rows = each row represents one moment
   
all unique testing sequences are combined

last column ["label"]:

    1 = screwdriver
    2 = fist



In [6]:
column_names_list = df_list[0].columns.tolist()
column_names_list.remove("timeStamp")
column_names_list.remove("frameNumber")
column_names_list.remove("recordingNumber")
column_names_list

['phi', 'theta', 'psi', 'accX', 'accY', 'accZ']

In [7]:
def func_all_sensors_list(df):
    n_sensors = len(df["psi"].iloc[0])
    all_sensors_list_df = []
    temp_df = df.copy()
    for sensor_index in range(n_sensors):
        for index, row in df.iterrows():
            for c_name in column_names_list:
                temp_df[c_name].iloc[index] = row[c_name][sensor_index]
        temp_df[column_names_list] = temp_df[column_names_list].astype(float)
        all_sensors_list_df.append(temp_df.copy())
    return all_sensors_list_df

In [8]:
all_sensors_list = []
for element in df_list:
    all_sensors_list.append(func_all_sensors_list(element))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [11]:
def func_index_recNum_list(df):
    index_recNum_list_df = []
    for recording in range(1,df["recordingNumber"].nunique()+1):
        index_recNum_list_df.append(df[df["recordingNumber"]==recording].first_valid_index())
    index_recNum_list_df.append(df.last_valid_index()+1)
    return index_recNum_list_df

In [12]:
index_recNum_list = []
for element in df_list:
    index_recNum_list.append(func_index_recNum_list(element))

In [13]:
def func_delta_time(df,index_recNum_list_df):
    delta_time = []
    for ind in range(1,len(index_recNum_list_df)):
        t1 = df["timeStamp"].iloc[index_recNum_list_df[ind-1]]
        t2 = df["timeStamp"].iloc[index_recNum_list_df[ind]-1]
        delta_time.append((t2-t1))
    arr = np.array(delta_time)
    print(arr.mean())

In [14]:
for ind,element in enumerate(df_list):
    func_delta_time(element,index_recNum_list[ind])

0 days 00:00:07.877888970
0 days 00:00:08.122999930


In [23]:
def func_df_data(df,all_sensors_list_df,label):
    data_df = df.copy()
    data_df = data_df[["recordingNumber","timeStamp","frameNumber"]]
    for ind_sensor in range(14):
        for ind_feat in range(6):
            col_name = column_names_list[ind_feat]+str(ind_sensor)
            data_df[col_name] = all_sensors_list_df[ind_sensor][column_names_list[ind_feat]]
    data_df["label"] = label
    return(data_df)

In [28]:
all_sensors_list[0][0].head()

Unnamed: 0,phi,theta,psi,accX,accY,accZ,timeStamp,frameNumber,recordingNumber
0,-3.099992,-0.251864,0.986599,0.052692,-0.037947,0.057099,2021-02-08 10:26:04.840999842,43291,1
1,-3.099264,-0.251283,0.986628,0.023943,-0.007743,0.020854,2021-02-08 10:26:04.874000072,43292,1
2,-3.098992,-0.251014,0.986708,0.044817,0.023827,0.030549,2021-02-08 10:26:04.907000065,43293,1
3,-3.098992,-0.251014,0.986708,0.044817,0.023827,0.030549,2021-02-08 10:26:04.940999985,43294,1
4,-3.098435,-0.250482,0.986756,0.024754,-0.005931,0.013268,2021-02-08 10:26:04.973999977,43295,1


In [25]:
index_recNum_list[1]

[0, 172, 378, 618, 873, 1107, 1317, 1593, 1879, 2167, 2447]

In [29]:
df_data_list = []
for ind,element in enumerate(df_list):
    df_data_list.append(func_df_data(element,all_sensors_list[ind],ind))
df_data_list[0].head()

Unnamed: 0,recordingNumber,timeStamp,frameNumber,phi0,theta0,psi0,accX0,accY0,accZ0,phi1,...,accX12,accY12,accZ12,phi13,theta13,psi13,accX13,accY13,accZ13,label
0,1,2021-02-08 10:26:04.840999842,43291,-3.099992,-0.251864,0.986599,0.052692,-0.037947,0.057099,-0.341761,...,-0.192187,-0.057048,0.335394,0.225838,-0.292762,0.669282,-0.224155,-0.062544,0.400648,0
1,1,2021-02-08 10:26:04.874000072,43292,-3.099264,-0.251283,0.986628,0.023943,-0.007743,0.020854,-0.342838,...,-0.080367,-0.088077,-0.171578,0.224768,-0.294493,0.67023,-0.040612,0.017532,-0.29434,0
2,1,2021-02-08 10:26:04.907000065,43293,-3.098992,-0.251014,0.986708,0.044817,0.023827,0.030549,-0.343988,...,0.079542,0.002126,-0.097973,0.224612,-0.294871,0.670486,0.106944,-0.051276,-0.009687,0
3,1,2021-02-08 10:26:04.940999985,43294,-3.098992,-0.251014,0.986708,0.044817,0.023827,0.030549,-0.345417,...,-0.051852,0.027972,-0.113384,0.225321,-0.297056,0.671869,-0.024738,-0.025348,-0.04535,0
4,1,2021-02-08 10:26:04.973999977,43295,-3.098435,-0.250482,0.986756,0.024754,-0.005931,0.013268,-0.345417,...,-0.040753,-0.01939,0.002805,0.226185,-0.297756,0.672643,-0.165805,-0.128674,-0.021833,0


In [30]:
result = pd.concat(df_data_list,ignore_index=True)
result.shape

(4583, 88)

In [31]:
for ind in range(1,len(index_recNum_list)):
    index_recNum_list[ind] = [x+index_recNum_list[ind-1][-1] for x in index_recNum_list[ind]]
index_recNum_list[1]

[2136, 2308, 2514, 2754, 3009, 3243, 3453, 3729, 4015, 4303, 4583]

In [32]:
result.sample(10)

Unnamed: 0,recordingNumber,timeStamp,frameNumber,phi0,theta0,psi0,accX0,accY0,accZ0,phi1,...,accX12,accY12,accZ12,phi13,theta13,psi13,accX13,accY13,accZ13,label
2424,2,2021-02-08 10:31:55.842000008,53821,-3.10017,-0.261886,1.00398,-0.000449,-0.039158,0.018528,-0.341875,...,0.146335,0.035267,-0.057158,1.79829,-0.205046,1.296788,-0.141447,0.02103,0.201799,1
4150,9,2021-02-08 10:33:37.143000126,56860,-3.099828,-0.261349,0.997714,0.007344,0.021886,-0.038288,-0.372454,...,0.053613,-0.023322,0.188311,2.073994,-0.175896,1.201821,0.005916,-0.018131,0.217075,1
4189,9,2021-02-08 10:33:38.443000078,56899,-3.099828,-0.261349,0.997714,0.013943,0.014708,0.001119,-0.372454,...,0.096544,0.021119,0.272121,2.053861,-0.170895,1.226201,0.046352,-0.019188,0.335485,1
3723,7,2021-02-08 10:33:09.875999928,56042,-3.101188,-0.263633,0.998682,-0.020056,-0.024228,-0.004223,-0.367063,...,-0.019726,0.10577,-0.203025,0.221373,-0.239263,0.964037,0.086673,0.205942,-0.286282,1
2017,9,2021-02-08 10:28:03.974999905,46865,-3.097827,-0.244636,0.983102,0.001482,0.015993,0.007902,-0.378626,...,0.059852,0.068861,0.12976,1.04339,-0.265249,0.672205,0.040732,0.090539,0.137056,0
4336,10,2021-02-08 10:33:50.042999983,57247,-3.100055,-0.260527,0.996268,-0.004119,-0.00127,0.027561,-0.366326,...,-0.004968,-0.089485,0.069394,0.178919,-0.23213,0.949645,-0.042936,-0.036387,0.112022,1
1761,8,2021-02-08 10:27:49.874000072,46442,-3.098015,-0.241608,0.983016,0.006839,0.012412,-0.024655,-0.377448,...,0.56281,0.948505,-0.475616,0.639445,-0.239223,0.594165,0.853586,1.086951,-0.43274,0
903,4,2021-02-08 10:26:56.873999834,44852,-3.094464,-0.245994,0.982773,0.030986,-0.023531,-0.006521,-0.373449,...,0.175212,0.018525,0.20457,0.145906,-0.224339,0.513166,-0.435836,-0.089084,-0.497233,0
2654,3,2021-02-08 10:32:08.842000008,54211,-3.099618,-0.262175,1.003901,0.004788,0.016314,-0.015652,-0.34976,...,-0.05409,0.001441,-0.209162,2.070259,-0.22969,1.381381,-0.017508,0.001578,-0.301358,1
4201,9,2021-02-08 10:33:38.844000101,56911,-3.099828,-0.261349,0.997714,0.00985,0.008367,-0.0009,-0.372454,...,-1.44227,1.312006,-2.151243,0.391658,-0.231971,1.010347,-0.719781,0.804069,-0.974568,1


In [33]:
start_ind_right_hand = 3+6*7
pure_data = result[result.columns[start_ind_right_hand:]]
pure_data

Unnamed: 0,phi7,theta7,psi7,accX7,accY7,accZ7,phi8,theta8,psi8,accX8,...,accX12,accY12,accZ12,phi13,theta13,psi13,accX13,accY13,accZ13,label
0,0.080651,0.148847,-0.959140,-0.049675,0.134449,0.103026,-0.283347,-0.518190,-0.919459,-0.056956,...,-0.192187,-0.057048,0.335394,0.225838,-0.292762,0.669282,-0.224155,-0.062544,0.400648,0
1,0.080651,0.148847,-0.959140,-0.049675,0.134449,0.103026,-0.286494,-0.523345,-0.917932,-0.138175,...,-0.080367,-0.088077,-0.171578,0.224768,-0.294493,0.670230,-0.040612,0.017532,-0.294340,0
2,0.083520,0.145020,-0.958043,0.155792,-0.073801,-0.054326,-0.288562,-0.524751,-0.916598,-0.120184,...,0.079542,0.002126,-0.097973,0.224612,-0.294871,0.670486,0.106944,-0.051276,-0.009687,0
3,0.084042,0.144297,-0.957704,0.083194,-0.060714,-0.035120,-0.289412,-0.525137,-0.915855,-0.085881,...,-0.051852,0.027972,-0.113384,0.225321,-0.297056,0.671869,-0.024738,-0.025348,-0.045350,0
4,0.084970,0.142817,-0.957260,0.080727,-0.063714,-0.082944,-0.291794,-0.527175,-0.915196,-0.099641,...,-0.040753,-0.019390,0.002805,0.226185,-0.297756,0.672643,-0.165805,-0.128674,-0.021833,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4578,-0.000788,0.131263,-0.888502,-0.076930,-0.029085,0.101990,-0.383999,-0.487630,-0.916883,-0.021625,...,-0.093461,-0.024301,0.113809,0.188865,-0.240217,0.950502,-0.041030,-0.013941,0.176515,1
4579,0.000603,0.129653,-0.889342,-0.013478,0.004246,-0.029982,-0.383999,-0.487630,-0.916883,-0.021625,...,-0.032861,-0.036520,-0.231586,0.188396,-0.240344,0.948447,-0.009833,0.002942,-0.117649,1
4580,0.001227,0.128868,-0.889633,0.012872,-0.102590,-0.082346,-0.383633,-0.488823,-0.918637,-0.029767,...,0.083403,-0.104163,0.091948,0.187876,-0.240420,0.947173,0.068939,-0.039344,-0.001130,1
4581,0.001227,0.128868,-0.889633,0.012872,-0.102590,-0.082346,-0.383148,-0.490075,-0.920805,0.028057,...,-0.002063,-0.048087,-0.077849,0.187152,-0.240386,0.945828,-0.051900,-0.089484,-0.055887,1


In [35]:
X_test_noMovement = result.loc[index_recNum_list[0][0]:index_recNum_list[0][1]].values
X_test_noMovement = X_test_noMovement[:,3:(3+6*7)]
X_test_noMovement.shape

(254, 42)

# Classification

In [36]:
models = []
models.append(("LR",LogisticRegression()))
models.append(("LDA",LinearDiscriminantAnalysis()))
models.append(("KNN",KNeighborsClassifier()))
models.append(("CART",DecisionTreeClassifier()))
models.append(("NB",GaussianNB()))
models.append(("SVM",SVC()))

In [37]:
# shuffle
cols = list(pure_data.columns[:-1])
X = pure_data[cols].values
Y = pure_data["label"].values
X_train_shuffle,X_test_shuffle,Y_train_shuffle,Y_test_shuffle = model_selection.train_test_split(
    X,Y,test_size=0.8,shuffle=True,random_state=42)

In [38]:
results = []
names = []
for name, model in models:
    print(name+"\n")
    model.fit(X_train_shuffle,Y_train_shuffle)
    prediction = model.predict(X_test_shuffle)
    print(accuracy_score(Y_test_shuffle,prediction))
    print(confusion_matrix(Y_test_shuffle,prediction))
    print(classification_report(Y_test_shuffle,prediction))
    print("\n")
    print("\n")
    print("\n")

LR

0.9991818925552223
[[1708    0]
 [   3 1956]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1708
           1       1.00      1.00      1.00      1959

    accuracy                           1.00      3667
   macro avg       1.00      1.00      1.00      3667
weighted avg       1.00      1.00      1.00      3667







LDA

1.0
[[1708    0]
 [   0 1959]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1708
           1       1.00      1.00      1.00      1959

    accuracy                           1.00      3667
   macro avg       1.00      1.00      1.00      3667
weighted avg       1.00      1.00      1.00      3667







KNN

0.9893646032178893
[[1691   17]
 [  22 1937]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1708
           1       0.99      0.99      0.99      1959

    accuracy                     

In [39]:
# testing input shape
pure_data.shape[0]-(X_train_shuffle.shape[0]+X_test_shuffle.shape[0])

0

## accuracy is pretty good -> only the acc values are used

In [40]:
cols_acc = []
for col in pure_data.columns:
    if "phi" not in col and "theta" not in col and "psi" not in col and "label" not in col:
        cols_acc.append(col)

In [41]:
X = pure_data[cols_acc]
X_train_shuffle,X_test_shuffle,Y_train_shuffle,Y_test_shuffle = model_selection.train_test_split(
    X,Y,test_size=0.8,shuffle=True,random_state=42)

In [42]:
results = []
names = []
for name, model in models:
    print(name+"\n")
    model.fit(X_train_shuffle,Y_train_shuffle)
    prediction = model.predict(X_test_shuffle)
    print(accuracy_score(Y_test_shuffle,prediction))
    print(confusion_matrix(Y_test_shuffle,prediction))
    print(classification_report(Y_test_shuffle,prediction))
    print("\n")
    print("\n")
    print("\n")

LR

0.5418598309244614
[[ 314 1394]
 [ 286 1673]]
              precision    recall  f1-score   support

           0       0.52      0.18      0.27      1708
           1       0.55      0.85      0.67      1959

    accuracy                           0.54      3667
   macro avg       0.53      0.52      0.47      3667
weighted avg       0.54      0.54      0.48      3667







LDA

0.5434960458140169
[[ 374 1334]
 [ 340 1619]]
              precision    recall  f1-score   support

           0       0.52      0.22      0.31      1708
           1       0.55      0.83      0.66      1959

    accuracy                           0.54      3667
   macro avg       0.54      0.52      0.48      3667
weighted avg       0.54      0.54      0.50      3667







KNN

0.7281156258521952
[[1206  502]
 [ 495 1464]]
              precision    recall  f1-score   support

           0       0.71      0.71      0.71      1708
           1       0.74      0.75      0.75      1959

    accuracy      

In [43]:
models

[('LR', LogisticRegression()),
 ('LDA', LinearDiscriminantAnalysis()),
 ('KNN', KNeighborsClassifier()),
 ('CART', DecisionTreeClassifier()),
 ('NB', GaussianNB()),
 ('SVM', SVC())]

In [44]:
from sklearn_porter import Porter



In [45]:
clf = DecisionTreeClassifier()
clf.fit(X_train_shuffle,Y_train_shuffle)

DecisionTreeClassifier()

In [46]:
porter = Porter(clf, language='java')
output = porter.export(embed_data=True)
print(output)

class DecisionTreeClassifier {

    private static int findMax(int[] nums) {
        int index = 0;
        for (int i = 0; i < nums.length; i++) {
            index = nums[i] > nums[index] ? i : index;
        }
        return index;
    }

    public static int predict(double[] features) {
        int[] classes = new int[2];
            
        if (features[7] <= -0.05785500071942806) {
            if (features[12] <= 0.053450001403689384) {
                if (features[9] <= -0.0041810000548139215) {
                    if (features[6] <= -0.18997200578451157) {
                        if (features[20] <= -0.6787029802799225) {
                            classes[0] = 0; 
                            classes[1] = 1; 
                        } else {
                            classes[0] = 5; 
                            classes[1] = 0; 
                        }
                    } else {
                        if (features[7] <= -0.14100050181150436) {
                         

