In [48]:
import pandas as pd
import numpy as np
import os

RIGHT_HAND_ALONE = r'extraFiles\HandRight.csv'

## 1. Cleaning our data

In [49]:
def rename_columns(columns, append_name=None):
    """Return cleaned column names, optionally prefixed.

    Every label is converted to ``str`` and stripped of surrounding
    whitespace. When ``append_name`` is truthy, ``append_name + '_'`` is put
    in front of every name except ``'Frame ID'``, which stays unprefixed.

    Args:
        columns: Iterable of column labels.
        append_name: Optional prefix; ``None`` or ``''`` disables prefixing.

    Returns:
        list: The cleaned names, in the same order as ``columns``.
    """
    stripped = (str(label).strip() for label in columns)
    if not append_name:
        return list(stripped)
    return [
        label if label == 'Frame ID' else append_name + '_' + label
        for label in stripped
    ]

In [50]:
def clean_right_alone(csv_path):
    """Load the right-hand-only recording and keep its usable rows.

    Column names are cleaned and every column except 'Frame ID' receives the
    ``right_`` prefix. Only rows whose 'right_Hand Type' is 'right' and whose
    'right_# hands' equals 1 are kept; the 'right_Time' column is dropped.

    Args:
        csv_path: Path of the right-hand CSV file.

    Returns:
        pandas.DataFrame: The filtered rows with prefixed column names.
    """
    recording = pd.read_csv(csv_path)
    recording.columns = rename_columns(recording.columns, append_name='right')

    is_right_hand = recording['right_Hand Type'] == 'right'
    is_single_hand = recording['right_# hands'] == 1

    kept = recording[is_right_hand & is_single_hand]
    return kept.drop(['right_Time'], axis=1)

In [51]:
def create_alone_dataframes(csv_path):
    """Build an 'alone' dataset by pairing left-hand rows with right-hand rows.

    The left-hand rows come from ``csv_path`` (rows with 'Hand Type' == 'left'
    and '# hands' == 1). The right-hand rows come from the separate recording
    at ``RIGHT_HAND_ALONE``. The right-hand frame is cut to the length of the
    shorter of the two recordings and its 'Frame ID' values are overwritten
    with the left-hand ones, so the two can be inner-joined on 'Frame ID'.

    Args:
        csv_path: Path of the left-hand "alone" CSV file.

    Returns:
        pandas.DataFrame: One row per paired frame with ``left_*`` and
        ``right_*`` columns and a 'label' column set to 'alone'.
    """
    df = pd.read_csv(csv_path)
    df.columns = rename_columns(list(df.columns))

    left_hand = df[(df['Hand Type'] == 'left') & (df['# hands'] == 1)].drop(['Time'], axis=1)
    left_frame_ids = left_hand['Frame ID'].tolist()

    right_hand = clean_right_alone(RIGHT_HAND_ALONE)

    # Both original branches reduce to "keep the first min(len) right-hand rows
    # and give them the first min(len) left-hand frame ids". The .copy() makes
    # the column write below target an independent frame rather than the slice
    # returned by head(), which otherwise raises SettingWithCopyWarning.
    n_rows = min(len(right_hand), len(left_frame_ids))
    right_hand = right_hand.head(n_rows).copy()
    right_hand['Frame ID'] = left_frame_ids[:n_rows]

    left_hand.columns = rename_columns(list(left_hand.columns), 'left')

    hands = pd.merge(left_hand, right_hand, on='Frame ID').drop(
        ['left_Hand Type', 'right_Hand Type', 'left_# hands', 'right_# hands'], axis=1)
    hands['label'] = 'alone'

    return hands

In [52]:
def data_not_alone(csv_path):
    """Build a two-hand dataset ('sync' or 'spontan') from one recording.

    Rows with '# hands' == 2 are split by 'Hand Type' into a left and a right
    frame, 'Time' is dropped, the columns are prefixed with ``left_`` /
    ``right_`` (except 'Frame ID'), and the two frames are inner-joined on
    'Frame ID'.

    The label is taken from the lower-cased path: 'sync' if it contains
    'sync', otherwise 'spontan' if it contains 'spontan'. When the path
    contains neither, no 'label' column is added.

    Args:
        csv_path: Path of the two-hand CSV file.

    Returns:
        pandas.DataFrame: One row per frame with ``left_*`` and ``right_*``
        columns, plus 'label' when the path identified one.
    """
    recording = pd.read_csv(csv_path)
    recording.columns = rename_columns(list(recording.columns))
    two_hands = recording['# hands'] == 2

    per_side = {}
    for side in ('right', 'left'):
        rows = recording[(recording['Hand Type'] == side) & two_hands].drop(['Time'], axis=1)
        rows.columns = rename_columns(list(rows.columns), side)
        per_side[side] = rows

    hands = pd.merge(per_side['left'], per_side['right'], on='Frame ID').drop(
        ['left_Hand Type', 'right_Hand Type', 'left_# hands', 'right_# hands'], axis=1)

    lowered_path = str(csv_path).lower()
    # Order matters: 'sync' is checked first, exactly one label is applied.
    for tag in ('sync', 'spontan'):
        if tag in lowered_path:
            hands['label'] = [tag] * hands.shape[0]
            break

    return hands

In [53]:
def prepare_data(csv_path):
    """Turn one recording into a labelled left/right frame.

    Paths whose lower-cased text contains 'alone' are handled by
    ``create_alone_dataframes``; every other path goes to ``data_not_alone``.

    Args:
        csv_path: Path of the CSV file to prepare.

    Returns:
        pandas.DataFrame: Whatever the selected builder returns.
    """
    is_alone = 'alone' in str(csv_path).lower()
    builder = create_alone_dataframes if is_alone else data_not_alone
    return builder(csv_path)

In [54]:
def preparing_train_data(path):
    """Build one training frame from every file found under ``path``.

    The directory tree is walked with ``os.walk``; each file is passed to
    ``prepare_data`` and its 'Frame ID' column is replaced with a running
    counter that continues from the previous file, so ids are unique across
    the combined frame.

    Args:
        path: Root directory containing the training CSV files.

    Returns:
        pandas.DataFrame: All prepared files stacked row-wise with a fresh
        0..n-1 index.

    Raises:
        ValueError: If no file is found under ``path``.
    """
    prepared = []
    next_frame = 0
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            df = prepare_data(os.path.join(root, file_name))
            n_rows = len(df)
            df['Frame ID'] = list(range(next_frame, next_frame + n_rows))
            prepared.append(df)
            next_frame += n_rows

    if not prepared:
        # Previously this fell through to an unbound local variable.
        raise ValueError('no files found under {!r}'.format(path))

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0,
    # and quadratic in the number of files).
    return pd.concat(prepared, ignore_index=True)

## Create train data


In [55]:
# Raw string: the plain literal contained the invalid escape sequence '\T'.
train_path = r'extraFiles\Training'
train_df = preparing_train_data(train_path)
# Drop every row with a missing value. This also removes rows from files whose
# path matched no label, since those rows have NaN in 'label' after stacking.
train_df = train_df.dropna()
train_df

Unnamed: 0,Frame ID,left_Position X,left_Position Y,left_Position Z,left_Velocity X,left_Velocity Y,left_Velocity Z,left_Pitch,left_Roll,left_Yaw,...,right_Wrist Pos X,right_Wrist Pos Y,right_Wrist Pos Z,right_Elbow pos X,right_Elbow Pos Y,right_Elbow Pos Z,right_Grab Strenth,right_Grab Angle,right_Pinch Strength,label
0,0,-86.51942,171.7137,-8.232538,-22.532980,-150.81820,-73.03683,0.864090,0.498534,1.234598,...,196.7576,183.7971,34.49882,407.6624,78.40553,123.16780,0.0,0.259270,0.0,spontan
1,1,-86.83302,170.5757,-8.763070,-36.574250,-132.72730,-61.87682,0.852358,0.505628,1.226171,...,197.4556,182.1488,34.86201,409.7145,82.42125,126.79130,0.0,0.239628,0.0,spontan
2,2,-87.18336,168.2926,-10.102310,-15.975790,-139.76710,-75.22957,0.809447,0.504084,1.208143,...,198.4987,178.5398,35.74966,412.0920,87.12312,133.07030,0.0,0.209995,0.0,spontan
3,3,-87.02645,165.7985,-11.083010,5.929577,-140.99350,-48.52632,0.781005,0.502555,1.195558,...,199.5158,177.1651,36.52261,404.2419,77.77022,144.49600,0.0,0.173054,0.0,spontan
4,4,-86.96509,165.1605,-11.752500,-13.575190,22.78777,-55.82735,0.787963,0.523611,1.183493,...,198.9943,175.7093,37.45332,393.7456,70.01337,157.24780,0.0,0.156171,0.0,spontan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193683,193683,-77.62601,173.7391,-14.213500,-46.750000,99.50911,71.01207,0.629213,1.887313,0.429763,...,129.1261,148.7041,62.82404,300.6781,-33.58672,128.75430,0.0,0.000000,0.0,sync
193684,193684,-78.43058,176.0170,-12.152660,-33.690260,98.24361,59.77054,0.667618,1.870251,0.434664,...,129.2868,148.1146,63.11218,285.6505,-53.66640,106.02620,0.0,0.000000,0.0,sync
193685,193685,-79.84506,180.2403,-9.015577,-68.451200,136.37580,87.63277,0.725945,1.838532,0.456099,...,131.8590,149.6264,66.09853,283.6185,-58.03491,95.29868,0.0,0.000000,0.0,sync
193686,193686,-81.57359,183.5054,-7.185198,-63.247310,121.75270,62.95970,0.762549,1.824587,0.459446,...,134.0416,151.5924,68.17693,288.9688,-54.13820,94.23959,0.0,0.000000,0.0,sync


In [56]:
# Split the prepared frame into the target column and the feature matrix.
# NOTE(review): 'Frame ID' stays in the features. preparing_train_data assigns
# it as a running counter per file, so it may encode which file (and therefore
# which label) a row came from - confirm whether it should be dropped here and
# in the validation cells together.
Y_train = train_df['label']
X_train = train_df.drop(columns=['label'])

In [57]:
# Summarise the feature matrix: column names, non-null counts and dtypes.
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 192311 entries, 0 to 193687
Data columns (total 37 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Frame ID              192311 non-null  int64  
 1   left_Position X       192311 non-null  float64
 2   left_Position Y       192311 non-null  float64
 3   left_Position Z       192311 non-null  float64
 4   left_Velocity X       192311 non-null  float64
 5   left_Velocity Y       192311 non-null  float64
 6   left_Velocity Z       192311 non-null  float64
 7   left_Pitch            192311 non-null  float64
 8   left_Roll             192311 non-null  float64
 9   left_Yaw              192311 non-null  float64
 10  left_Wrist Pos X      192311 non-null  float64
 11  left_Wrist Pos Y      192311 non-null  float64
 12  left_Wrist Pos Z      192311 non-null  float64
 13  left_Elbow pos X      192311 non-null  float64
 14  left_Elbow Pos Y      192311 non-null  float64
 15  

In [58]:
# Class balance: number of training rows per label.
Y_train.value_counts()

spontan    66725
sync       66048
alone      59538
Name: label, dtype: int64

In [59]:
# Sanity check: True if any feature value is still missing after dropna().
X_train.isnull().values.any()


False

In [60]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
# NOTE(review): rows are individual frames of the same recordings, so a random
# row-level split presumably puts neighbouring frames of one recording on both
# sides - confirm this is intended; the per-file validation below is the
# stricter check.
x_train, x_test, y_train, y_test = train_test_split(
    X_train,
    Y_train,
    test_size=0.2,
    random_state=42,
)

In [61]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Bagging ensemble of 500 decision trees, each fitted on a bootstrap sample of
# max_samples=100 rows. random_state is fixed (same seed as the train/test
# split) so that re-running the notebook reproduces the reported scores.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1,
    random_state=42)
bag_clf.fit(x_train, y_train)
y_pred = bag_clf.predict(x_test)


In [62]:
# Overall accuracy of the bagging ensemble on the held-out 20% split.
accuracy_score(y_test, y_pred)

0.8556014871434885

In [63]:
from sklearn.metrics import classification_report, confusion_matrix
# Per-class precision / recall / F1 for the predictions currently in y_pred.
print ("Classification Report")
print(classification_report(y_test, y_pred))
# Confusion matrix: rows are the true labels and columns the predicted labels,
# both in the order alone, spontan, sync (row sums match the 'support' column
# of the report above).
print ("Confusion Report")
print(confusion_matrix(y_test, y_pred))

Classification Report
              precision    recall  f1-score   support

       alone       0.97      0.99      0.98     11864
     spontan       0.78      0.84      0.81     13302
        sync       0.83      0.75      0.79     13297

    accuracy                           0.86     38463
   macro avg       0.86      0.86      0.86     38463
weighted avg       0.86      0.86      0.85     38463

Confusion Report
[[11778    51    35]
 [  113 11211  1978]
 [  211  3166  9920]]


# Testing

In [64]:
path = r'extraFiles\Validation'
# Collect the path of every file below the validation directory.
# A comprehension keeps the loop variables out of the notebook namespace; the
# earlier loop bound a global named `dir`, shadowing the builtin.
csvs = [
    os.path.join(root, file_name)
    for root, _dirs, files in os.walk(path)
    for file_name in files
]

csvs

['extraFiles\\Validation\\Itiel Lab\\Itiel636770937431610843Spontan.csv',
 'extraFiles\\Validation\\Itiel Lab\\Itiel636770938584048459Sync.csv',
 'extraFiles\\Validation\\Itiel Lab\\Itiel636770939529673940Alone.csv',
 'extraFiles\\Validation\\Itiel Lab\\Itiel636770940701219076Spontan.csv',
 'extraFiles\\Validation\\Itiel Lab\\Itiel636770941638547912Sync.csv',
 'extraFiles\\Validation\\Itiel Lab\\Itiel636770942529041679Alone.csv',
 'extraFiles\\Validation\\Mor SHerman\\Mor636771064635567065Spontan.csv',
 'extraFiles\\Validation\\Mor SHerman\\Mor636771065531910215Sync.csv',
 'extraFiles\\Validation\\Mor SHerman\\Mor636771066407626524Alone.csv',
 'extraFiles\\Validation\\Oded Medina\\Oded636754665152715825Alone.csv',
 'extraFiles\\Validation\\Oded Medina\\Oded636754667262994505Spontan.csv',
 'extraFiles\\Validation\\Oded Medina\\Oded636754668465917901Sync.csv',
 'extraFiles\\Validation\\Oded Medina\\Oded636754669540564461Alone.csv',
 'extraFiles\\Validation\\Oded Medina\\Oded6367546704902

In [65]:
# Score the bagging ensemble on each validation recording separately and print
# one "<path> <accuracy>" line per file.
# NOTE(review): 'Frame ID' here is whatever prepare_data returns for the file,
# whereas training rows were renumbered by preparing_train_data - confirm the
# feature is comparable between the two.
for csv in csvs:
    df = prepare_data(csv).dropna()
    y_validation = df['label']
    x_validation = df.drop(columns=['label'])
    prediction = bag_clf.predict(x_validation)
    print(csv, accuracy_score(y_validation, prediction))

extraFiles\Validation\Itiel Lab\Itiel636770937431610843Spontan.csv 0.9459336665152204
extraFiles\Validation\Itiel Lab\Itiel636770938584048459Sync.csv 0.7311685442898167
extraFiles\Validation\Itiel Lab\Itiel636770939529673940Alone.csv 0.9930523390458545
extraFiles\Validation\Itiel Lab\Itiel636770940701219076Spontan.csv 0.889914452949122
extraFiles\Validation\Itiel Lab\Itiel636770941638547912Sync.csv 0.6625570776255708
extraFiles\Validation\Itiel Lab\Itiel636770942529041679Alone.csv 0.9924634950541686
extraFiles\Validation\Mor SHerman\Mor636771064635567065Spontan.csv 0.9367854741089442
extraFiles\Validation\Mor SHerman\Mor636771065531910215Sync.csv 0.7523638000900496
extraFiles\Validation\Mor SHerman\Mor636771066407626524Alone.csv 0.9909638554216867
extraFiles\Validation\Oded Medina\Oded636754665152715825Alone.csv 0.9902309058614565
extraFiles\Validation\Oded Medina\Oded636754667262994505Spontan.csv 0.9279964221824687
extraFiles\Validation\Oded Medina\Oded636754668465917901Sync.csv 0.224

In [66]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest of 100 trees on the same split. random_state is fixed (same
# seed as the train/test split) so the printed accuracy is reproducible.
# Note that y_pred is overwritten here with the forest's predictions.
model = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.9996100148194369


In [68]:
# Score the random forest on each validation recording separately and print
# one "<path> <accuracy>" line per file.
for csv in csvs:
    df = prepare_data(csv).dropna()
    y_validation = df['label']
    x_validation = df.drop(columns=['label'])
    prediction = model.predict(x_validation)
    print(csv, accuracy_score(y_validation, prediction))

extraFiles\Validation\Itiel Lab\Itiel636770937431610843Spontan.csv 0.9647887323943662
extraFiles\Validation\Itiel Lab\Itiel636770938584048459Sync.csv 0.8272586702010162
extraFiles\Validation\Itiel Lab\Itiel636770939529673940Alone.csv 1.0
extraFiles\Validation\Itiel Lab\Itiel636770940701219076Spontan.csv 0.9520486267447096
extraFiles\Validation\Itiel Lab\Itiel636770941638547912Sync.csv 0.7940639269406393
extraFiles\Validation\Itiel Lab\Itiel636770942529041679Alone.csv 1.0
extraFiles\Validation\Mor SHerman\Mor636771064635567065Spontan.csv 0.9849809459762385
extraFiles\Validation\Mor SHerman\Mor636771065531910215Sync.csv 0.8187753264295362
extraFiles\Validation\Mor SHerman\Mor636771066407626524Alone.csv 1.0
extraFiles\Validation\Oded Medina\Oded636754665152715825Alone.csv 1.0
extraFiles\Validation\Oded Medina\Oded636754667262994505Spontan.csv 0.9964221824686941
extraFiles\Validation\Oded Medina\Oded636754668465917901Sync.csv 0.2311111111111111
extraFiles\Validation\Oded Medina\Oded6367546