In [1]:
import pandas as pd
import numpy as np
import glob

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegressionCV
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
Names = glob.glob(r'C:\Users\USER\OneDrive\שולחן העבודה\Training'+"/*")
li = []
for i,name in enumerate(Names):
    all_files = glob.glob(name + "/*.csv")
    situation = ["Spontan", "Sync" , "Alone"]

    for filename in all_files:
        f = pd.read_csv(filename, index_col=None, header=0)
        for j,label in enumerate(situation):
            if label in filename:
                f[" Name"] = i
                f[" Situations"] = j
                f = (f[f["Time"] > (f["Time"][0]+7)])
        li.append(f)

df = pd.concat(li, axis=0, ignore_index=True)

df.loc[(df[' Hand Type'] == 'left') , " Hand Type"] = int(0)
df.loc[(df[' Hand Type'] == 'right') , " Hand Type"] = int(1)
df[' Hand Type'] = df[' Hand Type'].astype(int)

In [3]:
df.head()

Unnamed: 0,Time,Frame ID,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,...,Wrist Pos Y,Wrist Pos Z,Elbow pos X,Elbow Pos Y,Elbow Pos Z,Grab Strenth,Grab Angle,Pinch Strength,Name,Situations
0,71.41422,98073,0,2,-109.8061,228.382,87.32578,-41.53802,116.7071,-60.09788,...,186.2046,53.51289,-346.8731,25.91932,27.82433,0.0,0.030295,0.0,0,0
1,71.41422,98073,1,2,136.7226,201.686,-35.08604,2.458278,189.4632,113.807,...,166.615,15.43945,313.4828,3.741617,144.8563,0.0,0.229723,0.0,0,0
2,71.43108,98075,0,2,-110.429,230.8011,86.41932,-23.84248,125.9713,-58.77163,...,187.6303,53.10642,-345.148,25.04667,28.62841,0.0,0.019477,0.0,0,0
3,71.43108,98075,1,2,137.0247,205.0279,-32.5059,10.02984,189.792,155.6046,...,167.4718,16.26752,312.2578,1.103906,142.7872,0.0,0.233833,0.0,0,0
4,71.44753,98076,0,2,-110.4223,231.4766,85.78614,0.77496,77.71951,-72.85492,...,188.1121,52.70757,-345.9719,26.50846,28.25391,0.0,0.0,0.0,0,0


In [4]:
df.shape

(300397, 24)

In order to have a numeric dataframe, we change the names,hands,situation to numbers

**Situation :** <br>
Spontan - **0** <br>
Sync - **1** <br>
Alone - **2**

**Hand type :** <br>
Left - **0** <br>
Right - **1**

We also removed the first 7 seconds of each participant

In [5]:
df.isna().sum()

Time               0
 Frame ID          0
 Hand Type         0
 # hands           0
 Position X        0
 Position Y        0
 Position Z        0
 Velocity X        0
 Velocity Y        0
 Velocity Z        0
 Pitch             0
 Roll              0
 Yaw               0
 Wrist Pos X       0
 Wrist Pos Y       0
 Wrist Pos Z       0
 Elbow pos X       0
 Elbow Pos Y       0
 Elbow Pos Z       0
 Grab Strenth      0
 Grab Angle        0
 Pinch Strength    0
 Name              0
 Situations        0
dtype: int64

As we can tell there are no null values in the dataframe

In [6]:
p = df[(df[" # hands"]!=1)|(df[" Situations"]==2)]
df = p[(p[" # hands"]==1)|(p[" Situations"]!=2)]

In [7]:
HandRight=pd.read_csv(r"C:\Users\USER\OneDrive\שולחן העבודה\HandRight.csv",index_col=None,header=0)

In [8]:
HandRight.dropna(inplace = True)

In [9]:
HandRight = HandRight[HandRight[" Hand Type"]=='right']

### Let's deal with each situation : 
Starting with **Alone** 

In [10]:
Alone = df[df[" Situations"]==2]

In [11]:
l = []
right = HandRight
right.columns+="_"
for i in range(9):
    f = Alone[Alone[" Name"] == i]
    f.reset_index(inplace= True, drop = True)
    l.append(pd.concat([f, right], axis=1, ignore_index= False).dropna())

Alone = pd.concat(l, axis=0, ignore_index=True)
Alone.drop([" Name"], axis=1, inplace=True)

In [12]:
Alone.head()

Unnamed: 0,Time,Frame ID,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,...,Yaw_,Wrist Pos X_,Wrist Pos Y_,Wrist Pos Z_,Elbow pos X_,Elbow Pos Y_,Elbow Pos Z_,Grab Strenth_,Grab Angle_,Pinch Strength_
0,252.6601,118956.0,0.0,1.0,-117.1001,218.1561,138.0662,-53.72382,113.0561,138.4025,...,-0.2778,46.88831,207.3548,86.23589,203.8337,49.33507,237.4553,0.0,0.206389,0.0
1,252.677,118958.0,0.0,1.0,-118.345,220.6792,140.5435,-77.86527,151.4741,140.3897,...,-0.277763,47.54861,205.6568,86.4225,203.9553,47.88272,238.4482,0.0,0.24305,0.0
2,252.6934,118960.0,0.0,1.0,-120.132,224.1188,143.2279,-104.6922,206.2707,153.1618,...,-0.278792,48.1503,204.5182,86.35149,204.6231,45.35921,236.8544,0.0,0.281134,0.0
3,252.7098,118962.0,0.0,1.0,-121.2949,226.8678,145.1182,-59.04,142.4355,92.74687,...,-0.279768,48.64175,203.9989,86.16056,204.9939,44.18692,236.0927,0.0,0.277691,0.0
4,252.7263,118964.0,0.0,1.0,-121.9236,228.824,146.0297,-41.81462,124.4137,50.7195,...,-0.278629,48.93135,203.3777,85.59017,204.5952,42.68003,235.2896,0.0,0.283008,0.0


Now **Sync** or **Spontan**

In [13]:
SyncOrSpontan = df[df[' Situations']!=2]

In [14]:
Left = SyncOrSpontan[SyncOrSpontan[" Hand Type"]==0]

In [15]:
Right = SyncOrSpontan[SyncOrSpontan[" Hand Type"]==1]

In [16]:
Right.columns += "_"

In [17]:
SyncOrSpontan = pd.concat([Left,Right],axis=1)

In [18]:
SyncOrSpontan.reset_index(inplace=True)

In [19]:
SyncOrSpontan.drop([" Name"],axis=1,inplace=True)

In [20]:
SyncOrSpontan.head()

Unnamed: 0,index,Time,Frame ID,Hand Type,# hands,Position X,Position Y,Position Z,Velocity X,Velocity Y,...,Wrist Pos Y_,Wrist Pos Z_,Elbow pos X_,Elbow Pos Y_,Elbow Pos Z_,Grab Strenth_,Grab Angle_,Pinch Strength_,Name_,Situations_
0,0,71.41422,98073.0,0.0,2.0,-109.8061,228.382,87.32578,-41.53802,116.7071,...,,,,,,,,,,
1,1,,,,,,,,,,...,166.615,15.43945,313.4828,3.741617,144.8563,0.0,0.229723,0.0,0.0,0.0
2,2,71.43108,98075.0,0.0,2.0,-110.429,230.8011,86.41932,-23.84248,125.9713,...,,,,,,,,,,
3,3,,,,,,,,,,...,167.4718,16.26752,312.2578,1.103906,142.7872,0.0,0.233833,0.0,0.0,0.0
4,4,71.44753,98076.0,0.0,2.0,-110.4223,231.4766,85.78614,0.77496,77.71951,...,,,,,,,,,,


#### Now let's combine them into one dataframe for the whole dataset :

In [21]:
new_df = pd.concat([SyncOrSpontan,Alone],axis=0,ignore_index=True)

In [22]:
new_df = new_df.drop(["Time","index"," Hand Type"," Hand Type_","Time_"," # hands"," # hands_"," Name_"," Frame ID_"," Situations_"],axis=1)

We took every 10th row :

In [23]:
new_df=new_df[::10]

In [24]:
new_df.head()

Unnamed: 0,Frame ID,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,...,Yaw_,Wrist Pos X_,Wrist Pos Y_,Wrist Pos Z_,Elbow pos X_,Elbow Pos Y_,Elbow Pos Z_,Grab Strenth_,Grab Angle_,Pinch Strength_
0,98073.0,-109.8061,228.382,87.32578,-41.53802,116.7071,-60.09788,2.147859,0.867143,2.206444,...,,,,,,,,,,
10,98082.0,-112.5457,241.5983,82.42186,-71.08801,279.6893,-56.87809,2.050235,0.896647,2.147714,...,,,,,,,,,,
20,98092.0,-114.9085,259.3652,72.8743,24.98973,181.8221,-106.7381,1.887583,0.920726,1.994715,...,,,,,,,,,,
30,98101.0,-120.2698,273.1273,64.53665,-71.74786,187.0123,-34.23848,1.781152,1.089296,1.990816,...,,,,,,,,,,
40,98111.0,-122.3727,278.2248,70.00634,-47.98631,27.11987,165.2275,1.780755,1.092412,1.992721,...,,,,,,,,,,


Let's check if there are duplicated ID's, and remove them but only keep the lasts :

In [25]:
new_df.duplicated(subset=" Frame ID").sum()

17754

In [26]:
new_df.loc[new_df[" Frame ID"].duplicated(),:].head()

Unnamed: 0,Frame ID,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,...,Yaw_,Wrist Pos X_,Wrist Pos Y_,Wrist Pos Z_,Elbow pos X_,Elbow Pos Y_,Elbow Pos Z_,Grab Strenth_,Grab Angle_,Pinch Strength_
8220,,,,,,,,,,,...,-2.146385,87.4499,176.244,63.4177,284.103,24.1057,47.2746,0.0,0.0,0.0
8230,,,,,,,,,,,...,-2.148565,86.148,176.624,68.5792,284.213,26.8019,48.5189,0.0,0.0,0.0
8240,,,,,,,,,,,...,-2.158652,86.2498,174.468,69.5356,279.494,18.1457,52.2434,0.0,0.0,0.0
8250,,,,,,,,,,,...,-2.153379,85.8401,173.367,69.0777,279.659,17.5559,53.712,0.0,0.0,0.0
8260,,,,,,,,,,,...,-2.000205,88.9467,173.596,66.6367,277.918,11.6969,54.0939,0.0,0.02642,0.0


In [27]:
new_df=new_df.drop_duplicates(subset=" Frame ID")

In [28]:
new_df.shape

(10178, 38)

In [29]:
new_df.head()

Unnamed: 0,Frame ID,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,...,Yaw_,Wrist Pos X_,Wrist Pos Y_,Wrist Pos Z_,Elbow pos X_,Elbow Pos Y_,Elbow Pos Z_,Grab Strenth_,Grab Angle_,Pinch Strength_
0,98073.0,-109.8061,228.382,87.32578,-41.53802,116.7071,-60.09788,2.147859,0.867143,2.206444,...,,,,,,,,,,
10,98082.0,-112.5457,241.5983,82.42186,-71.08801,279.6893,-56.87809,2.050235,0.896647,2.147714,...,,,,,,,,,,
20,98092.0,-114.9085,259.3652,72.8743,24.98973,181.8221,-106.7381,1.887583,0.920726,1.994715,...,,,,,,,,,,
30,98101.0,-120.2698,273.1273,64.53665,-71.74786,187.0123,-34.23848,1.781152,1.089296,1.990816,...,,,,,,,,,,
40,98111.0,-122.3727,278.2248,70.00634,-47.98631,27.11987,165.2275,1.780755,1.092412,1.992721,...,,,,,,,,,,


## Let's train the dataset :

In [30]:
new_df.loc[(new_df[' Wrist Pos X_'].isnull()) , "  Wrist Pos X_"] = int(0)
new_df.loc[(new_df[' Wrist Pos Y_'].isnull()) , "  Wrist Pos Y_"] = int(0)
new_df.loc[(new_df[' Wrist Pos Z_'].isnull()) , "  Wrist Pos Z_"] = int(0)
new_df.loc[(new_df[' Elbow pos X_'].isnull()) , "  Elbow pos X_"] = int(0)
new_df.loc[(new_df[' Elbow Pos Y_'].isnull()) , "  Elbow Pos Y_"] = int(0)
new_df.loc[(new_df[' Elbow Pos Z_'].isnull()) , "  Elbow Pos Z_"] = int(0)

In [31]:
new_df[' Wrist Pos X_']=pd.to_numeric(new_df[' Wrist Pos X_'],errors = 'coerce')
new_df[' Wrist Pos Y_']=pd.to_numeric(new_df[' Wrist Pos Y_'],errors = 'coerce')
new_df[' Wrist Pos Z_']=pd.to_numeric(new_df[' Wrist Pos Z_'],errors = 'coerce')
new_df[' Elbow pos X_']=pd.to_numeric(new_df[' Elbow pos X_'],errors = 'coerce')
new_df[' Elbow Pos Y_']=pd.to_numeric(new_df[' Elbow Pos Y_'],errors = 'coerce')
new_df[' Elbow Pos Z_']=pd.to_numeric(new_df[' Elbow Pos Z_'],errors = 'coerce')

In [32]:
new_df = new_df.drop([' Wrist Pos X_', ' Wrist Pos Y_', ' Wrist Pos Z_',
       ' Elbow pos X_', ' Elbow Pos Y_', ' Elbow Pos Z_'],axis=1)

In [33]:
new_df=new_df.fillna(0)

In [34]:
new_df=new_df.round()

In [35]:
for i in new_df.columns:
    new_df[i]=new_df[i].astype(int)

In [36]:
new_df.head()

Unnamed: 0,Frame ID,Position X,Position Y,Position Z,Velocity X,Velocity Y,Velocity Z,Pitch,Roll,Yaw,...,Yaw_,Grab Strenth_,Grab Angle_,Pinch Strength_,Wrist Pos X_,Wrist Pos Y_,Wrist Pos Z_,Elbow pos X_,Elbow Pos Y_,Elbow Pos Z_
0,98073,-110,228,87,-42,117,-60,2,1,2,...,0,0,0,0,0,0,0,0,0,0
10,98082,-113,242,82,-71,280,-57,2,1,2,...,0,0,0,0,0,0,0,0,0,0
20,98092,-115,259,73,25,182,-107,2,1,2,...,0,0,0,0,0,0,0,0,0,0
30,98101,-120,273,65,-72,187,-34,2,1,2,...,0,0,0,0,0,0,0,0,0,0
40,98111,-122,278,70,-48,27,165,2,1,2,...,0,0,0,0,0,0,0,0,0,0


In [37]:
X = new_df.drop(" Situations",axis=1)
Y = new_df[" Situations"]

### Split the data

In [38]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.33,random_state=42)

### Logistic Regression classifier

In [39]:
LogisticRegression = LogisticRegressionCV()
LogisticRegression.fit(X, Y)
lr_pred = LogisticRegression.predict(X_test)
print(accuracy_score(Y_test, lr_pred))
print(classification_report(Y_test,lr_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.9032450133968443
              precision    recall  f1-score   support

           0       0.87      0.84      0.86      1153
           1       0.82      0.86      0.84      1007
           2       1.00      1.00      1.00      1199

    accuracy                           0.90      3359
   macro avg       0.90      0.90      0.90      3359
weighted avg       0.90      0.90      0.90      3359



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Random Forest classifier

In [40]:
RandomForestClassifier = RandomForestClassifier(max_depth=1)
RandomForestClassifier.fit(X, Y)
rf_pred = RandomForestClassifier.predict(X_test)
print(accuracy_score(Y_test, rf_pred))
print(classification_report(Y_test,rf_pred))

0.7606430485263471
              precision    recall  f1-score   support

           0       0.59      1.00      0.74      1153
           1       1.00      0.20      0.34      1007
           2       1.00      1.00      1.00      1199

    accuracy                           0.76      3359
   macro avg       0.86      0.73      0.69      3359
weighted avg       0.86      0.76      0.71      3359



### XGBoost classifier

In [41]:
xgb = XGBClassifier(n_estimators =200,n_jobs=-1)
xgb.fit(X, Y)
xgb_pred = xgb.predict(X_test)
print(accuracy_score(Y_test, xgb_pred))
print(classification_report(Y_test,xgb_pred))



1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1153
           1       1.00      1.00      1.00      1007
           2       1.00      1.00      1.00      1199

    accuracy                           1.00      3359
   macro avg       1.00      1.00      1.00      3359
weighted avg       1.00      1.00      1.00      3359





### KNeighbors classifier

In [42]:
knn = KNeighborsClassifier()
knn.fit(X, Y)
knn_pred = knn.predict(X_test)
print(accuracy_score(Y_test, knn_pred))
print(classification_report(Y_test,knn_pred))

0.9857100327478416
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1153
           1       0.97      1.00      0.98      1007
           2       1.00      0.96      0.98      1199

    accuracy                           0.99      3359
   macro avg       0.99      0.99      0.99      3359
weighted avg       0.99      0.99      0.99      3359



# Accuracy scores: 
- logistic regression : 0.90 <br>
- random forest : 0.76 <br>
- xgboost : 1.0 <br>
- kneighbors : 0.98 

## Interesting results ! 
We got an **1.0** accuracy score, using xgboost classifier !