In [245]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


import pickle

In [246]:
# Load the data.
# The data is stored in a bunch of csv files
# for example data for the letter A is stored in the file "training_data_0/A.csv"
# we will load all the data into a pandas dataframe
df = pd.DataFrame()

num_to_letter = dict()
letter_to_num = dict()

# index and loop through the letters
for i, letter in enumerate("ABCDEFGHIKLMNOPQRSTUVWXY"):
    # read the csv file
    df_letter = pd.read_csv(f"training_data_0/{letter}.csv")
    # drop the "'symbol'" column
    df_letter = df_letter.drop(columns=["'symbol'"])
    # add a column to the dataframe with the letter
    df_letter["symbol"] = i
    # add the letter to the letter maps
    num_to_letter[i] = letter
    letter_to_num[letter] = i
    # add the dataframe to the main dataframe
    df = pd.concat([df, df_letter])

# rename the "'symbol'" column to "symbol"
df = df.rename(columns={"'symbol'": "symbol"})

df.head()

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,...,x19,y19,z19,x20,y20,z20,x21,y21,z21,symbol
0,0.284234,0.599662,1.510391e-09,0.352661,0.57424,-0.018644,0.401655,0.505305,-0.03125,0.417682,...,0.26475,0.411588,-0.051163,0.270928,0.466022,-0.052875,0.276105,0.505524,-0.052011,0
1,0.285704,0.594281,-4.252064e-09,0.351391,0.575788,-0.023778,0.397199,0.503982,-0.038397,0.411797,...,0.26084,0.409029,-0.05418,0.269301,0.466309,-0.058145,0.274917,0.504831,-0.057939,0
2,0.282022,0.592181,-4.262098e-09,0.348222,0.574958,-0.024807,0.395815,0.503679,-0.039821,0.411231,...,0.257454,0.408016,-0.052401,0.265423,0.464812,-0.05643,0.270709,0.503389,-0.056365,0
3,0.27987,0.594496,-3.989998e-09,0.34521,0.575632,-0.021771,0.391491,0.502638,-0.035953,0.406466,...,0.254278,0.407087,-0.054067,0.262207,0.463662,-0.057304,0.266132,0.502502,-0.056816,0
4,0.27503,0.592625,-4.403823e-09,0.340713,0.573089,-0.021498,0.387466,0.500672,-0.035819,0.402215,...,0.248197,0.406337,-0.052727,0.256451,0.461449,-0.05608,0.261073,0.500557,-0.055821,0


In [247]:
df["symbol"].value_counts()

4     375
2     313
17    298
11    283
14    260
20    257
12    256
19    252
15    248
10    247
22    246
9     245
21    244
3     243
7     241
18    238
6     237
5     237
1     234
23    233
16    233
13    233
8     233
0     225
Name: symbol, dtype: int64

In [248]:
df.describe()

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,...,x19,y19,z19,x20,y20,z20,x21,y21,z21,symbol
count,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,...,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0
mean,0.440433,0.62275,5.929709e-10,0.486052,0.603479,-0.018906,0.518731,0.554779,-0.037185,0.521704,...,0.430614,0.464734,-0.072318,0.439989,0.486142,-0.076378,0.445179,0.497874,-0.077505,11.356079
std,0.191912,0.181983,5.487365e-09,0.196405,0.182482,0.022592,0.204327,0.187479,0.033304,0.217539,...,0.220695,0.19434,0.026841,0.221597,0.198481,0.026343,0.219653,0.201887,0.027505,6.902894
min,0.090501,0.141086,-1.96911e-08,0.109391,0.128427,-0.107338,0.103584,0.105467,-0.156331,0.059861,...,0.023418,0.080981,-0.191882,0.025751,0.072168,-0.199254,0.031057,0.041557,-0.197314,0.0
25%,0.289168,0.476999,-2.008002e-09,0.3276,0.453367,-0.031281,0.355535,0.392496,-0.053778,0.344039,...,0.251514,0.305164,-0.08848,0.258871,0.333067,-0.090959,0.265973,0.349207,-0.092341,5.0
50%,0.390545,0.595534,1.151827e-09,0.441132,0.579819,-0.019073,0.478165,0.5405,-0.036178,0.477882,...,0.379583,0.441283,-0.073844,0.391482,0.463063,-0.075681,0.394272,0.477647,-0.075551,11.0
75%,0.62665,0.792995,3.73428e-09,0.678022,0.777037,-0.007548,0.7202,0.731449,-0.021938,0.738128,...,0.647629,0.627818,-0.056358,0.65967,0.630992,-0.06109,0.65934,0.644602,-0.061332,17.0
max,0.836572,1.025575,1.916064e-08,0.898871,0.997497,0.082677,0.945384,0.95046,0.117195,0.976453,...,0.916674,0.986162,0.053344,0.90374,1.020965,0.040676,0.890713,1.031843,0.034286,23.0


In [249]:
df.dtypes

x1        float64
y1        float64
z1        float64
x2        float64
y2        float64
           ...   
z20       float64
x21       float64
y21       float64
z21       float64
symbol      int64
Length: 64, dtype: object

In [250]:
# we need to modify the data. Each row represents the xyz coordinates of several points on a hand
# we will convert this into a series of distances between the points and the center of the hand
normalized_df = pd.DataFrame()
# get the center of the hand
first = True
for row in df.iterrows():
    # get the xyz coordinates of the points
    if first:
        print(row[1][:].values)
        print(row[1][:-1].values)
        first = False
    points = row[1][:-1].values.reshape(-1, 3)
    # get the center of the hand
    center = np.mean(points, axis=0)
    # get the distances between the points and the center
    distances = np.linalg.norm(points - center, axis=1)
    # add the distances to the normalized dataframe
    normalized_df = pd.concat([normalized_df, pd.DataFrame(distances).T])

[ 2.84234434e-01  5.99662066e-01  1.51039137e-09  3.52660865e-01
  5.74240446e-01 -1.86437536e-02  4.01654959e-01  5.05305409e-01
 -3.12500969e-02  4.17681813e-01  4.36548412e-01 -4.56334427e-02
  4.22503233e-01  3.77860457e-01 -5.84865883e-02  3.74746442e-01
  4.36910540e-01 -8.12254008e-03  3.80444705e-01  3.89682174e-01
 -2.00392306e-02  3.67883712e-01  4.49001282e-01 -2.07441952e-02
  3.63591522e-01  4.97271508e-01 -2.23034602e-02  3.35153967e-01
  4.29939628e-01 -1.52731035e-02  3.40880424e-01  3.88413638e-01
 -4.07350659e-02  3.35393161e-01  4.62964505e-01 -5.23742251e-02
  3.34779263e-01  5.15209615e-01 -5.54651171e-02  2.94720203e-01
  4.33333546e-01 -2.50072163e-02  3.02414715e-01  3.95723999e-01
 -5.02379835e-02  3.02014083e-01  4.72031504e-01 -5.67812920e-02
  3.04903924e-01  5.23163736e-01 -5.66067770e-02  2.54041135e-01
  4.42956179e-01 -3.69402952e-02  2.64749527e-01  4.11588371e-01
 -5.11631295e-02  2.70927608e-01  4.66022164e-01 -5.28754666e-02
  2.76105493e-01  5.05524

In [251]:
normalized_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
count,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,...,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0
mean,0.16888,0.135518,0.092419,0.070245,0.078422,0.06243,0.080998,0.092487,0.110193,0.049627,...,0.06993,0.08997,0.054527,0.052702,0.055431,0.074259,0.076923,0.068345,0.074658,0.087115
std,0.039238,0.037373,0.034306,0.030727,0.031394,0.013824,0.021424,0.038436,0.054275,0.019953,...,0.043893,0.063577,0.018517,0.02098,0.028128,0.041497,0.015087,0.019747,0.0272,0.036576
min,0.038313,0.030872,0.022458,0.001777,0.007949,0.00287,0.011499,0.007262,0.002656,0.009778,...,0.006704,0.004441,0.008384,0.005722,0.003783,0.002551,0.007935,0.010838,0.014071,0.013151
25%,0.140355,0.108687,0.065736,0.04582,0.05644,0.052694,0.067097,0.058526,0.059297,0.035693,...,0.032528,0.034391,0.040797,0.037143,0.03484,0.046042,0.066589,0.054724,0.057067,0.061895
50%,0.163896,0.128998,0.084116,0.069041,0.074566,0.06065,0.080629,0.088747,0.107295,0.043712,...,0.054058,0.068965,0.050821,0.046897,0.045515,0.061525,0.078056,0.062134,0.06994,0.076313
75%,0.194633,0.158656,0.115661,0.088772,0.096883,0.070891,0.092415,0.123537,0.156276,0.057363,...,0.111149,0.15731,0.06412,0.067974,0.070391,0.088147,0.088172,0.081652,0.083545,0.104001
max,0.271586,0.242121,0.192109,0.169416,0.160228,0.114997,0.146094,0.21062,0.263544,0.119883,...,0.166165,0.220927,0.115243,0.118714,0.149044,0.203736,0.121115,0.141722,0.183354,0.22181


In [252]:
# we will create a neural network with 2 hidden layers
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(21,)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(48, activation="relu"),
    tf.keras.layers.Dense(24, activation="softmax")
])

# use xgb regressor
from xgboost import XGBRegressor

model_2 = XGBRegressor(learning_rate=0.1, n_estimators=100, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, scale_pos_weight=1, seed=27)

# use random forest regressor
from sklearn.ensemble import RandomForestRegressor

model_3 = RandomForestRegressor()

# use linear regression
from sklearn.linear_model import LinearRegression

model_4 = LinearRegression()

# use a decision tree
from sklearn.tree import DecisionTreeRegressor

model_5 = DecisionTreeRegressor(min_samples_leaf=0.13, random_state=27) # max_depth=10, max_leaf_nodes=13
# model_5.max_depth = 15
# model_5.max_leaf_nodes = 24

In [253]:
# use test train split to split the data into training and validation data
from sklearn.model_selection import train_test_split

X = normalized_df
y = df["symbol"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [254]:
# compile the model
model.compile(
    loss='mean_absolute_error',
    optimizer="adam",
    metrics=['mean_absolute_error', 'mean_squared_error', 'accuracy']
)

# train the model
model.fit(
    X_train, y_train,
    epochs=10,
)

model_2.fit(
    X_train, y_train
)

model_3.fit(
    X_train, y_train
)

model_4.fit(
    X_train, y_train
)

model_5.fit(
    X_train, y_train
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [255]:
# test the model
model.evaluate(X_test, y_test)



[11.37677001953125,
 11.37677001953125,
 176.97531127929688,
 0.060457516461610794]

In [256]:
model_2.score(X_test, y_test)

0.9555818832427639

In [257]:
model_3.score(X_test, y_test)

0.9879127343931685

In [258]:
model_4.score(X_test, y_test)

0.5297807211967389

In [272]:
model_5.score(X_test, y_test)

0.9719673451337159

In [259]:
model_2.predict(X_test)

array([ 1.11442394e+01,  1.83313918e+00,  1.10215759e+01,  1.61080952e+01,
        8.28927135e+00,  1.69721146e+01,  6.82213640e+00,  6.70756531e+00,
        1.88213005e+01,  4.27743816e+00,  1.62996788e+01,  1.66688824e+01,
        2.08596458e+01,  3.04592681e+00, -3.32274854e-01,  1.68301144e+01,
        2.16422486e+00,  2.07066479e+01,  2.50376534e+00,  1.28763313e+01,
        2.72872710e+00,  9.58587360e+00,  4.48008299e+00,  1.85841579e+01,
        1.64537125e+01,  1.62246475e+01,  8.00806141e+00,  1.37960634e+01,
        2.22720909e+00,  1.02160006e+01,  5.21272719e-01,  1.25571001e+00,
        1.96516705e+01,  5.61368763e-01,  1.61822186e+01,  6.87061501e+00,
        1.30778971e+01,  7.38046527e-01,  5.37457657e+00,  1.99029636e+01,
        1.67310276e+01,  1.48475351e+01,  2.34255047e+01,  1.74456806e+01,
        1.61951294e+01,  9.21380997e-01,  1.19463243e+01,  8.86633682e+00,
        2.12764320e+01,  4.46290731e+00,  1.07946001e-01,  1.62496986e+01,
        5.42125368e+00,  

In [260]:
model_3.predict(X_test)

array([11.56,  1.  , 11.9 , 17.52, 10.  , 16.  ,  6.  ,  7.02, 20.  ,
        4.  , 16.03, 16.97, 21.  ,  4.  ,  0.  , 18.  ,  2.  , 22.85,
        2.03, 13.  ,  2.11,  9.93,  4.  , 17.03, 16.95, 16.11,  6.24,
       13.1 ,  3.98,  8.82,  0.03,  0.03, 22.  ,  1.  , 15.91,  7.  ,
       14.26,  0.  ,  3.12, 19.93, 15.13, 15.14, 23.  , 18.  , 21.79,
        0.  , 14.64, 11.33, 22.85,  3.  ,  0.  , 17.91,  4.15,  0.  ,
       22.  ,  5.42,  4.05,  8.  , 16.72, 23.  ,  6.  , 19.52,  0.13,
       22.  ,  7.  , 11.01,  8.96, 12.63,  8.  , 23.  , 20.  , 11.47,
       13.  ,  6.  ,  7.02, 17.32, 13.65, 16.38, 10.59, 16.09,  4.  ,
        8.85,  1.  ,  7.  , 16.06,  6.  ,  1.  , 11.77,  9.95, 16.3 ,
       12.21, 10.  ,  3.  ,  6.03, 19.01, 11.56,  5.9 , 16.79,  2.11,
       12.91, 23.  , 20.  , 12.22, 17.03, 12.  , 12.  , 13.  ,  1.  ,
       23.  , 16.95, 13.92,  7.  ,  9.  , 15.32,  2.  , 18.  ,  0.12,
       14.03,  6.  ,  6.  ,  1.  , 12.73, 12.  ,  1.  ,  9.17, 17.  ,
        5.  ,  4.  ,

In [270]:
model_4.predict(X_test)

array([13.07437122,  4.73866109,  4.83949967, 15.67763873,  7.48466572,
       20.18938948, 14.60173463,  9.12839932, 19.71295832,  0.95299948,
       22.39650952, 17.02730971, 10.63355084,  0.08010081,  5.65938937,
       15.9432171 ,  5.72781055, 10.50545839,  7.9090825 , 11.59943918,
        7.28407474,  8.64903212,  7.76709318, 15.45517457, 17.33119802,
       17.24302258, 15.39855019, 20.98111086,  4.66020941, 15.21828012,
        0.92275245,  2.30486946, 17.56547211,  3.60870732, 22.52701008,
        8.74894023, 10.32594621,  4.7507306 ,  8.09775873, 19.92816884,
       16.41226684, 13.42157508, 16.86255719, 15.25091864, 14.02837144,
        5.36868254, 14.83468343,  9.47369491, 13.43560428, 12.2033514 ,
        1.68493717, 14.69208269, 16.08298182,  2.05468284, 18.41342299,
       12.17298427,  7.87432592, 11.23250716, 15.9523319 ,  8.92694343,
        8.69744503, 15.80128402,  5.26131961, 14.13490187,  7.4321122 ,
       12.03602799,  8.03243036, 16.64414112, 11.58080978, 15.64

In [271]:
model_5.predict(X_test)

array([11.,  1., 12., 18., 10., 16.,  6.,  7., 20.,  4., 16., 17., 21.,
        4.,  0., 18.,  2., 23.,  2., 13.,  2., 10.,  4., 17., 17., 16.,
        6., 11.,  3.,  8.,  0.,  0., 22.,  1., 16.,  7., 15.,  0.,  3.,
       20., 18., 15., 23., 18., 23.,  0., 15., 13., 23.,  3.,  0., 18.,
        4.,  0., 22.,  4.,  4.,  8., 17., 23.,  6., 20.,  0., 22.,  7.,
       11.,  9., 17.,  8., 23., 20., 23., 13.,  6.,  7., 18., 19., 18.,
       11., 16.,  4.,  9.,  1.,  7., 16.,  6.,  1., 12., 10., 16., 12.,
       10.,  3.,  6., 19., 14.,  3., 17.,  2., 13., 23., 20., 12., 17.,
       12., 12., 13.,  1., 23., 17., 14.,  7.,  9., 15.,  2., 18.,  0.,
       15.,  6.,  6.,  1., 13., 12.,  1.,  8., 17.,  5.,  4., 19.,  4.,
        6.,  7.,  4., 22., 13., 19., 19.,  6.,  1.,  4., 13., 22., 21.,
        6.,  6.,  1.,  8., 23., 20.,  7., 13., 14.,  7., 19.,  8., 21.,
        9., 16., 15.,  5., 22.,  8., 17.,  4.,  2., 13.,  8.,  4.,  2.,
        2., 13., 13.,  4.,  4.,  3.,  7.,  2., 22., 12.,  2.,  5

In [261]:
y_test

112    11
227     1
97     12
11     18
77     10
       ..
109     6
38      0
164    16
50      6
201    21
Name: symbol, Length: 612, dtype: int64

In [262]:
# save model 1
#
# pickle.dump(model_3, open("model_3.sav", "wb"))

In [273]:
# save model 5

# pickle.dump(model_5, open("model_5.sav", "wb"))

In [263]:
# # load model
# model = pickle.load(open("{{model_name}}.sav", "rb"))

In [264]:
# we need to modify the data. Each row represents the xyz coordinates of several points on a hand
# we will convert this into a series of distances between the points and the center of the hand
test_data = pd.read_csv("testing_data_abcs.csv")
# test_data = pd.read_csv("training_data_0/C.csv")
test_data.drop(columns=["'symbol'"], inplace=True)

test_df = pd.DataFrame()
# get the center of the hand
for row in test_data.iterrows():
    # get the xyz coordinates of the points
    points = row[1][:].values.reshape(-1, 3)
    # get the center of the hand
    center = np.mean(points, axis=0)
    # get the distances between the points and the center
    distances = np.linalg.norm(points - center, axis=1)
    # add the distances to the normalized dataframe
    test_df = pd.concat([test_df, pd.DataFrame(distances).T])

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.nd

In [265]:
predictions = model_3.predict(test_df)
predictions

array([0.  , 0.  , 0.  , 0.  , 0.  , 0.27, 0.27, 0.66, 0.27, 0.  , 0.  ,
       1.69, 1.81, 0.73, 1.46, 1.2 , 1.85, 2.82, 0.32, 1.63, 1.5 , 1.17,
       1.5 , 1.52, 2.03, 2.99, 2.14, 2.88, 3.27, 3.42, 3.45, 0.31, 0.  ,
       0.27, 5.07, 3.81, 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
       1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
       1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
       1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 2.28, 2.03, 1.99, 2.  , 1.99,
       1.99, 2.  , 1.99, 2.  , 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
       2.  , 1.99, 2.  , 2.2 , 2.  , 2.11, 2.  , 3.72, 3.89, 4.64, 4.68,
       3.3 , 4.26, 3.82, 3.39, 3.24, 3.03, 3.02, 3.02, 3.04, 3.04, 3.02,
       3.04, 3.  , 3.  , 3.  , 3.  , 3.  , 3.  , 3.66, 3.52, 3.17, 3.  ,
       3.31, 3.16, 3.07, 3.52, 3.07, 3.07, 3.41, 3.57, 3.68, 3.68, 4.07,
       3.17, 3.49, 3.24, 3.24, 3.24, 3.43, 3.43, 3.19, 3.19, 3.19, 3.38,
       3.17, 3.49, 3.03, 3.16, 3.17, 3.18, 3.16, 3.

In [266]:
# round the predictions
predictions = np.round(predictions)

In [267]:
# get the letter for each prediction
letters = [num_to_letter[int(prediction)] for prediction in predictions]

In [268]:
prev = None
for letter in letters:
    if prev is None:
        print(letter, end=", ")
        prev = letter
    else:
        if prev != letter:
            print(letter, end=", ")
            prev = letter

A, B, A, C, B, C, D, A, C, B, C, D, C, D, A, F, E, B, C, E, F, D, E, D, E, D, E, D, E, D, C, E, F, E, F, E, F, E, F, E, F, E, F, E, F, E, F, E, D, C, E, F, D, F, H, I, K, I, K, I, K, 