In [25]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


import pickle

In [26]:
# Load the data.
# The data is stored in a bunch of csv files
# for example data for the letter A is stored in the file "training_data_0/A.csv"
# we will load all the data into a pandas dataframe
df = pd.DataFrame()

num_to_letter = dict()
letter_to_num = dict()

# index and loop through the letters
for i, letter in enumerate("ABCDEFGHIKLMNOPQRSTUVWXY"):
    # read the csv file
    df_letter = pd.read_csv(f"training_data_0/{letter}.csv")
    # drop the "'symbol'" column
    df_letter = df_letter.drop(columns=["'symbol'"])
    # add a column to the dataframe with the letter
    df_letter["symbol"] = i
    # add the letter to the letter maps
    num_to_letter[i] = letter
    letter_to_num[letter] = i
    # add the dataframe to the main dataframe
    df = pd.concat([df, df_letter])

# rename the "'symbol'" column to "symbol"
df = df.rename(columns={"'symbol'": "symbol"})

df.head()

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,...,x19,y19,z19,x20,y20,z20,x21,y21,z21,symbol
0,0.284234,0.599662,1.510391e-09,0.352661,0.57424,-0.018644,0.401655,0.505305,-0.03125,0.417682,...,0.26475,0.411588,-0.051163,0.270928,0.466022,-0.052875,0.276105,0.505524,-0.052011,0
1,0.285704,0.594281,-4.252064e-09,0.351391,0.575788,-0.023778,0.397199,0.503982,-0.038397,0.411797,...,0.26084,0.409029,-0.05418,0.269301,0.466309,-0.058145,0.274917,0.504831,-0.057939,0
2,0.282022,0.592181,-4.262098e-09,0.348222,0.574958,-0.024807,0.395815,0.503679,-0.039821,0.411231,...,0.257454,0.408016,-0.052401,0.265423,0.464812,-0.05643,0.270709,0.503389,-0.056365,0
3,0.27987,0.594496,-3.989998e-09,0.34521,0.575632,-0.021771,0.391491,0.502638,-0.035953,0.406466,...,0.254278,0.407087,-0.054067,0.262207,0.463662,-0.057304,0.266132,0.502502,-0.056816,0
4,0.27503,0.592625,-4.403823e-09,0.340713,0.573089,-0.021498,0.387466,0.500672,-0.035819,0.402215,...,0.248197,0.406337,-0.052727,0.256451,0.461449,-0.05608,0.261073,0.500557,-0.055821,0


In [27]:
df["symbol"].value_counts()

4     375
2     313
17    298
11    283
14    260
20    257
12    256
19    252
15    248
10    247
22    246
9     245
21    244
3     243
7     241
18    238
6     237
5     237
1     234
23    233
16    233
13    233
8     233
0     225
Name: symbol, dtype: int64

In [28]:
df.describe()

Unnamed: 0,x1,y1,z1,x2,y2,z2,x3,y3,z3,x4,...,x19,y19,z19,x20,y20,z20,x21,y21,z21,symbol
count,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,...,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0
mean,0.440433,0.62275,5.929709e-10,0.486052,0.603479,-0.018906,0.518731,0.554779,-0.037185,0.521704,...,0.430614,0.464734,-0.072318,0.439989,0.486142,-0.076378,0.445179,0.497874,-0.077505,11.356079
std,0.191912,0.181983,5.487365e-09,0.196405,0.182482,0.022592,0.204327,0.187479,0.033304,0.217539,...,0.220695,0.19434,0.026841,0.221597,0.198481,0.026343,0.219653,0.201887,0.027505,6.902894
min,0.090501,0.141086,-1.96911e-08,0.109391,0.128427,-0.107338,0.103584,0.105467,-0.156331,0.059861,...,0.023418,0.080981,-0.191882,0.025751,0.072168,-0.199254,0.031057,0.041557,-0.197314,0.0
25%,0.289168,0.476999,-2.008002e-09,0.3276,0.453367,-0.031281,0.355535,0.392496,-0.053778,0.344039,...,0.251514,0.305164,-0.08848,0.258871,0.333067,-0.090959,0.265973,0.349207,-0.092341,5.0
50%,0.390545,0.595534,1.151827e-09,0.441132,0.579819,-0.019073,0.478165,0.5405,-0.036178,0.477882,...,0.379583,0.441283,-0.073844,0.391482,0.463063,-0.075681,0.394272,0.477647,-0.075551,11.0
75%,0.62665,0.792995,3.73428e-09,0.678022,0.777037,-0.007548,0.7202,0.731449,-0.021938,0.738128,...,0.647629,0.627818,-0.056358,0.65967,0.630992,-0.06109,0.65934,0.644602,-0.061332,17.0
max,0.836572,1.025575,1.916064e-08,0.898871,0.997497,0.082677,0.945384,0.95046,0.117195,0.976453,...,0.916674,0.986162,0.053344,0.90374,1.020965,0.040676,0.890713,1.031843,0.034286,23.0


In [29]:
df.dtypes

x1        float64
y1        float64
z1        float64
x2        float64
y2        float64
           ...   
z20       float64
x21       float64
y21       float64
z21       float64
symbol      int64
Length: 64, dtype: object

In [30]:
# we need to modify the data. Each row represents the xyz coordinates of several points on a hand
# we will convert this into a series of distances between the points and the center of the hand
normalized_df = pd.DataFrame()
# get the center of the hand
first = True
for row in df.iterrows():
    # get the xyz coordinates of the points
    if first:
        print(row[1][:].values)
        print(row[1][:-1].values)
        first = False
    points = row[1][:-1].values.reshape(-1, 3)
    # get the center of the hand
    center = np.mean(points, axis=0)
    # get the distances between the points and the center
    distances = np.linalg.norm(points - center, axis=1)
    # add the distances to the normalized dataframe
    normalized_df = pd.concat([normalized_df, pd.DataFrame(distances).T])

[ 2.84234434e-01  5.99662066e-01  1.51039137e-09  3.52660865e-01
  5.74240446e-01 -1.86437536e-02  4.01654959e-01  5.05305409e-01
 -3.12500969e-02  4.17681813e-01  4.36548412e-01 -4.56334427e-02
  4.22503233e-01  3.77860457e-01 -5.84865883e-02  3.74746442e-01
  4.36910540e-01 -8.12254008e-03  3.80444705e-01  3.89682174e-01
 -2.00392306e-02  3.67883712e-01  4.49001282e-01 -2.07441952e-02
  3.63591522e-01  4.97271508e-01 -2.23034602e-02  3.35153967e-01
  4.29939628e-01 -1.52731035e-02  3.40880424e-01  3.88413638e-01
 -4.07350659e-02  3.35393161e-01  4.62964505e-01 -5.23742251e-02
  3.34779263e-01  5.15209615e-01 -5.54651171e-02  2.94720203e-01
  4.33333546e-01 -2.50072163e-02  3.02414715e-01  3.95723999e-01
 -5.02379835e-02  3.02014083e-01  4.72031504e-01 -5.67812920e-02
  3.04903924e-01  5.23163736e-01 -5.66067770e-02  2.54041135e-01
  4.42956179e-01 -3.69402952e-02  2.64749527e-01  4.11588371e-01
 -5.11631295e-02  2.70927608e-01  4.66022164e-01 -5.28754666e-02
  2.76105493e-01  5.05524

In [31]:
normalized_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
count,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,...,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0,6111.0
mean,0.16888,0.135518,0.092419,0.070245,0.078422,0.06243,0.080998,0.092487,0.110193,0.049627,...,0.06993,0.08997,0.054527,0.052702,0.055431,0.074259,0.076923,0.068345,0.074658,0.087115
std,0.039238,0.037373,0.034306,0.030727,0.031394,0.013824,0.021424,0.038436,0.054275,0.019953,...,0.043893,0.063577,0.018517,0.02098,0.028128,0.041497,0.015087,0.019747,0.0272,0.036576
min,0.038313,0.030872,0.022458,0.001777,0.007949,0.00287,0.011499,0.007262,0.002656,0.009778,...,0.006704,0.004441,0.008384,0.005722,0.003783,0.002551,0.007935,0.010838,0.014071,0.013151
25%,0.140355,0.108687,0.065736,0.04582,0.05644,0.052694,0.067097,0.058526,0.059297,0.035693,...,0.032528,0.034391,0.040797,0.037143,0.03484,0.046042,0.066589,0.054724,0.057067,0.061895
50%,0.163896,0.128998,0.084116,0.069041,0.074566,0.06065,0.080629,0.088747,0.107295,0.043712,...,0.054058,0.068965,0.050821,0.046897,0.045515,0.061525,0.078056,0.062134,0.06994,0.076313
75%,0.194633,0.158656,0.115661,0.088772,0.096883,0.070891,0.092415,0.123537,0.156276,0.057363,...,0.111149,0.15731,0.06412,0.067974,0.070391,0.088147,0.088172,0.081652,0.083545,0.104001
max,0.271586,0.242121,0.192109,0.169416,0.160228,0.114997,0.146094,0.21062,0.263544,0.119883,...,0.166165,0.220927,0.115243,0.118714,0.149044,0.203736,0.121115,0.141722,0.183354,0.22181


In [32]:
# we will create a neural network with 2 hidden layers
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(21,)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(48, activation="relu"),
    tf.keras.layers.Dense(24, activation="softmax")
])

# use xgb regressor
from xgboost import XGBRegressor

model_2 = XGBRegressor(learning_rate=0.1, n_estimators=100, max_depth=3, min_child_weight=1, subsample=0.8, colsample_bytree=0.8, scale_pos_weight=1, seed=27)

# use random forest regressor
from sklearn.ensemble import RandomForestRegressor

model_3 = RandomForestRegressor()

# use linear regression
from sklearn.linear_model import LinearRegression

model_4 = LinearRegression()

# use a decision tree
from sklearn.tree import DecisionTreeRegressor

model_5 = DecisionTreeRegressor()

In [33]:
# use test train split to split the data into training and validation data
from sklearn.model_selection import train_test_split

X = normalized_df
y = df["symbol"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [34]:
# compile the model
model.compile(
    loss='mean_absolute_error',
    optimizer="adam",
    metrics=['mean_absolute_error', 'mean_squared_error', 'accuracy']
)

# train the model
model.fit(
    X_train, y_train,
    epochs=10,
)

model_2.fit(
    X_train, y_train
)

model_3.fit(
    X_train, y_train
)

model_4.fit(
    X_train, y_train
)

model_5.fit(
    X_train, y_train
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [35]:
# test the model
model.evaluate(X_test, y_test)



[11.500953674316406,
 11.500953674316406,
 179.82119750976562,
 0.04575163498520851]

In [36]:
model_2.score(X_test, y_test)

0.955179910746305

In [37]:
model_3.score(X_test, y_test)

0.9907190423335627

In [38]:
model_4.score(X_test, y_test)

0.5024810582084747

In [39]:
model_5.score(X_test, y_test)

0.9593454916783885

In [40]:
model_2.predict(X_test)

array([11.984345  ,  9.28834   ,  9.356735  , 18.666668  ,  2.20147   ,
        1.1652468 , 15.969303  , 11.278209  , 12.212861  , 20.398632  ,
        7.8426433 , 18.797897  , 11.589216  , 22.290146  ,  7.5565186 ,
       10.267158  , 11.023238  , 14.457974  , 17.573338  , 18.54646   ,
       10.463958  , 11.869989  ,  6.0253615 , 20.896784  , 14.355469  ,
       11.495165  , 18.97626   ,  7.5592318 , 18.34719   , 10.292652  ,
       22.380877  ,  1.2035023 ,  2.2701275 , 10.122029  ,  9.909293  ,
       20.019999  , 10.65891   , 10.400291  ,  5.4128895 ,  4.6718655 ,
       22.056522  ,  7.71025   , 12.944131  , 13.97169   ,  0.9965097 ,
       19.149408  , 20.48465   , 17.507776  ,  4.2144704 ,  9.810832  ,
        6.0507007 ,  5.843601  , 13.882627  ,  8.790607  ,  7.6794825 ,
        9.708336  , 16.432808  ,  9.762485  , 17.78081   , 13.583209  ,
       19.648167  ,  4.6993127 ,  5.1018014 ,  5.1292834 ,  7.800154  ,
       18.817059  ,  1.2627147 ,  1.801601  , 19.694162  , 17.17

In [41]:
model_3.predict(X_test)

array([12.99,  8.  ,  9.84, 20.8 ,  2.  ,  0.  , 18.78, 12.84, 10.87,
       21.81,  6.96, 18.53, 12.82, 23.  ,  7.99, 12.16, 10.  , 12.88,
       18.  , 19.  , 11.25, 13.6 ,  3.  , 22.  , 14.96, 13.  , 17.12,
        7.99, 16.97, 11.65, 22.85,  1.  ,  3.07, 10.  ,  9.  , 19.87,
       11.  , 10.4 ,  3.98,  4.  , 21.  ,  7.33, 12.23, 15.  ,  1.  ,
       19.04, 21.  , 17.  ,  4.51,  8.  ,  7.  ,  3.  , 14.64,  7.72,
        7.04,  7.1 , 16.95,  8.  , 15.99, 15.  , 21.  ,  3.  ,  5.  ,
        5.  ,  7.14, 18.93,  0.  ,  2.  , 19.83, 17.18, 13.  , 11.  ,
        3.  , 17.82,  3.  , 11.  ,  8.  , 12.01,  7.02, 18.42, 11.  ,
        8.89,  9.  , 17.9 , 16.37,  2.24,  3.  , 14.  , 16.26,  3.01,
       11.01, 12.97, 12.11, 22.  ,  8.  ,  2.  ,  0.  ,  6.06, 16.97,
       10.  , 13.  , 21.  , 15.99, 16.24,  8.  ,  8.  ,  7.  ,  1.  ,
       11.  ,  4.  , 16.08,  5.  , 12.89, 18.  , 22.  ,  9.01, 13.98,
       16.  , 12.  ,  9.16, 20.8 ,  0.  , 17.01, 14.03, 12.82, 15.12,
        6.98,  7.  ,

In [42]:
model_4.predict(X_test)

array([ 7.39923889,  5.27365305,  8.47495295,  9.15909141,  2.06758822,
        4.52129373, 15.18947275,  9.45301648, 12.33298397, 15.60202037,
       11.69027324, 20.13139257, 10.29175973, 14.32635849, 11.23499407,
        6.85471553, 10.46509879, 11.85421437, 15.75592008, 16.75876355,
        8.43026945, 10.44998252,  8.0398401 , 18.4319977 ,  5.90914419,
        9.81148289, 15.2578573 , 11.55233227, 15.50935469,  3.30912906,
       17.33473832,  6.60678785,  9.87666074,  9.04257538,  9.83114837,
       17.91833622, 15.1649101 , 12.88977745, 16.31398296,  2.62807381,
       12.49766312,  9.6046923 ,  9.53988441, 14.12522064,  4.60915421,
       19.24660637, 12.8628942 , 14.18743794,  6.47371657, 14.17264598,
        7.11697632, 11.76859941, 10.56250271, 14.76473319, 14.65508173,
        8.45235023, 15.90505971, 13.70948389, 23.72343125,  9.42156546,
       10.41376227,  5.06127047,  6.04821701,  7.1934272 ,  9.5921341 ,
       18.8499255 ,  6.80943889,  7.46860984, 15.01578267, 14.86

In [43]:
model_5.predict(X_test)

array([13.,  8., 10., 21.,  2.,  0., 21., 13., 11., 22.,  7., 19., 13.,
       23.,  8., 13., 10., 12., 18., 19., 11.,  4.,  3., 22., 15., 13.,
       17.,  8., 17., 12., 23.,  1.,  3., 10.,  9., 20., 11.,  4.,  4.,
        4., 21.,  7., 13., 15.,  1., 19., 21., 17.,  4.,  8.,  7.,  3.,
       15.,  7.,  7.,  7., 17.,  8., 16., 15., 21.,  3.,  5.,  5.,  7.,
       19.,  0.,  2., 20., 17., 13., 11.,  3., 18.,  3., 11.,  8., 12.,
        7., 22., 11.,  9.,  9., 18., 17.,  2.,  3., 14., 16.,  3., 11.,
       14., 12., 22.,  8.,  2.,  0.,  6., 17., 10., 13., 21., 16., 16.,
        8.,  8.,  7.,  1., 11.,  4., 15.,  5., 13., 18., 22.,  9., 14.,
       16., 12.,  8., 21.,  0., 17., 14., 13., 22.,  7.,  7., 10.,  2.,
        5., 22., 16., 23., 16., 23.,  8.,  1., 16., 11., 13.,  9.,  4.,
       23., 15., 16.,  8.,  6., 12., 11., 11.,  9.,  2., 10., 10.,  5.,
       18.,  9.,  2., 10.,  6.,  8.,  3.,  0., 16., 17., 18., 22.,  3.,
       22.,  9., 22., 22., 15.,  4.,  1.,  9., 15.,  6., 20., 21

In [44]:
y_test

183    13
132     8
36     10
72     21
189     2
       ..
22      4
191    11
45     23
22      8
243    12
Name: symbol, Length: 612, dtype: int64

In [46]:
# save model 5

# pickle.dump(model_5, open("model_5.sav", "wb"))

In [47]:
# # load model
# model = pickle.load(open("{{model_name}}.sav", "rb"))

In [49]:
# we need to modify the data. Each row represents the xyz coordinates of several points on a hand
# we will convert this into a series of distances between the points and the center of the hand
test_data = pd.read_csv("testing_data_abcs.csv")
# test_data = pd.read_csv("training_data_0/C.csv")
test_data.drop(columns=["'symbol'"], inplace=True)

test_df = pd.DataFrame()
# get the center of the hand
for row in test_data.iterrows():
    # get the xyz coordinates of the points
    points = row[1][:].values.reshape(-1, 3)
    # get the center of the hand
    center = np.mean(points, axis=0)
    # get the distances between the points and the center
    distances = np.linalg.norm(points - center, axis=1)
    # add the distances to the normalized dataframe
    test_df = pd.concat([test_df, pd.DataFrame(distances).T])

In [50]:
predictions = model_5.predict(test_df)
predictions

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0., 11., 11., 11.,  0.,  0.,  0.,  4.,  3.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  2.,  2.,  2.,  2.,  2.,  2.,
        2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,
        2.,  2.,  2.,  2., 12.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,
        3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  2.,  2.,  2.,
        2.,  2.,  2.,  2.,  2.,  2.,  2.,  4.,  4.,  4.,  4.,  4

In [51]:
# round the predictions
predictions = np.round(predictions)

In [52]:
# get the letter for each prediction
letters = [num_to_letter[int(prediction)] for prediction in predictions]

In [53]:
prev = None
for letter in letters:
    if prev is None:
        print(letter, end=", ")
        prev = letter
    else:
        if prev != letter:
            print(letter, end=", ")
            prev = letter

A, M, A, E, D, B, C, N, D, C, E, C, E, C, E, F, G, D, O, D, O, 