In [3]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.model_selection import train_test_split

In [4]:
df = pd.read_csv('gestures.csv')
df

Unnamed: 0,HandLandmark.WRIST_lmx,HandLandmark.WRIST_lmy,HandLandmark.THUMB_CMC_lmx,HandLandmark.THUMB_CMC_lmy,HandLandmark.THUMB_MCP_lmx,HandLandmark.THUMB_MCP_lmy,HandLandmark.THUMB_IP_lmx,HandLandmark.THUMB_IP_lmy,HandLandmark.THUMB_TIP_lmx,HandLandmark.THUMB_TIP_lmy,...,HandLandmark.RING_FINGER_TIP_lmy,HandLandmark.PINKY_MCP_lmx,HandLandmark.PINKY_MCP_lmy,HandLandmark.PINKY_PIP_lmx,HandLandmark.PINKY_PIP_lmy,HandLandmark.PINKY_DIP_lmx,HandLandmark.PINKY_DIP_lmy,HandLandmark.PINKY_TIP_lmx,HandLandmark.PINKY_TIP_lmy,gesture_name
0,863,2187,806,2020,770,1772,756,1569,738,1409,...,1825,959,1911,973,1799,968,1861,956,1912,A
1,827,1765,754,1648,717,1363,711,1133,694,990,...,1440,905,1369,912,1273,892,1416,882,1462,A
2,831,1949,774,1773,743,1574,730,1413,732,1298,...,1439,900,1503,901,1391,888,1460,887,1477,A
3,790,1370,714,1247,678,997,671,779,650,649,...,1040,858,950,857,841,840,985,837,1037,A
4,787,1282,709,1167,671,916,666,695,647,565,...,979,850,861,848,754,833,896,830,955,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1295,753,391,688,577,666,796,677,969,702,1063,...,888,838,848,800,954,786,893,787,827,Z
1296,753,393,687,576,664,797,673,981,696,1087,...,886,837,852,799,953,786,891,787,824,Z
1297,754,389,689,565,668,786,678,968,702,1071,...,895,837,856,801,955,787,895,788,829,Z
1298,752,399,686,582,663,799,672,976,697,1077,...,895,836,853,799,957,784,896,785,828,Z


In [5]:
len(df.values.tolist())

1300

In [6]:
# Convert the data columns into an array
data_array = df.drop(columns='gesture_name').values

# Map each unique gesture_name to an integer
gesture_mapping = {gesture: idx for idx, gesture in enumerate(df['gesture_name'].unique())}
target_array = df['gesture_name'].map(gesture_mapping).values

# Inverse mapping: from target number back to gesture_name
gesture_names = {idx: gesture for gesture, idx in gesture_mapping.items()}

gesture_data = {
    'data': data_array,
    'target': target_array
}


In [7]:
X_train, X_test, y_train, y_test = train_test_split(gesture_data['data'], gesture_data['target'], test_size=.2)
# create model instance
bst = XGBClassifier(n_estimators=200, max_depth=200, learning_rate=0.01, objective='binary:logistic')
# fit model
bst.fit(X_train, y_train)
# make predictions
preds = bst.predict(X_test)
predicted_names = [gesture_names[number] for number in preds]

In [8]:
actual_names = [gesture_names[number] for number in y_test]

In [9]:
identical_count = sum(1 for a, b in zip(predicted_names, actual_names) if a == b)
identical_rate = (identical_count / len(predicted_names)) * 100

print(f"Identical rate: {identical_rate:.2f}%")

Identical rate: 93.46%


In [11]:
# save to JSON
bst.save_model("model.json")
# save to text format
bst.save_model("model.txt")

In [25]:
model_xgb_2 = XGBClassifier()
model_xgb_2.load_model("model.json")

In [26]:
bst

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.01, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=200, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=200, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)

In [27]:
model_xgb_2

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.01, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=200, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=200, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)

In [63]:
gesture_names

{0: 'P',
 1: 'Q',
 2: 'R',
 3: 'S',
 4: 'T',
 5: 'U',
 6: 'V',
 7: 'W',
 8: 'X',
 9: 'Y',
 10: 'Z',
 11: 'Hi!',
 12: 'devil ;)',
 13: 'A',
 14: 'B',
 15: 'C',
 16: 'D',
 17: 'E',
 18: 'F',
 19: 'G',
 20: 'H',
 21: 'I',
 22: 'J',
 23: 'K',
 24: 'L',
 25: 'M',
 26: 'N',
 27: 'O'}

In [31]:
X_train, X_test, y_train, y_test = train_test_split(gesture_data['data'], gesture_data['target'], test_size=.2)
preds = model_xgb_2.predict(X_test)
predicted_names = [gesture_names[number] for number in preds]
actual_names = [gesture_names[number] for number in y_test]

In [32]:
identical_count = sum(1 for a, b in zip(predicted_names, actual_names) if a == b)
identical_rate = (identical_count / len(predicted_names)) * 100

print(f"Identical rate: {identical_rate:.2f}%")

Identical rate: 98.93%


In [59]:
model_xgb_2.predict(X_test[7].reshape(1, -1))

array([2])

In [62]:
len(X_test[0])

42