In [602]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [603]:
# Hand landmarks tracked per hand. Each landmark contributes an "_x" and a
# "_y" column, in that order.
landmarks = [
    "wrist",
    "thumb_cmc", "thumb_mcp", "thumb_ip", "thumb_tip",
    "index_finger_mcp", "index_finger_pip", "index_finger_dip", "index_finger_tip",
    "middle_finger_mcp", "middle_finger_pip", "middle_finger_dip", "middle_finger_tip",
    "ring_finger_mcp", "ring_finger_pip", "ring_finger_dip", "ring_finger_tip",
    "pinky_mcp", "pinky_pip", "pinky_dip", "pinky_tip",
]
columns = [f"{landmark}_{axis}" for landmark in landmarks for axis in ("x", "y")]

# Full column list: every right-hand column first, then every left-hand column.
# NOTE(review): `actual_columns` is not referenced by the other cells visible
# in this notebook -- confirm whether it is still needed.
actual_columns = [side + name for side in ("right_", "left_") for name in columns]

In [604]:
# One CSV per class; a file's position in this list is later used as its label.
file_paths = ["hello.csv", "meet.csv", "nice.csv", "everyone.csv"]

# Load each file and keep at most its first 50 rows (all columns).
data_frames_x = []
for file_path in file_paths:
    frame = pd.read_csv(file_path)
    data_frames_x.append(frame.iloc[:50, :])

In [605]:
# Build one label Series per loaded frame: every row of the frame at position
# `label` in data_frames_x is assigned the integer `label`.
data_series_y = [
    pd.Series([label] * frame.shape[0])
    for label, frame in enumerate(data_frames_x)
]

In [606]:
# Stack the per-class frames / label series row-wise with a fresh 0..n-1 index,
# then convert straight to NumPy arrays.
X = pd.concat(data_frames_x, axis=0, ignore_index=True).to_numpy()
Y = pd.concat(data_series_y, axis=0, ignore_index=True).to_numpy()

In [607]:
# Seed NumPy's global generator so the permutation below is reproducible.
np.random.seed(0)

# Draw a random ordering of the row positions of X. The rows themselves are
# reordered in a later cell.
indices = np.arange(len(X))
shuffled_indices = np.random.permutation(indices)

In [608]:
# Reorder features and labels with the same permutation so rows stay aligned.
# Caution: this rebinds X and Y, so re-running this cell alone applies the
# permutation a second time.
X, Y = X[shuffled_indices], Y[shuffled_indices]

In [609]:
# Scale every feature by a fixed divisor of 1000.
# A disabled alternative fitted a preprocessing.MinMaxScaler on X
# (scaler.fit_transform(X)); it is intentionally not executed here.
# The classifier in this notebook is trained on `scaled`, so inputs passed to
# it must be divided by 1000 as well.
scaled = np.divide(X, 1000)

In [610]:
# Hold out 20% of the scaled samples for evaluation; random_state pins the split.
(X_train, X_test,
 y_train, y_test) = train_test_split(scaled, Y, random_state=0, test_size=0.2)

In [611]:
# Show the dimensions of the shuffled (unscaled) feature matrix.
np.shape(X)

(150, 126)

In [612]:
# Support-vector classifier with a polynomial kernel. probability=True is
# required for the predict_proba call made in a later cell.
# NOTE: the variable is named `logreg` although it holds an SVC.
logreg = SVC(probability=True, kernel='poly')
logreg.fit(X=X_train, y=y_train)

SVC(kernel='poly', probability=True)

In [613]:
# Predict labels for the held-out split and report the test accuracy.
# `logreg` holds an SVC with a polynomial kernel (the variable name is kept
# because other cells reference it), so the message names the real model.
y_pred = logreg.predict(X_test)
# Reuse y_pred instead of logreg.score(X_test, y_test), which would run the
# same prediction again before computing the same accuracy.
print('Accuracy of SVC (poly kernel) classifier on test set: {:.2f}'.format(accuracy_score(y_test, y_pred)))

Accuracy of logistic regression classifier on test set: 1.00


In [614]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

1.0

In [615]:
# Persist the fitted classifier to disk.
# The context manager guarantees the file is flushed and closed even if
# pickling raises; the previous bare open() call never closed the handle.
# Only unpickle this file from a trusted location: pickle.load can execute
# arbitrary code.
file_name = 'sign_language.sav'
with open(file_name, 'wb') as model_file:
    pickle.dump(logreg, model_file)

In [616]:
# Class-membership probabilities for the held-out samples (available because
# the classifier was created with probability=True).
yp = logreg.predict_proba(X=X_test)

In [617]:
# Display the probability array computed by predict_proba in the previous cell.
yp

array([[0.02626854, 0.01216015, 0.96157131],
       [0.02959591, 0.01236162, 0.95804247],
       [0.03373035, 0.01587253, 0.95039712],
       [0.9638836 , 0.01217996, 0.02393644],
       [0.01079879, 0.96697316, 0.02222805],
       [0.96394243, 0.01217848, 0.02387909],
       [0.01149185, 0.01294376, 0.97556439],
       [0.02558856, 0.01224747, 0.96216397],
       [0.03520904, 0.01196051, 0.95283044],
       [0.01130763, 0.96540657, 0.02328581],
       [0.96382413, 0.01218107, 0.0239948 ],
       [0.02879235, 0.01213596, 0.95907169],
       [0.96364113, 0.01218587, 0.024173  ],
       [0.04492623, 0.01558366, 0.93949011],
       [0.01119373, 0.96574575, 0.02306052],
       [0.01091196, 0.96671543, 0.02237261],
       [0.96292898, 0.0122014 , 0.02486961],
       [0.04522845, 0.01561588, 0.93915567],
       [0.01082263, 0.96664166, 0.02253572],
       [0.0112992 , 0.96542159, 0.02327921],
       [0.04181619, 0.01177376, 0.94641004],
       [0.01165253, 0.96431446, 0.024033  ],
       [0.