In [107]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import torch.optim as optim
from sklearn.utils import shuffle

import cv2
import mediapipe as mp

handsModule = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [108]:
data = shuffle(pd.read_csv("gestures_data.csv"))
data

Unnamed: 0,class,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,x17,y17,x18,y18,x19,y19,x20,y20,x21,y21
5188,2,0.198417,0.557235,0.217409,0.662339,0.268550,0.734442,0.295123,0.796152,0.307173,...,0.262487,0.566554,0.308981,0.464506,0.274572,0.508587,0.249077,0.521550,0.257909,0.507398
4746,3,0.213529,0.576731,0.257309,0.575272,0.302075,0.540928,0.325771,0.495553,0.311015,...,0.259843,0.290698,0.216476,0.436709,0.223121,0.381009,0.229489,0.352547,0.232076,0.320278
6399,5,0.140430,0.703924,0.209503,0.696566,0.273029,0.629750,0.325201,0.583692,0.377326,...,0.137300,0.600990,0.084259,0.534127,0.073769,0.512049,0.088571,0.575196,0.096195,0.591214
4001,0,0.255297,0.593494,0.289986,0.610349,0.310282,0.639323,0.325730,0.667053,0.336533,...,0.207301,0.805734,0.193636,0.629203,0.176530,0.672817,0.176834,0.720038,0.178616,0.768300
5983,0,0.276524,0.750914,0.375573,0.747577,0.434010,0.643148,0.453828,0.529085,0.492041,...,0.264902,0.145801,0.202373,0.522698,0.201782,0.391771,0.204075,0.316772,0.206312,0.237854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4497,1,0.215314,0.770532,0.230555,0.682660,0.266249,0.613618,0.285453,0.560318,0.282030,...,0.321920,0.769120,0.301699,0.840366,0.353982,0.816874,0.335439,0.806793,0.314065,0.809248
2466,0,0.201892,0.508687,0.251272,0.489458,0.276723,0.429785,0.284563,0.362938,0.303853,...,0.151294,0.146517,0.145439,0.365704,0.136327,0.294515,0.133118,0.252755,0.131906,0.210584
797,0,0.863741,0.894225,0.836424,0.858505,0.823916,0.819842,0.816522,0.785293,0.808597,...,0.799144,0.735372,0.846377,0.785367,0.830712,0.755107,0.814353,0.744896,0.802527,0.742186
6784,1,0.267921,0.483193,0.280687,0.404953,0.316910,0.316777,0.327456,0.238351,0.310118,...,0.365918,0.464608,0.330984,0.499688,0.405601,0.521907,0.380963,0.527025,0.357577,0.518265


In [109]:
x_train = data.iloc[:7000,1:].values.tolist()
y_train = data.iloc[:7000,0].values.tolist()
x_test = data.iloc[7000:,1:].values.tolist()
y_test = data.iloc[7000:,0].values.tolist()

In [110]:
x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train).long()
x_test = torch.Tensor(x_test)
y_test = torch.Tensor(y_test).long()

In [111]:
class Network(nn.Module):

    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(42, 32)
        self.b1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 32)
        self.b2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32,16)
        self.b3 = nn.BatchNorm1d(16)
        self.fc4 = nn.Linear(16,6)

    def forward(self,x):

        x = F.relu(self.fc1(x))
        x = self.b1(x)
        x = F.relu(self.fc2(x))
        x = self.b2(x)
        x = F.relu(self.fc3(x))
        x = self.b3(x)
        x = F.softmax(self.fc4(x), dim = 1)

        return x

In [112]:
net = Network()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.1)

def train(model, x, y, optimizer, criterion):
    model.zero_grad()           # zeroes the gradient buffers of all parameters
    output = model(x)           # forward
    loss =criterion(output,y)   # calculate the loss
    loss.backward()             # back propagation
    optimizer.step()            # update gradients
    return loss

In [113]:
for i in range(100):
    train(net, x_train, y_train, optimizer, criterion)

In [114]:
result = net(x_test)

In [115]:
plus = 0
for i in range(len(result)):
    if torch.argmax(result[i]) == y_test[i]:
        plus += 1
plus, len(result)

(544, 568)

In [116]:
cap = cv2.VideoCapture(0)

with handsModule.Hands(static_image_mode=True, max_num_hands=4, min_detection_confidence=0.8) as hands:
    while cap.isOpened():
        ret, image = cap.read()
        cv2.flip(image, 1)
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
         
        if results.multi_hand_landmarks:
            for handLandmarks in results.multi_hand_landmarks:
                # process each hand list of landmarks
                mp_drawing.draw_landmarks(image, handLandmarks, handsModule.HAND_CONNECTIONS)
                data = [[],[0]*42]
                for point in handsModule.HandLandmark:
                    # process each landmark
                    
                    # To access the actual list of landmarks of the hand by index, we cannot directly 
                    # use the handLandmarks variable. We need to access its landmark field instead.
                    normalizedLandmark = handLandmarks.landmark[point]
                    
                    data[0].append(normalizedLandmark.x)
                    data[0].append(normalizedLandmark.y)
                
                d = torch.argmax(net(torch.Tensor(data))[0])
                print(d)
                
                               
        cv2.imshow("Hand Tracking", image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(5)
tensor(0)
tensor(2)
tensor(0)
tensor(1)
tensor(0)
tensor(1)
tensor(0)
tensor(4)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(5)
tensor(3)
tensor(5)
tensor(3)
tensor(3)
tensor(2)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)
tensor(3)


In [117]:
answers = {"palm": 0, "thumb up": 1, "thumb down": 2, "ok": 3, "fist": 4, "l": 5}
cap.release()
cv2.destroyAllWindows()