In [26]:
# Project layout expected by this notebook:
# |---dataset
# |   |---left_kulak.npy
# |   |---right_kulak.npy
# |---tutorial.ipynb

# import 

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import time
import torch
import torch.nn as nn
import os
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [2]:
# MediaPipe handles shared by the cells below: the drawing helpers, the hands
# solution module, and one Hands instance (up to two hands, 0.5 thresholds).
npDraw = mp.solutions.drawing_utils
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Функция для записи жестов

In [41]:
def record(name):
    """Record hand-landmark samples from the webcam and save them with np.save.

    Frames are read from camera 0, mirrored horizontally and passed to the
    module-level MediaPipe ``hands`` object. For every detected hand the x and
    y of each landmark are flattened into one list ``[x0, y0, x1, y1, ...]``
    and stored as a sample. The preview window shows the drawn landmarks and
    the running sample count; recording stops when ``cv2.waitKey`` reports a
    key press.

    Args:
        name: Path handed to ``np.save`` for the recorded samples.

    Returns:
        ``np.ndarray`` built from the list of recorded samples.

    Raises:
        RuntimeError: If the camera cannot be opened or a frame cannot be
            read. Nothing is saved in that case, so an existing file at
            ``name`` is left untouched.
    """
    data = []
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError('Could not open camera 0')

    try:
        # Keep grabbing frames from the camera until a key is pressed.
        while True:
            success, img = cap.read()
            if not success:
                raise RuntimeError('Could not read a frame from the camera')
            img = cv2.flip(img, 1)
            imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = hands.process(imgRGB)

            if results.multi_hand_landmarks:
                for handLms in results.multi_hand_landmarks:
                    sample = []
                    for lm in handLms.landmark:
                        sample.append(lm.x)
                        sample.append(lm.y)

                    npDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)
                    data.append(sample)

            # Show how many samples have been collected so far.
            cv2.putText(img, str(len(data)), (100, 30), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2)
            cv2.imshow('python', img)
            if cv2.waitKey(1) > -1:
                break
    finally:
        # Always free the camera and close the preview, even on errors,
        # so the device is not left locked for the next cell.
        cap.release()
        cv2.destroyAllWindows()

    samples = np.array(data)
    np.save(name, samples)
    return samples

In [19]:
# Record samples for one gesture (opens the webcam) and save them to the given .npy path.
r = record('dataset/right_kulak.npy')

# Выгрузка жестов 

In [3]:
# Load every saved gesture array. The position of a gesture in `data`/`names`
# is used as its class label, so the directory listing is sorted to keep the
# label assignment stable between runs and machines.
data = []
names = []
for gesture in sorted(os.listdir('dataset')):
    stem, ext = os.path.splitext(gesture)
    if ext != '.npy':
        # Skip anything that is not a saved gesture array (hidden files, folders, ...).
        continue
    names.append(stem)
    data.append(np.load(os.path.join('dataset', gesture)))

print(f'total gestures: {len(data)}')
print(f'names: {names}')

total gestures: 2
names: ['left_kulak', 'right_kulak']


# dataset, dataloader

In [4]:
class Dataset(torch.utils.data.Dataset):
    """Labelled samples built from a sequence of per-gesture sample arrays.

    The base class is referenced by its full path: this class reuses the name
    ``Dataset``, so inheriting from the bare name would make the class derive
    from its own previous definition whenever the cell is re-run.

    Args:
        tmp: Sequence of gesture arrays; every row of the i-th array becomes
            one sample with label ``i``.
    """

    def __init__(self, tmp):
        samples = []
        labels = []
        for label, gesture in enumerate(tmp):
            for sample in gesture:
                samples.append(sample)
                labels.append(label)

        # Stack into one ndarray first: building a tensor directly from a
        # list of ndarrays is the slow path PyTorch warns about.
        self.data = torch.as_tensor(np.asarray(samples), dtype=torch.float)
        # Explicit integer dtype so the labels stay usable as class indices
        # even when the list is empty.
        self.labels = torch.as_tensor(labels, dtype=torch.long)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair at ``idx``."""
        return self.data[idx], self.labels[idx]

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

In [5]:
# Wrap the loaded gesture arrays in the labelled dataset and create a
# shuffling loader that yields mini-batches of 16 samples.
dataset = Dataset(data)
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)
print(f'total smaples: {len(dataset)}')

total smaples: 63


# Модель

In [6]:
class Net(nn.Module):
    """Three-layer fully connected classifier for flattened landmark samples.

    Args:
        in_features: Length of one input sample (default 42).
        num_classes: Number of output logits (default 6).
    """

    def __init__(self, in_features=42, num_classes=6):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return raw class logits for ``x`` (no softmax is applied)."""
        x = F.leaky_relu(self.fc1(x), negative_slope=0.001)
        x = F.leaky_relu(self.fc2(x), negative_slope=0.001)
        return self.fc3(x)

# NOTE(review): the defaults give 6 output logits; pass
# num_classes=len(names) to size the head to the loaded gestures.
net = Net()

# Обучение

In [7]:
# Train the classifier with cross-entropy loss and Adam for 15 epochs,
# printing the mean batch loss after every epoch.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)
for epoch in range(15):
    running_loss = 0.0
    for samples, labels in dataloader:
        # Clear gradients before the backward pass so nothing left over from
        # an interrupted run or another cell leaks into this update.
        optimizer.zero_grad()

        pred = net(samples)
        loss = criterion(pred, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(running_loss/len(dataloader))
print('Finished Training')

1.7028637528419495
1.4426083862781525
1.1657361686229706
0.8920300453901291
0.7171850353479385
0.6410658061504364
0.6049304902553558
0.5864267647266388
0.5598400980234146
0.5416528955101967
0.5187678039073944
0.49206987768411636
0.473278671503067
0.44399627298116684
0.4257565438747406
Finished Training


# Скрипт для теста 

In [25]:
# Live test: classify every detected hand in the webcam stream and draw the
# predicted gesture name on the frame. The loop stops on any key press.
cap = cv2.VideoCapture(0)
try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame available (camera missing or stream ended): stop cleanly.
            break
        img = cv2.flip(img, 1)
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(imgRGB)

        if results.multi_hand_landmarks:
            for handLms in results.multi_hand_landmarks:
                # Flatten the landmarks the same way the recorder does: [x0, y0, x1, y1, ...].
                tmp = []
                for lm in handLms.landmark:
                    tmp.append(lm.x)
                    tmp.append(lm.y)

                npDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)

                for_model = torch.tensor(tmp, dtype=torch.float)
                # Inference only: no autograd graph is needed.
                with torch.no_grad():
                    pred = net(for_model)

                # The network may have more outputs than there are loaded
                # gestures; restrict the argmax to the outputs that have a
                # name so the lookup below cannot go out of range.
                i = torch.argmax(pred[:len(names)])

                cv2.putText(img, names[i.item()], (30, 30), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2)
        cv2.imshow('python', img)
        if cv2.waitKey(1) > -1:
            break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()
