In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import itertools
from collections import deque

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    LSTM, GRU, Dense, Dropout, Bidirectional,
    LayerNormalization
)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical, Sequence

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import mediapipe as mp


In [3]:
data = pd.read_csv('Train.csv')

In [4]:
X, y = data["video_id"], data["label_id"]

In [5]:
X

0             1
1             3
2             6
3            11
4            14
          ...  
50415    148084
50416    148085
50417    148088
50418    148090
50419    148092
Name: video_id, Length: 50420, dtype: int64

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=13)

In [7]:
X_train

32140     94260
23138     67962
7504      21782
25753     75644
24262     71245
          ...  
22260     65339
33634     98626
32842     96312
47280    138821
33106     97069
Name: video_id, Length: 40336, dtype: int64

In [8]:
model = Sequential([
    Bidirectional(GRU(64, return_sequences=True, input_shape=(37, 63), recurrent_dropout=0.2)),
    LayerNormalization(),
    GRU(64, recurrent_dropout=0.2),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dense(27, activation='softmax')
])
    



In [9]:
class HandSequenceGenerator(Sequence):
    def __init__(self, video_ids, labels, batch_size=16, data_dir='dataset', num_classes=5, shuffle=True):
        self.video_ids = video_ids
        self.labels = labels
        self.batch_size = batch_size
        self.data_dir = data_dir
        self.num_classes = num_classes
        self.shuffle = shuffle

        self.indexes = np.arange(len(self.video_ids))
        self.on_epoch_end()

        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            max_num_hands=1,
            min_detection_confidence=0.5,
            static_image_mode=True
        )

    def __len__(self):
        return int(np.ceil(len(self.video_ids) / self.batch_size))

    def __getitem__(self, index):
        batch_ids = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        batch_video_ids = [self.video_ids[k] for k in batch_ids]
        batch_labels = [self.labels[k] for k in batch_ids]

        X = np.zeros((len(batch_video_ids), 37, 63))
        y = np.zeros((len(batch_video_ids), self.num_classes))

        for i, vid in enumerate(batch_video_ids):
            folder = os.path.join(self.data_dir, str(vid))
            frames = sorted([f for f in os.listdir(folder) if f.endswith('.jpg')])

            sequence = []

            for frame_name in frames[:37]:
                image = cv2.imread(os.path.join(folder, frame_name))
                height, width, _ = image.shape
                results = self.hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

                hand = []
                if results.multi_hand_landmarks:
                    for lm in results.multi_hand_landmarks[0].landmark:
                        hand.extend([lm.x * width, lm.y * height, lm.z * width])
                else:
                    hand = [0] * 63

                sequence.append(hand)

            if len(sequence) < 37:
                sequence += [[0]*63] * (37 - len(sequence))

            X[i] = np.array(sequence)
            y[i][batch_labels[i]] = 1  # one-hot

        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [10]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
                       max_num_hands=1, #Сча тут ода рука на изображении изется, если че можно сделать 2, я пока хз сколько рук на фотках
                       min_detection_confidence=0.5, #Коэффициент детекции - надо будет поиграться, чтобы добиться четкого распознавания рук даже в темноте.
                       static_image_mode=True) #Флаг того, что мы обрабатываем фотки
mp_draw = mp.solutions.drawing_utils

#Создаем список всех признаков
headers = []
for i in range(21):
    headers += [f'landmark_{i}_x', f'landmark_{i}_y', f'landmark_{i}_z']
points = []

for dir_in_train in X_train:
    dir = "dataset/" + str(dir_in_train)
    for photo in os.listdir(dir):
        if photo.endswith('.jpg'):
            image = cv2.imread(dir + "/" + photo)
            height, width, _ = image.shape
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            hand = []
            if results.multi_hand_landmarks:
                landmarks = results.multi_hand_landmarks[0].landmark
                for lm in landmarks:
                    hand.append([lm.x * width, lm.y * height, lm.z * width])
            else:
                for _ in range(21):
                    hand.append([0]*3)
            points.append(hand)
    X_train_landmarks = np.array(points).reshape(-1, 37, 63)
    y_train = np.array(y_train)
    
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Колбэки
    early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    checkpoint = ModelCheckpoint("best_model.keras", monitor='val_loss', save_best_only=True)

    history = model.fit(
    X_train_landmarks, y_train,
    epochs=300,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1  # 1 — прогресс-бар, 2 — только эпохи, 0 — ничего
)
            

ValueError: Data cardinality is ambiguous:
  x sizes: 1
  y sizes: 40336
Make sure all arrays contain the same number of samples.

In [11]:
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = pd.Series(y_train).reset_index(drop=True)
y_test = pd.Series(y_test).reset_index(drop=True)

In [None]:
train_gen = HandSequenceGenerator(X_train, y_train, batch_size=32, num_classes=27)
val_gen = HandSequenceGenerator(X_test, y_test, batch_size=32, num_classes=27)

model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
checkpoint = ModelCheckpoint("best_model.keras", monitor='val_loss', save_best_only=True)
tf.debugging.set_log_device_placement(True)


model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=300,
    callbacks=[early_stop, checkpoint],
    verbose=1
)

Epoch 1/300
   2/1261 [..............................] - ETA: 11:38:30 - loss: 3.2939 - accuracy: 0.0938

In [13]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1
