In [1]:
import os
from typing import NamedTuple

import albumentations
import cv2
import mediapipe as mp
import numpy as np
import torch
import torch.nn.functional as F
from matplotlib import pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from tqdm.autonotebook import tqdm

2023-04-08 00:48:08.749071: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-08 00:48:08.752039: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-08 00:48:08.821874: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-08 00:48:08.823030: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.3,
    min_tracking_confidence=0.5,
)
mp_drawing = mp.solutions.drawing_utils

mp_drawing_styles = mp.solutions.drawing_styles
landmark_style = mp_drawing_styles.get_default_hand_landmarks_style()

connection_style = mp_drawing_styles.get_default_hand_connections_style()

In [3]:
def dataset_files(dataset_dir: str = "../dataset/", class_size: int = 70) -> list[tuple[int, str]]:
    files_dataset = []
    for dir_label in sorted(os.listdir(dataset_dir)):
        dir_label_path = dataset_dir + dir_label
        if os.path.isfile(dir_label_path):
            continue
        dir_label_path += "/"
        index = int(dir_label.split("_")[0])
        files = os.listdir(dir_label_path)
        files_dataset.extend(
            [(index, dir_label_path + img_path) for img_path in np.random.choice(files, size=class_size, replace=False)]
        )
    return files_dataset

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
def test_images():
    for key, value in files_dataest.items():
        image = cv2.imread(value[0])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:

                mp_drawing.draw_landmarks(
                    image,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    landmark_style,
                    connection_style,
                )
        plt.imshow(image)
        plt.title(key)
        plt.show()

In [5]:
# transform = albumentations.Compose(
#     [
#         albumentations.Downscale(p=0.2),
#         albumentations.RandomBrightness(p=0.2),
#         albumentations.RandomBrightnessContrast(p=0.2),
#         albumentations.pytorch.ToTensorV2(),
#     ]
# )
# transformed = transform(image=image)

In [6]:
class HandLandmarksDataset(Dataset):
    def __init__(self, imgs: list[tuple[int, str]]):
        self.imgs = imgs
        self.mp_hands = mp.solutions.hands.Hands()

    def __len__(self):
        return len(self.imgs)

    def process_image(self, idx: int) -> tuple[NamedTuple, int]:
        label, filename = self.imgs[idx]
        image = cv2.imread(filename)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        results = hands.process(image)
        return results, label

    def __getitem__(self, idx):
        results, label = self.process_image(idx)

        while results.multi_hand_landmarks is None:
            idx = np.random.randint(len(self.imgs))
            results, label = self.process_image(idx)

        landmarks = results.multi_hand_landmarks[0].landmark

        # Get the position of the wrist landmark (Landmark.WRIST)
        wrist_position_x, wrist_position_y = (
            landmarks[mp.solutions.hands.HandLandmark.WRIST].x,
            landmarks[mp.solutions.hands.HandLandmark.WRIST].y,
        )

        # Normalize the landmarks relative to the wrist position
        normalized_landmarks = []
        for landmark in landmarks:
            normalized_landmarks.append((landmark.x - wrist_position_x, landmark.y - wrist_position_y))  # landmark.z

        # Convert the landmarks to a PyTorch tensor
        normalized_landmarks = torch.flatten(torch.tensor(normalized_landmarks))

        normalized_landmarks /= max(abs(normalized_landmarks))

        # Return a tuple of the image and hand landmarks
        return normalized_landmarks, label

In [7]:
class GestureClassifier(nn.Module):
    def __init__(self, num_classes=14):
        super(GestureClassifier, self).__init__()

        # Fully connected layers
        self.fc1 = nn.Linear(42, 128)  # 21 * 2
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)  # 14 classes for the gestures

    def forward(self, x):
        # x = x.view(-1, 21 * 2)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [8]:
train_data, test_data = train_test_split(dataset_files(), test_size=0.1, random_state=42)

In [9]:
# Create the dataset and dataloader
train_dataset = HandLandmarksDataset(train_data)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

test_dataset = HandLandmarksDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)

# Load the model and define the loss and optimizer
model = GestureClassifier()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
def train(
    model, train_loader, test_loader, criterion, optimizer, num_epochs=10, batch_size=32, device=torch.device("cpu")
):
    # Train the model
    train_predicted = []
    train_labels = []
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        # Set the model to training mode
        model.train()

        # Initialize running loss and accuracy
        running_loss = 0.0
        running_accuracy = 0.0

        # Train on the batches in the training set
        pbar = tqdm(train_loader, desc=f"Train Epoch {epoch+1}/{num_epochs}")
        for i, (inputs, labels) in enumerate(pbar):
            # Send the inputs and labels to the device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass and backward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Update the running loss and accuracy
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            running_accuracy += torch.sum(predicted == labels).item()

            train_predicted.extend(predicted.tolist())
            train_labels.extend(labels.tolist())

        # Compute the average loss and accuracy for the epoch
        epoch_loss = running_loss / len(train_dataset)
        epoch_accuracy = running_accuracy / len(train_dataset)
        epoch_f1 = f1_score(train_labels, train_predicted, average="macro")

        # Print the training loss and accuracy for the epoch
        print(f"Train Epoch {epoch+1}: Loss={epoch_loss:.4f}, Accuracy={epoch_accuracy:.4f}, F1={epoch_f1:.4f}")

        # Set the model to evaluation mode
        model.eval()

        # Initialize test loss and accuracy
        test_loss = 0.0
        test_accuracy = 0.0

        # Evaluate on the batches in the test set
        with torch.no_grad():
            pbar = tqdm(test_loader, desc=f"Test Epoch {epoch+1}/{num_epochs}")

            test_predicted = []
            test_labels = []
            for i, (inputs, labels) in enumerate(pbar):
                # Send the inputs and labels to the device
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass
                outputs = model(inputs)

                # Compute the loss and accuracy
                loss = criterion(outputs, labels)
                test_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs.data, 1)
                test_accuracy += torch.sum(predicted == labels).item()

                test_predicted.extend(predicted.tolist())
                test_labels.extend(labels.tolist())

        # Compute the average test loss and accuracy for the epoch
        test_loss /= len(test_dataset)
        test_accuracy /= len(test_dataset)
        test_f1 = f1_score(test_labels, test_predicted, average="macro")

        # Print the test loss and accuracy for the epoch
        print(f"Test Epoch {epoch+1}: Loss={test_loss:.4f}, Accuracy={test_accuracy:.4f}, F1={test_f1:.4f}")

In [11]:
train(model, train_loader, test_loader, criterion, optimizer, num_epochs=7)

Epochs:   0%|          | 0/7 [00:00<?, ?it/s]

Train Epoch 1/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 1: Loss=2.1144, Accuracy=0.3696, F1=0.3580


Test Epoch 1/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 1: Loss=1.5468, Accuracy=0.4388, F1=0.3696


Train Epoch 2/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 2: Loss=1.0534, Accuracy=0.6905, F1=0.5317


Test Epoch 2/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 2: Loss=0.7661, Accuracy=0.7959, F1=0.7833


Train Epoch 3/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 3: Loss=0.6405, Accuracy=0.8503, F1=0.6394


Test Epoch 3/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 3: Loss=0.5734, Accuracy=0.8367, F1=0.8259


Train Epoch 4/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 4: Loss=0.4443, Accuracy=0.8878, F1=0.7013


Test Epoch 4/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 4: Loss=0.3505, Accuracy=0.9286, F1=0.9189


Train Epoch 5/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 5: Loss=0.3356, Accuracy=0.9127, F1=0.7437


Test Epoch 5/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 5: Loss=0.3048, Accuracy=0.9490, F1=0.9530


Train Epoch 6/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 6: Loss=0.2670, Accuracy=0.9240, F1=0.7735


Test Epoch 6/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 6: Loss=0.2568, Accuracy=0.9286, F1=0.9211


Train Epoch 7/7:   0%|          | 0/221 [00:00<?, ?it/s]

Train Epoch 7: Loss=0.2324, Accuracy=0.9331, F1=0.7961


Test Epoch 7/7:   0%|          | 0/25 [00:00<?, ?it/s]

Test Epoch 7: Loss=0.3075, Accuracy=0.9184, F1=0.9304


In [12]:
torch.save(model, "model.pth")

In [13]:
import onnx
import tensorflow as tf
from onnx_tf.backend import prepare


def pytorch_to_tensorflowlight(model: GestureClassifier, model_name: str = "gesture"):
    dummy_input = torch.randn(1, 21 * 2)

    input_names = ["actual_input_1"] + ["learned_%d" % i for i in range(6)]
    output_names = ["output1"]

    onnx_name = model_name + ".onnx"

    torch.onnx.export(model, dummy_input, onnx_name, verbose=True, input_names=input_names, output_names=output_names)

    onnx_model = onnx.load(onnx_name)
    tf_rep = prepare(onnx_model)

    tf_rep.export_graph(model_name)

    # Convert the model
    converter = tf.lite.TFLiteConverter.from_saved_model(model_name)  # path to the SavedModel directory
    tflite_model = converter.convert()

    # Save the model.
    with open(f"{model_name}.tflite", "wb") as f:
        f.write(tflite_model)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [14]:
pytorch_to_tensorflowlight(model)

2023-04-08 00:52:49.244219: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-04-08 00:52:49.272918: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Exported graph: graph(%actual_input_1 : Float(1, 42, strides=[42, 1], requires_grad=0, device=cpu),
      %learned_0 : Float(128, 42, strides=[42, 1], requires_grad=1, device=cpu),
      %learned_1 : Float(128, strides=[1], requires_grad=1, device=cpu),
      %learned_2 : Float(64, 128, strides=[128, 1], requires_grad=1, device=cpu),
      %learned_3 : Float(64, strides=[1], requires_grad=1, device=cpu),
      %learned_4 : Float(14, 64, strides=[64, 1], requires_grad=1, device=cpu),
      %learned_5 : Float(14, strides=[1], requires_grad=1, device=cpu)):
  %/fc1/Gemm_output_0 : Float(1, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1, onnx_name="/fc1/Gemm"](%actual_input_1, %learned_0, %learned_1), scope: __main__.GestureClassifier::/torch.nn.modules.linear.Linear::fc1 # /home/samoed/.pyenv/versions/3.10.4/lib/python3.10/site-packages/torch/nn/modules/linear.py:114:0
  %/Relu_output_0 : Float(1, 128, strides=[128, 1], requires_grad=1, device



INFO:tensorflow:Assets written to: gesture/assets


INFO:tensorflow:Assets written to: gesture/assets
2023-04-08 00:52:50.751784: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-04-08 00:52:50.751821: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-04-08 00:52:50.752476: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: gesture
2023-04-08 00:52:50.752901: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-04-08 00:52:50.752919: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: gesture
2023-04-08 00:52:50.754152: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:353] MLIR V1 optimization pass is not enabled
2023-04-08 00:52:50.754439: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-04-08 00:52:50.774027: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle a