In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

In [2]:
class GestureDataset(Dataset):
    def __init__(self, data: np.ndarray, label: np.ndarray):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.label = torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

In [3]:
## load the data
raw_data = np.load('E:/Hand_Gesture_Recognition/gesture_dataset/full_dataset.npz')
train_data, train_label, test_data, test_label = raw_data['train_data'], raw_data['train_label'], raw_data['test_data'], \
    raw_data['test_label']

# Create an instance of the dataset
train_dataset = GestureDataset(train_data, train_label)
test_dataset = GestureDataset(test_data, test_label)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

# Check if GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Define the model
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.flatten = nn.Flatten()
        self.network = nn.Sequential(
            nn.Linear(21 * 3, 63),  # Input layer, flattening 21x3 to 63 features
            nn.ReLU(), 
            nn.Linear(63, 189), 
            nn.ReLU(), 
            nn.Linear(189, 256),
            nn.ReLU(),
            nn.Linear(256, 189),
            nn.ReLU(),
            nn.Linear(189, 63),
            nn.ReLU(),
            nn.Linear(63, 12),  # Output layer, 12 classes
            nn.LogSoftmax(dim=1)  # Using LogSoftmax for numerical stability
        )

    def forward(self, x):
        x = self.flatten(x)  # Flatten the input
        return self.network(x)


# Initialize the model, loss function, and optimizer
model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()  # Using CrossEntropyLoss for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

Using device: cuda


In [5]:
from tqdm.notebook import tqdm

# Training loop with tqdm progress bar
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}')
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the device

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate loss

        optimizer.zero_grad()  # Clear gradients
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)

        labels = torch.argmax(labels, dim=1)
        
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        running_loss += loss.item() * inputs.size(0)
        progress_bar.set_postfix({'loss': loss.item(), 'acc': f"{100. * correct_predictions / total_predictions:.2f}%"})

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = 100. * correct_predictions / total_predictions

Epoch 1/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 2/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 3/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 4/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 5/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 6/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 7/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 8/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 9/10:   0%|          | 0/3907 [00:00<?, ?it/s]

Epoch 10/10:   0%|          | 0/3907 [00:00<?, ?it/s]

In [6]:
## test the model
model.eval()
correct_predictions = 0
total_predictions = 0

with torch.no_grad():
    progress_bar = tqdm(test_loader, desc='Testing')
    
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the device

        outputs = model(inputs)  # Forward pass

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        labels = torch.argmax(labels, dim=1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        progress_bar.set_postfix({'acc': f"{100. * correct_predictions / total_predictions:.2f}%"})


Testing:   0%|          | 0/142 [00:00<?, ?it/s]

In [8]:
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode = False,
    max_num_hands = 1,
    min_detection_confidence = 0.5,
    min_tracking_confidence = 0.5
)

cap = cv2.VideoCapture(0)
try:
    while True:
        success, frame = cap.read()
        if not success:
            print("Ignoring empty camera frame")
            continue
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        result = hands.process(frame_rgb)
        
        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                skeleton_data = np.array([[landmark.x, landmark.y, landmark.z] for landmark in hand_landmarks.landmark])
                input_data = torch.Tensor(skeleton_data).to(device).view(1, -1)
                predictions = model.network(input_data)
                predicted_gesture = np.argmax(predictions.cpu().detach())
                print(f"Predicted Gesture: {predicted_gesture}")
        else:
            print("No hands detected")
        cv2.imshow('Live Gesture Recognition', frame)
        if cv2.waitKey(5) & 0xFF == 27:
            break
        
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands detected
No hands d