In [1]:
from utils import process_batch_for_landmarks, extract_landmarks_from_batch
import torch
from torch.utils.data import DataLoader
import os
from loader import GestureDataset
import mediapipe as mp 
import torch.nn as nn 




In [11]:
from torchvision import transforms

# Locations of the IPN Hand training annotations and the HDF5 frame store.
# NOTE(review): on Windows, os.path.join("D:", "IPN_Hand") produces the
# drive-relative path "D:IPN_Hand" (resolved against the current directory on
# drive D:), not "D:\IPN_Hand" - confirm that this is the intended location.
annotations_dir = os.path.join("D:", "IPN_Hand", "annotations-20231128T085307Z-001", "annotations")
file = os.path.join(annotations_dir, "Annot_TrainList.txt")
# frame_folders = os.path.join(".", "IPN_Hand", "frames")
hdf5_path = os.path.join("D:", "IPN_Hand", "hand_gestures.h5")

# The only preprocessing step is the conversion of each frame to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# label_all_gestures=True presumably collapses every gesture class into a single
# "gesture" label - TODO confirm against loader.py.
dataset = GestureDataset(
    hdf5_path,
    file,
    label_all_gestures=True,
    transform=transform,
)

In [12]:
# Batches of five clips, served in dataset order.
# NOTE(review): shuffle=False means training sees the samples in annotation-file
# order on every epoch - confirm this is deliberate.
train_loader = DataLoader(dataset=dataset, batch_size=5, shuffle=False)

In [13]:
# MediaPipe handles: the hands solution module, its drawing helpers, and one
# detector instance that is shared by every landmark-extraction call below.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# At most one hand per frame; static_image_mode=False selects MediaPipe's
# video-stream mode.
# NOTE(review): the same detector is reused across clips and batches - confirm
# that carrying its state from one clip to the next is acceptable.
hands = mp_hands.Hands(max_num_hands=1, static_image_mode=False)

In [5]:
# Shape check on the first training batch only: print the raw frame tensor shape
# and the shape of the landmarks extracted from it.
# The recorded run printed torch.Size([5, 60, 3, 240, 320]) and (5, 60, 63);
# 63 is presumably 21 hand landmarks x (x, y, z) - TODO confirm in utils.py.
for batch in train_loader:
    frames = batch[0]
    print(frames.shape)
    processed_batch = process_batch_for_landmarks(frames)
    landmarks = extract_landmarks_from_batch(processed_batch, hands)
    print(landmarks.shape)
    break  # one batch is enough for a shape check

torch.Size([5, 60, 3, 240, 320])
(5, 60, 63)


In [14]:
from model import LSTMGestureClassifier

In [15]:
# Positional constructor arguments for the classifier.
# The first value matches the landmark feature width (63) and the forward-pass
# check in this notebook yields two outputs per clip; the order is presumably
# (input_size, hidden_size, num_layers, num_classes) - TODO confirm in model.py.
lstm_args = (63, 128, 2, 2)
model = LSTMGestureClassifier(*lstm_args)

# Uncomment to start from previously saved weights instead of a fresh model:
# model.load_state_dict(torch.load("lstm_det_local.pth"))

In [27]:
# Forward-pass check on the first training batch only: extract landmarks, turn
# them into a float tensor and push them through the untrained model.
# The recorded run printed torch.Size([5, 60, 63]) for the landmarks and
# torch.Size([5, 2]) for the model output.
for batch in train_loader:
    processed_batch = process_batch_for_landmarks(batch[0])
    landmarks = torch.tensor(extract_landmarks_from_batch(processed_batch, hands)).float()
    print(landmarks.shape)
    out = model(landmarks)
    print(out.shape)
    break  # a single batch is all this check needs

torch.Size([5, 60, 63])
torch.Size([5, 2])


In [16]:
# Per-class loss weights (example values), ordered by class index.
# NOTE(review): the tensor is [weight_for_gesture, weight_for_no_gesture], so
# class index 0 receives 0.2 and class index 1 receives 0.8 - confirm which
# index the dataset uses for "gesture" so the names match the classes.
weight_for_gesture, weight_for_no_gesture = 0.2, 0.8
weights = torch.tensor((weight_for_gesture, weight_for_no_gesture))

In [17]:
# Class-weighted cross-entropy: weights[i] rescales the loss of class index i.
criterion = nn.CrossEntropyLoss(weight=weights)

In [18]:
# Adam over every parameter of the model.
# NOTE(review): 0.01 is ten times Adam's default step size (1e-3) - confirm
# that this is deliberate.
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
# Everything runs on the CPU; the automatic CUDA selection is left disabled in
# the trailing comment.
# NOTE(review): before re-enabling CUDA, confirm that the model, the loss
# weights and the landmark tensors all end up on `device` as well.
device = "cpu" #  torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [20]:
# Train for 5 epochs with one optimizer step per batch and save a checkpoint
# after every epoch.
#
# The model and the loss (which holds the class-weight tensor) are placed on
# `device` together with the landmarks and labels, so all operands of the
# forward pass and of the loss live on the same device. With device == "cpu"
# these moves are no-ops.
model.to(device)
criterion.to(device)

for epoch in range(5):
    # Put dropout / batch-norm layers (if the model has any) in training mode.
    model.train()
    loss_history = []
    for i, data in enumerate(train_loader):
        # The raw frames are handed to the landmark helpers as they come out of
        # the loader; only the extracted landmarks and the labels are moved.
        inputs, labels = data[0], data[1].to(device)
        processed_batch = process_batch_for_landmarks(inputs)
        landmarks = extract_landmarks_from_batch(processed_batch, hands)
        landmarks = torch.tensor(landmarks).float().to(device)

        # Forward pass
        outputs = model(landmarks)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the history and the log line.
        batch_loss = loss.item()
        loss_history.append(batch_loss)
        print(f'Epoch [{epoch+1}], Loss: {batch_loss:.4f}')

    # Per-epoch summary; skipped when the loader produced no batches.
    if loss_history:
        print(f'Epoch [{epoch+1}] mean loss: {sum(loss_history) / len(loss_history):.4f}')

    # The checkpoint name uses the 0-based epoch index (the log lines above are
    # 1-based); kept as-is so existing checkpoint names stay valid.
    name = f"lstm_det_local_epoch_{epoch}.pth"
    torch.save(model.state_dict(), name)

Epoch [1], Loss: 0.7005
Epoch [1], Loss: 0.5945
Epoch [1], Loss: 0.3486
Epoch [1], Loss: 0.0556
Epoch [1], Loss: 0.0007
Epoch [1], Loss: 3.0431
Epoch [1], Loss: 0.0008
Epoch [1], Loss: 0.0011
Epoch [1], Loss: 0.0013
Epoch [1], Loss: 0.0077
Epoch [1], Loss: 1.5926
Epoch [1], Loss: 0.0305
Epoch [1], Loss: 1.1772
Epoch [1], Loss: 1.1702
Epoch [1], Loss: 0.1203
Epoch [1], Loss: 0.5100
Epoch [1], Loss: 0.6158
Epoch [1], Loss: 0.6885
Epoch [1], Loss: 0.3344
Epoch [1], Loss: 0.3286
Epoch [1], Loss: 0.4624
Epoch [1], Loss: 0.5317
Epoch [1], Loss: 0.7464
Epoch [1], Loss: 0.7587
Epoch [1], Loss: 0.3913
Epoch [1], Loss: 0.9746
Epoch [1], Loss: 0.3527
Epoch [1], Loss: 0.8019
Epoch [1], Loss: 0.3341
Epoch [1], Loss: 1.0472
Epoch [1], Loss: 0.7930
Epoch [1], Loss: 0.2212
Epoch [1], Loss: 0.7715
Epoch [1], Loss: 0.7540
Epoch [1], Loss: 0.7334
Epoch [1], Loss: 0.5040
Epoch [1], Loss: 0.5180
Epoch [1], Loss: 0.7179
Epoch [1], Loss: 0.7207
Epoch [1], Loss: 0.7203
Epoch [1], Loss: 0.4954
Epoch [1], Loss:

In [21]:
# Same dataset setup as for training, pointed at the test annotation list.
# NOTE(review): on Windows, os.path.join("D:", ...) yields a drive-relative
# path ("D:IPN_Hand..."), not "D:\IPN_Hand..." - confirm this is intended.
annotations_dir = os.path.join("D:", "IPN_Hand", "annotations-20231128T085307Z-001", "annotations")
file = os.path.join(annotations_dir, "Annot_TestList.txt")

# Frames are only converted to tensors, as for the training set.
transform = transforms.Compose([transforms.ToTensor()])

test_set = GestureDataset(
    hdf5_path,
    file,
    label_all_gestures=True,
    transform=transform,
)

In [22]:
# One clip per batch, in dataset order, so every printed prediction lines up
# with a single ground-truth label.
test_loader = DataLoader(dataset=test_set, batch_size=1, shuffle=False)

In [23]:

# Spot-check the model on the first 31 test clips (indices 0..30), printing the
# predicted class next to the ground-truth label for each clip.
#
# Inference runs in eval mode and without autograd bookkeeping. The previous
# train/eval mode is restored afterwards so that re-running a training cell
# after this one is not silently affected.
model.to(device)  # no-op while device == "cpu"
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            # Raw frames go to the landmark helpers as loaded; only the
            # extracted landmarks and the labels are moved to `device`.
            inputs, labels = data[0], data[1].to(device)
            processed_batch = process_batch_for_landmarks(inputs)
            landmarks = extract_landmarks_from_batch(processed_batch, hands)
            landmarks = torch.tensor(landmarks).float().to(device)

            outputs = model(landmarks)

            # Predicted class = index of the largest output per clip.
            y = torch.argmax(outputs, dim=1)
            print(f"y = {y} \t gt = {labels}")

            if i == 30:
                break
finally:
    model.train(was_training)


y = tensor([1]) 	 gt = tensor([1])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([1]) 	 gt = tensor([1])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([1]) 	 gt = tensor([1])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([1]) 	 gt = tensor([1])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([1]) 	 gt = tensor([1])
y = tensor([1]) 	 gt = tensor([1])
y = tensor([0]) 	 gt = tensor([0])
y = tensor([0]) 	 gt