In [20]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

In [21]:
class GestureDataset(Dataset):
    fingers = [
        [0, 1, 2, 3, 4],
        [0, 5, 6, 7, 8],
        [0, 9, 10, 11, 12],
        [0, 13, 14, 15, 16],
        [0, 17, 18, 19, 20]
    ]

    def __init__(self, data: np.ndarray, label: np.ndarray):
        ## data augmentation

        ## add the two nearby length in each finger
        for finger in self.fingers:
            for i in range(len(finger) - 1):
                dist = data[:, finger[i + 1]] - data[:, finger[i]]
                # add a new dimension
                dist = np.expand_dims(dist, axis=1)
                data = np.concatenate((data, dist), axis=1)

        print(data.shape)

        self.data = torch.tensor(data, dtype=torch.float32)
        self.label = torch.tensor(label, dtype=torch.float32)

        ## normalize the data
        self.data = (self.data - self.data.mean(dim=0)) / self.data.std(dim=0)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

In [22]:
## load the data
raw_data = np.load('dataset/full_dataset_200k.npz')
train_data, train_label, test_data, test_label = raw_data['train_data'], raw_data['train_label'], raw_data['test_data'], \
    raw_data['test_label']

# Create an instance of the dataset
train_dataset = GestureDataset(train_data, train_label)
test_dataset = GestureDataset(test_data, test_label)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

(200000, 41, 3)
(50000, 41, 3)


In [23]:
import torch.nn as nn
import torch.optim as optim

# Check if GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Define the model
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            nn.Flatten(),
            nn.Linear(41 * 3, 128),  # Input layer, flattening 21x3 to 63 features
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            # nn.Linear(256, 512),
            # nn.ReLU(),
            # nn.Linear(512, 256),
            # nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 12),  # Output layer, 12 classes
        )
        # self.network = nn.Sequential(
        #     nn.Conv2d(1, 16, kernel_size=(1, 3), stride=1, padding=0),
        #     nn.ReLU(),
        #     nn.Flatten(),
        #     nn.Linear(16 * 21, 256),
        #     nn.ReLU(),
        #     nn.Linear(256, 128),
        #     nn.ReLU(),
        #     nn.Linear(128, 12),
        #     nn.ReLU(),
        #     # nn.LogSoftmax(dim=1)
        # )

    def forward(self, x):
        # add a channel dimension
        x = x.unsqueeze(1)

        return self.network(x)


# Initialize the model, loss function, and optimizer
model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()  # Using CrossEntropyLoss for classification
optimizer = optim.Adam(model.parameters(), lr=1e-3)

Using device: cuda


In [24]:
## summary the model
from torchsummary import summary

summary(model, (41, 3), 1024, "cuda")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                [1024, 123]               0
            Linear-2                [1024, 128]          15,872
              ReLU-3                [1024, 128]               0
            Linear-4                [1024, 256]          33,024
              ReLU-5                [1024, 256]               0
            Linear-6                [1024, 128]          32,896
              ReLU-7                [1024, 128]               0
            Linear-8                 [1024, 12]           1,548
Total params: 83,340
Trainable params: 83,340
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.48
Forward/backward pass size (MB): 9.05
Params size (MB): 0.32
Estimated Total Size (MB): 9.85
----------------------------------------------------------------


In [25]:
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix

# Training loop with tqdm progress bar
epochs = 20
for epoch in range(epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}')
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the device

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate loss

        optimizer.zero_grad()  # Clear gradients
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        labels = torch.argmax(labels, dim=1)

        total_predictions += predicted.size(0)
        correct_predictions += (predicted == labels).sum().item()

        running_loss += loss.item() * inputs.size(0)
        progress_bar.set_postfix({'loss': loss.item(), 'acc': f"{100. * correct_predictions / total_predictions:.2f}%"})



Epoch 1/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 2/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 3/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 4/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 5/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 6/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 7/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 8/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 9/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 10/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 11/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 12/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 13/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 14/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 15/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 16/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 17/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 18/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 19/20:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 20/20:   0%|          | 0/196 [00:00<?, ?it/s]

In [26]:
## test the model
model.eval()
correct_predictions = 0
total_predictions = 0
y_true = []  # True labels
y_pred = []  # Predicted labels

with torch.no_grad():
    progress_bar = tqdm(test_loader, desc='Testing')

    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the device

        outputs = model(inputs)  # Forward pass

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        labels = torch.argmax(labels, dim=1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

        progress_bar.set_postfix({'acc': f"{100. * correct_predictions / total_predictions:.2f}%"})

    cm = confusion_matrix(y_true, y_pred)
    print(cm)

Testing:   0%|          | 0/49 [00:00<?, ?it/s]

[[4177   79    1   33    1    7    1    4    3    0    7    0]
 [  30 3833    7    5    5    8    1    4    2    4    2    0]
 [  17   64 3984    5    1   31    1    3    5    5    3    1]
 [  44   73    1 3738    2    6    2    0    1    5    2    0]
 [   4   33    0    4 4111    3    3    3    1    4    2    2]
 [  21   63   14    2    1 3894    0    3   13    4    2   16]
 [   6   27    0    1    8    2 4377    0    4    2    6    1]
 [   8   51    3    4    0   21    2 4071    4   24    8   33]
 [  22   54    4    3    4   32    1    5 3900    6    0    4]
 [  14   44    1    3   14    8    6   32    3 3869    2    5]
 [  14   18    2    4    2    8    3    3    1    0 4293    0]
 [  17   36    3    3    4   30    1   10    6    7    5 4420]]


In [27]:
# model_path = 'model/classifier.pth'
# torch.save(model.state_dict(), model_path)