In [186]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

In [187]:
class GestureDataset(Dataset):
    fingers = [
        [0, 1, 2, 3, 4],
        [0, 5, 6, 7, 8],
        [0, 9, 10, 11, 12],
        [0, 13, 14, 15, 16],
        [0, 17, 18, 19, 20]
    ]

    def __init__(self, data: np.ndarray, label: np.ndarray):
        ## data augmentation

        ## get the abs value
        data = np.abs(data)
        ## scale data to 0~1
        data = (data - np.min(data)) / (np.max(data) - np.min(data))

        ## add the two nearby length in each finger
        for finger in self.fingers:
            for i in range(len(finger) - 1):
                dist = data[:, finger[i + 1]] - data[:, finger[i]]
                # add a new dimension
                dist = np.expand_dims(dist, axis=1)
                data = np.concatenate((data, dist), axis=1)

        print(data.shape)

        self.data = torch.tensor(data, dtype=torch.float32)
        self.label = torch.tensor(label, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

In [188]:
## load the data
raw_data = np.load('./dataset/full_dataset_200k.npz')
train_data, train_label, test_data, test_label = raw_data['train_data'], raw_data['train_label'], raw_data['test_data'], \
    raw_data['test_label']

# Create an instance of the dataset
train_dataset = GestureDataset(train_data, train_label)
test_dataset = GestureDataset(test_data, test_label)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

data_dim = train_dataset.data.shape[1] * train_dataset.data.shape[2]
print(data_dim)

(200000, 41, 3)
(50000, 41, 3)
123


In [189]:
import torch.nn as nn
import torch.optim as optim

# Check if GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Define the model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        self.latent_dim = 1
        self.encoder = nn.Sequential(
            nn.Linear(data_dim, 128),  # Input layer, flattening 21x3 to 63 features
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, self.latent_dim),
        )

        self.decoder = nn.Sequential(
            nn.Linear(self.latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, data_dim),
            nn.Tanh(),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


# Initialize the model, loss function, and optimizer
model = Autoencoder().to(device)
criterion = nn.MSELoss()  # Using CrossEntropyLoss for classification
optimizer = optim.Adam(model.parameters(), lr=1e-3)

Using device: cuda


In [190]:
## summary the model
from torchsummary import summary

summary(model, (data_dim,), 1024, "cuda")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [1024, 128]          15,872
              ReLU-2                [1024, 128]               0
            Linear-3                [1024, 256]          33,024
              ReLU-4                [1024, 256]               0
            Linear-5                  [1024, 1]             257
            Linear-6                [1024, 256]             512
              ReLU-7                [1024, 256]               0
            Linear-8                [1024, 128]          32,896
              ReLU-9                [1024, 128]               0
           Linear-10                [1024, 123]          15,867
             Tanh-11                [1024, 123]               0
Total params: 98,428
Trainable params: 98,428
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.48
Forward/ba

In [191]:
from tqdm.notebook import tqdm

# Training loop with tqdm progress bar
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0

    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}')
    for inputs, labels in progress_bar:
        inputs = inputs.to(device)
        inputs = inputs.view(-1, data_dim)

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, inputs)  # Calculate loss

        optimizer.zero_grad()  # Clear gradients
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item() * inputs.size(0)
        progress_bar.set_postfix({'loss': loss.item()})



Epoch 1/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 2/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 3/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 4/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 5/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 6/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 7/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 8/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 9/10:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 10/10:   0%|          | 0/196 [00:00<?, ?it/s]

In [192]:
## run the test samples through the encoder

model.eval()

test_labels = []
test_latent = []

for inputs, labels in test_loader:
    inputs = inputs.to(device)
    inputs = inputs.view(-1, data_dim)
    outputs = model.encoder(inputs)
    test_latent.append(outputs.cpu().detach().numpy())
    test_labels.append(labels.cpu().detach().numpy())

test_latent = np.concatenate(test_latent, axis=0)

test_labels = np.concatenate(test_labels, axis=0)
test_labels = np.argmax(test_labels, axis=1)

print(test_latent.shape, test_labels.shape)

(50000, 1) (50000,)


In [193]:
## plot the latent space
%matplotlib tk
import matplotlib
import matplotlib.pyplot as plt

labels = np.unique(test_labels)
label_map = {
    0: 'call',
    1: 'dislike',
    2: 'fist',
    3: 'like',
    4: 'ok',
    5: 'one',
    6: 'palm',
    7: 'peace',
    8: 'rock',
    9: 'three',
    10: 'three2',
}

fig, ax = plt.subplots()

ax.set_title('Latent Space')

for i, label in enumerate(labels):
    idx = test_labels == label

    ax.scatter(test_latent[idx, 0],
               test_latent[idx, 1],
               c=np.array(matplotlib.colormaps['tab20'].colors[i]).reshape(1, -1),
               label=f"{label} {label_map[label]}",
               alpha=0.5)

ax.legend()
plt.show()

IndexError: index 1 is out of bounds for axis 1 with size 1

In [194]:
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix

# Initialize and fit KMeans
kmeans = KMeans(n_clusters=11, random_state=0).fit(test_latent)

# Predict the cluster IDs for each data point
cluster_ids = kmeans.predict(test_latent)

In [None]:
# Colors from a colormap
colors = matplotlib.cm.get_cmap('tab20', 11)

fig, ax = plt.subplots()
ax.set_title('Latent Space with K-Means Clustering')

for cluster in range(11):
    idx = cluster_ids == cluster
    ax.scatter(test_latent[idx, 0],
               test_latent[idx, 1],
               c=np.array(matplotlib.colormaps['tab20'].colors[cluster]).reshape(1, -1),
               label=f"Cluster {cluster}",
               alpha=0.5)

ax.legend()
plt.show()


In [195]:
# Calculate clustering accuracy
# We need to find the best match between cluster labels and true labels
def clustering_accuracy(true_labels, cluster_labels):
    # Confusion matrix between true labels and cluster labels
    matrix = confusion_matrix(true_labels, cluster_labels)
    # Summing the highest values in each column of the confusion matrix
    max_matches = np.sum(np.max(matrix, axis=0))
    accuracy = max_matches / len(true_labels)
    return matrix, accuracy


# Calculate and print the clustering accuracy
matrix, accuracy = clustering_accuracy(test_labels, cluster_ids)
print(f"Clustering Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(matrix)

Clustering Accuracy: 0.6876
Confusion Matrix:
[[   3 2120    0    0    3    2    4    0 2046  103    0]
 [   3  108 1553    0    3 1979    1    0  163  153    0]
 [   2   12    0    1   24    0    1    0   32 4012    0]
 [   7 1686    0    0    1   18    2    0 2013  112    0]
 [   9    0    0 1472    4    1   10  611   10   51 2096]
 [  20    0    0    0 3950    0    2    0   25  150    0]
 [   7    0    0 3931    3    0  104   57    9   30  196]
 [7815    3    0    1   54    0  596    0   42  158    0]
 [  13    0    0    0 3779    0    4    0   19  160    0]
 [3827    0    0    9    7    0  102    1   13   73   13]
 [  23    0    0   15   22    0 4269    0   22   42    3]]
