In [None]:
from PIL import Image
from torch.utils.data import Dataset

# Root folder of the leapGestRecog images. It is concatenated directly with
# relative sub-paths, so the trailing slash is required.
base_path = "../leapGestRecog/"

# Zero-padded subject identifiers "00" .. "09".
people = [str(number).zfill(2) for number in range(10)]

# Zero-padded gesture identifiers "01" .. "10".
gestures = [str(number).zfill(2) for number in range(1, 11)]

# 1-based gesture number -> gesture name.
gestures_map = dict(
    enumerate(
        (
            "palm",
            "l",
            "fist",
            "fist_moved",
            "thumb",
            "index",
            "ok",
            "palm_moved",
            "c",
            "down",
        ),
        start=1,
    )
)

class GestureDataset(Dataset):
  def __init__(self, transform = None, target_transform = None):
    img_labels = []
    for person in range(10):
      for label in range(1, 11):
        path = "%s/%s_%s/" % (str(person).zfill(2), str(label).zfill(2), gestures_map[label])
        for id in range(1, 201):
          filename = "frame_%s_%s_%s.png" % (str(person).zfill(2), str(label).zfill(2), str(id).zfill(4))
          img_labels.append((base_path + path + filename, label))
          
    self.img_labels = img_labels 
    self.transform = transform
    self.target_transform = target_transform
  def __getitem__(self, index):
    path, label = self.img_labels[index]
    img = Image.open(path).convert('1') # convert to black and white
    if self.transform is not None:
      img = self.transform(img)
    print(type(img))
    img = img / 255
    return img, label
  def __len__(self):
    return len(self.img_labels)

In [None]:
from torchvision import transforms
from torch.utils.data import DataLoader

# Every frame is converted to a tensor by torchvision's ToTensor when loaded.
gesture_dataset = GestureDataset(
    transform=transforms.ToTensor(),
)

# Shuffled mini-batches of 8 samples over the whole dataset.
train_dataloader = DataLoader(
    dataset=gesture_dataset,
    shuffle=True,
    batch_size=8,
)
# test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [None]:
import matplotlib.pyplot as plt

# Display one image and its label from a freshly drawn (shuffled) batch.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]

# Summarise the pixel values instead of printing every row of the image
# tensor, which floods the cell output without being readable.
print(f"Image shape: {tuple(img.shape)}, min: {img.min().item()}, max: {img.max().item()}")

fig, ax = plt.subplots()
ax.imshow(img, cmap="gray")  # single-channel image: use a grayscale colormap
ax.set_title(f"Label: {label.item()}")
ax.axis("off")
plt.show()
print(f"Label: {label}")

In [1]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
print(f'Using {device} device')

class Model(nn.Module):
  """Two-convolution CNN mapping single-channel images to 10 class logits.

  Layout: conv(1->32) -> max-pool/2 -> ReLU -> conv(32->128) -> 2-D dropout
  -> max-pool/2 -> ReLU -> flatten -> fc(100) -> dropout -> ReLU -> fc(10).

  Args:
    dropout: drop probability used by both dropout layers.
    input_size: ``(height, width)`` of the input images. Each 2x2 max-pool
      halves both dimensions (rounding down), so ``fc1`` receives
      ``128 * (height // 4) * (width // 4)`` features. The default corresponds
      to the 9600 pooled positions per channel (60 * 160) the layer was
      previously sized for, i.e. 240 x 640 inputs -- assumed to be the
      leapGestRecog frame size; pass the real size if the images differ.

  Note:
    With the default ``input_size`` ``fc1`` has 128 * 9600 * 100 weights
    (about 123M parameters), which dominates the memory use of the model.
  """

  def __init__(self, dropout=0.2, input_size=(240, 640)):
    super(Model, self).__init__()
    height, width = input_size
    self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(32, 128, kernel_size=3, padding=1)
    self.conv2_drop = nn.Dropout2d(p=dropout)
    # The 3x3 convolutions use padding=1 and keep the spatial size; only the
    # two max-pools shrink it. Features = channels * pooled height * pooled width.
    flat_features = 128 * (height // 4) * (width // 4)
    self.fc1 = nn.Linear(flat_features, 100)
    self.fc2 = nn.Linear(100, 10)
    self.fc1_drop = nn.Dropout(p=dropout)

  def forward(self, x):
    """Return raw class logits of shape ``(batch, 10)`` for ``x`` of shape ``(batch, 1, H, W)``."""
    x = torch.relu(F.max_pool2d(self.conv1(x), 2))
    x = torch.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))

    # Flatten channel, height and width into one feature axis. Keeping the
    # channel axis separate would make fc1 run once per channel and produce a
    # (batch, 128, 10) output, which is the batch-size mismatch the loss
    # reported.
    x = x.flatten(start_dim=1)

    x = torch.relu(self.fc1_drop(self.fc1(x)))
    # No activation on the last layer: nn.CrossEntropyLoss expects
    # unnormalised logits, and a ReLU here would clamp every negative logit to 0.
    return self.fc2(x)


# Build the network with its default hyper-parameters, then move its
# parameters to the selected device (Module.to returns the module itself).
model = Model()
model = model.to(device)

Using cuda device


In [None]:

def train(dataloader, model, loss_fn, optimizer):
  """Run one training epoch over ``dataloader``.

  Args:
    dataloader: iterable of ``(X, y)`` batches that also exposes ``.dataset``
      (only its length is used, for progress output). ``y`` must hold the
      1-based gesture labels 1..10.
    model: module producing one row of class scores per sample.
    loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor;
      it receives 0-based class indices as target.
    optimizer: optimizer over the parameters of ``model``.

  Prints the loss of every 100th batch together with the number of samples
  processed so far.
  """
  size = len(dataloader.dataset)
  # Put the batches on whatever device the model lives on instead of relying
  # on a notebook-level global being in sync with the model.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")
  model.train()
  for batch, (X, y) in enumerate(dataloader):
    # Shift labels 1..10 to class indices 0..9. The indices are passed as-is:
    # an integer one-hot matrix is neither a valid class-index target nor a
    # valid (floating point) class-probability target for CrossEntropyLoss.
    X, y = X.to(device), (y - 1).to(device)

    # Compute prediction error
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 100 == 0:
      loss_value, current = loss.item(), batch * len(X)
      print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Objective and optimiser: cross-entropy loss minimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

# Number of full passes over train_dataloader.
epochs = 5
for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train(
      dataloader=train_dataloader,
      model=model,
      loss_fn=loss_fn,
      optimizer=optimizer,
  )
  # test(test_dataloader, model, loss_fn)
print("Done!")

In [None]:
# Example of target with class indices:
# 3 samples with 5 class scores each, and one class index in [0, 5) per sample.
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5).requires_grad_()
target = torch.empty(3, dtype=torch.long)
target.random_(5)
print(input.size())
print(target.size())
output = loss(input=input, target=target)
# Populates input.grad with the gradient of the scalar loss.
output.backward()

In [None]:
# Bare expression: the notebook displays True/False as the cell result,
# telling whether PyTorch can use a CUDA device in this kernel.
torch.cuda.is_available()