# Install packages and import dependencies

In [23]:
try:
    import torch
    from torch import nn 
    from torchvision import transforms 
    from torch.utils.data import Dataset
    import numpy as np

except ImportError:
    %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
    import torch
    from torch import nn 
    from torchvision import transforms
    from torch.utils.data import Dataset
    import numpy as np

In [24]:
import time
from PIL import Image, ImageFilter
from pathlib import Path
try:
    from tqdm import tqdm
except ImportError:
    %pip install tqdm
    from tqdm import tqdm

# Class to load the dataset

This class loads the dataset from a specified directory.
The dataset is Leap Motion's Hand Gesture Recognition Database.
The image paths are collected in `x` and the labels are one-hot encoded.

In [25]:
class LeapGestRecog(Dataset):
    """Dataset of gesture images with one-hot encoded labels.

    Every file found (recursively) below ``root`` is treated as one sample.
    The label of a sample is derived from the name of the directory that
    directly contains it: the name is split on ``'_'``, the first token is
    dropped and the remaining tokens are re-joined with ``'_'``
    (e.g. ``01_palm`` -> ``palm``, ``04_fist_moved`` -> ``fist_moved``).

    Args:
        root: Directory that is scanned recursively for sample files.
        transform: Optional callable applied to the grayscale PIL image
            returned by ``__getitem__``.
    """
    def __init__(self, root:Path, transform=None):
        self._root = Path(root)
        self._transform = transform
        self._x = []  # file paths, one per sample
        self._y = []  # string labels, parallel to self._x

        self.__get_data()  # Load all the images from subdirectories (recursively)

        self._len = len(self._y)

        # Sort the unique labels so the label -> one-hot column mapping is the
        # same on every run (plain set iteration order depends on string hashing).
        self._predictors = sorted(set(self._y))
        label_to_index = {label: idx for idx, label in enumerate(self._predictors)}

        self._y_one_hot = np.zeros((self._len, len(self._predictors)), dtype=float)
        for row, label in enumerate(self._y):
            self._y_one_hot[row, label_to_index[label]] = 1

        print(f'Successfully loaded {self._len} images form \"{self._root}\"')

    def __get_data(self, path:Path = None):
        """Collect every file below ``path`` (default: the dataset root), recursively."""
        if path is None:
            path = self._root
        # iterdir() yields entries in arbitrary order; sort for reproducible indices.
        for entry in sorted(path.iterdir()):
            if entry.is_file():
                self._x.append(entry)
                # Label = containing directory name without its leading "<id>_" token.
                self._y.append('_'.join(path.name.split('_')[1:]))
            elif entry.is_dir():
                self.__get_data(entry)

    def __len__(self):
        """Return the number of samples found under the root directory."""
        return self._len

    def __getitem__(self, index):
        """Return ``(image, one_hot_label)`` for the sample at ``index``.

        The image is opened as 8-bit grayscale and passed through the
        transform if one is set; the label is a float tensor.
        """
        # Image.open keeps the file open lazily; close it as soon as the
        # converted copy has been created.
        with Image.open(self._x[index]) as raw:
            img = raw.convert('L')
        y = torch.Tensor(self._y_one_hot[index])
        if self._transform:
            img = self._transform(img)
        return img, y

    @property
    def transform(self):
        """Callable applied to each image in ``__getitem__`` (or ``None``)."""
        return self._transform

    @transform.setter
    def transform(self, tf):
        self._transform = tf

# Dataset location, given relative to the current working directory.
root = './data/leapGestRecog/'
# Scan the directory tree once; images themselves are opened lazily per item.
dataset = LeapGestRecog(root=root)

Successfully loaded 20000 images form "data\leapGestRecog"


# Define transformation of the dataset

In [26]:
class cropToContent(nn.Module):
    """Crop a PIL image to the bounding box of its bright content, plus padding.

    The image is blurred and thresholded into a 1-bit mask. The bounding box
    of the non-zero mask region is grown by ``padding`` and that box is
    cropped out of the *original* (un-blurred) image. If the mask is empty
    (no pixel reaches the threshold) the image is returned unchanged.

    Args:
        size (tuple): Stored and shown in ``repr`` only; this transform does
            not resize the image itself.
        thrushold (int): Blurred pixel values below this become background (0),
            all others foreground (255).
        padding (tuple): Offsets added element-wise to the
            ``(left, upper, right, lower)`` bounding box.
    """
    def __init__ (self, size:tuple=(128,128), thrushold:int=50, padding:tuple=(-10,-10,10,10)):
        super().__init__()
        self._size = size
        self._thrushold = thrushold
        self._padding = padding

    def forward (self, img:"Image.Image"):
        """Return ``img`` cropped to its padded content box (or ``img`` itself if no content)."""
        blurred = img.filter(ImageFilter.GaussianBlur(radius = 10))
        # Threshold to a 1-bit mask: background -> 0, content -> 255.
        mask = blurred.point(lambda x : 0 if x < self._thrushold else 255, mode='1')
        bbox = mask.getbbox()  # None when the mask has no non-zero pixel
        if bbox is None:
            # Nothing to crop to; keep the full frame instead of failing.
            return img
        padded_bbox = tuple(edge + pad for edge, pad in zip(bbox, self._padding))
        return img.crop(padded_bbox)

    def __repr__(self):
        return f"{self.__class__.__name__}(size={self._size}, thrushold={self._thrushold})"

In [27]:
# Per-sample preprocessing: crop to the gesture, resize to 128x128, convert to a tensor.
transform = transforms.Compose([
    cropToContent(),
    transforms.Resize(size=(128, 128)),
    transforms.ToTensor(),
])
dataset.transform = transform

# Splitting the dataset and defining the data loaders

In [28]:
# split data into train (80%) and test (remaining ~20%) datasets
# The test length is the remainder so the two lengths always sum to len(dataset);
# truncating both fractions independently can drop a sample and make random_split raise.
train_len = int(0.8*len(dataset))
test_len = len(dataset) - train_len
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_len, test_len])

# define data loader for training and testing datasets
batch_size = int(64)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Model Construction

In [29]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


Convolutional neural network with three convolutional layers (each followed by a ReLU non-linearity) and one fully connected hidden layer

In [30]:
class HandGestRecog(nn.Module):
    """CNN classifier for single-channel images.

    Three 5x5 convolutions with ReLU (the first two followed by 2x2 max
    pooling) feed a 128-unit hidden layer and a ``num_classes``-way output.
    ``forward`` returns log-probabilities (``LogSoftmax`` over dim 1).

    Args:
        num_classes: Number of output classes. Defaults to 10.
        input_size: ``(height, width)`` of the images the model will receive.
            Used only to size the first fully connected layer.
            Defaults to ``(128, 128)``.
    """
    def __init__(self, num_classes=10, input_size=(128, 128)):
        super(HandGestRecog, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=5),
            nn.ReLU()
        )
        # Probe the conv stack once to learn the flattened feature count
        # instead of hard-coding it for one input resolution
        # (128x128 inputs give 128*25*25).
        with torch.no_grad():
            probe = torch.zeros(1, 1, *input_size)
            flat_features = self.conv_layers(probe).flatten(1).shape[1]
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
            # NOTE: nn.CrossEntropyLoss applies log-softmax itself; feeding it
            # these log-probabilities is redundant but does not change the loss.
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, num_classes)`` log-probabilities."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

# Build the network, then move its parameters to the selected device.
model = HandGestRecog()
model = model.to(device)

In [31]:
# Training objective and the optimizer that updates the model's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Training the Model

In [32]:
def train(epochs = 10, limit = 1e-6):
    """Optimise the global ``model`` on ``train_loader``.

    Runs up to ``epochs`` passes over the loader using the global
    ``optimizer`` and ``loss_fn``, printing one progress line per batch and
    the wall-clock time per epoch. Training stops immediately (the function
    returns) as soon as a single batch loss drops below ``limit``.

    Args:
        epochs: Maximum number of passes over ``train_loader``.
        limit: Batch-loss value below which training is stopped early.
    """
    progress_line = 'Epoch : {} [{:5.0f}/{} ({:3.0f}%)]\tLoss: {:.6f}\t{:5.3f}ms'

    model.train()
    for epoch in range(epochs):
        print(f'Train Epoch: {epoch} :')
        epoch_started = time.time()

        for step, (inputs, labels) in enumerate(train_loader):
            step_started = time.time()
            inputs = inputs.to(device)
            labels = labels.to(device)

            # One optimisation step: reset grads, forward, backward, update.
            optimizer.zero_grad()
            batch_loss = loss_fn(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()

            loss_value = batch_loss.item()
            print(progress_line.format(
                epoch,
                step * len(inputs),
                len(train_loader.dataset),
                100. * step / len(train_loader),
                loss_value,
                (time.time() - step_started)*1e3))

            if loss_value < limit:
                print('\nLoss is in acceptable range\n\n')
                return

        print('Epoch time: {:.3f}s'.format(time.time() - epoch_started))

# Time the whole training run. time.time() differences are in seconds, so the
# value is reported with an "s" suffix (it was previously mislabelled as "ms").
total_time = time.time()
train()
print('Total Training Time: {:5.3f}s'.format(time.time() - total_time))

Train Epoch: 0 :
Epoch time: 312.945s
Train Epoch: 1 :
Epoch time: 77.004s
Train Epoch: 2 :
Epoch time: 291.468s
Train Epoch: 3 :
Epoch time: 106.138s
Train Epoch: 4 :

Loss is in acceptable range


Total Training Time: 857.259ms


# Testing the Model

In [33]:
def test():
    model.eval()
    test_loss = 0
    correct = 0
    count = 0
    with torch.no_grad():
        for i, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += loss_fn(output, target).item()
            max_pred_idx = output.max(axis=1).indices
            max_target_idx = target.max(axis=1).indices
            correct += max_pred_idx.eq(max_target_idx.data.view_as(max_pred_idx)).sum()
            count += len(target)
            print(f'Test Case : {i} \t\t {100.*count/len(test_loader.dataset)}%')
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Evaluate the model on test_loader and print the loss/accuracy summary.
test()

Test Case : 0 		 1.6%
Test Case : 1 		 3.2%
Test Case : 2 		 4.8%
Test Case : 3 		 6.4%
Test Case : 4 		 8.0%
Test Case : 5 		 9.6%
Test Case : 6 		 11.2%
Test Case : 7 		 12.8%
Test Case : 8 		 14.4%
Test Case : 9 		 16.0%
Test Case : 10 		 17.6%
Test Case : 11 		 19.2%
Test Case : 12 		 20.8%
Test Case : 13 		 22.4%
Test Case : 14 		 24.0%
Test Case : 15 		 25.6%
Test Case : 16 		 27.2%
Test Case : 17 		 28.8%
Test Case : 18 		 30.4%
Test Case : 19 		 32.0%
Test Case : 20 		 33.6%
Test Case : 21 		 35.2%
Test Case : 22 		 36.8%
Test Case : 23 		 38.4%
Test Case : 24 		 40.0%
Test Case : 25 		 41.6%
Test Case : 26 		 43.2%
Test Case : 27 		 44.8%
Test Case : 28 		 46.4%
Test Case : 29 		 48.0%
Test Case : 30 		 49.6%
Test Case : 31 		 51.2%
Test Case : 32 		 52.8%
Test Case : 33 		 54.4%
Test Case : 34 		 56.0%
Test Case : 35 		 57.6%
Test Case : 36 		 59.2%
Test Case : 37 		 60.8%
Test Case : 38 		 62.4%
Test Case : 39 		 64.0%
Test Case : 40 		 65.6%
Test Case : 41 		 67.2%
Test Cas