# Task 1: Understand body language by gesture recognition with fully connected neural network

## 1. Do literature search on gesture recognition and its application in Human-Robot Interaction. Summarize what you have learned.

## 2. Referring to the previous example about building a neural network based classifier, use what you have learned to read the code for gesture classification below and design your own network architecture using fully connected layers.

## 3. Run the model. Analyse and comment on the performance of your model based on fully connected layers.

In [46]:
import cv2
import numpy as np
import os
import itertools
import torch.utils.data as utils_data

## 1) data preprocessing

Define a function to preprocess the images including resizing and binaryzation

In [2]:
def processSkinImage(filePath, resize_HW=48):
    # step 1
    # read the image
    original = cv2.imread(filename=filePath)

    # step 2
    # resize the image to
    image_resized = cv2.resize(original, (resize_HW, resize_HW))

    # step 3
    # convert the image from rgb to YCbCr
    image_ycbcr = cv2.cvtColor(image_resized, cv2.COLOR_BGR2YCR_CB)

    # step 4
    # get the central color of the image
    # expected the hand to be in the central of the image
    Cb_center_color = image_ycbcr[int(resize_HW/2), int(resize_HW/2), 1]
    Cr_center_color = image_ycbcr[int(resize_HW/2), int(resize_HW/2), 2]
    # set the range
    Cb_Difference = 15
    Cr_Difference = 10

    # step 5
    # detect skin pixels
    Cb = image_ycbcr[:, :, 1]
    Cr = image_ycbcr[:, :, 2]
    index = np.where((Cb >= Cb_center_color-Cb_Difference) & (Cb <= Cb_center_color+Cb_Difference)
                     & (Cr >= Cr_center_color-Cr_Difference) & (Cr <= Cr_center_color+Cr_Difference))

    # Mark detected pixels and output
    image_output = np.zeros((resize_HW, resize_HW))
    image_output[index] = 255

    # show image
    # cv2.imshow("", image_output)
    # cv2.waitKey(0)
    return image_output

Deal with all the images using the function defined above.
The processed data is stored in a new folder 'dataset_processed'.

Generate labels for each class. (class 0, 1, ..., num_classes)


In [4]:
path = './dataset/images'
path_processed = './dataset_processed/images'

# -------------------images processing--------------
for mainDir, subDir, fileList in os.walk(path):
    for file in fileList:
        currentPath = os.path.join(mainDir, file)
        print(currentPath)
        processedImage = processSkinImage(currentPath)

        new_mainDir = path_processed + mainDir.split(path)[-1] # mainDir.split(path)[-1] 的值为 /left  /right等
        # new_mainDir 应该为 "./dataset_processed/images/left"等
        if not os.path.exists(new_mainDir):
            os.makedirs(new_mainDir)
        cv2.imwrite(os.path.join(new_mainDir, file), processedImage)

# -----------------label generation----------------
label_path = './dataset_processed/labels'
if not os.path.exists(label_path):
    os.makedirs(label_path)

files = os.listdir(path)
print(files)
for i, file in enumerate(files):
    subclass_label_path = os.path.join(label_path, file+'.txt')
    with open(subclass_label_path, 'w') as f:
        f.write('#label\n')
    print(os.listdir(os.path.join(path_processed, file)))
    for _ in range(len(os.listdir(os.path.join(path_processed, file)))):
        with open(subclass_label_path, 'a') as f:
            f.write('{:d}\n'.format(i))
    f.close()

./dataset/images\left\left (1).jpg
./dataset/images\left\left (10).jpg
./dataset/images\left\left (11).jpg
./dataset/images\left\left (12).jpg
./dataset/images\left\left (13).jpg
./dataset/images\left\left (14).jpg
./dataset/images\left\left (15).jpg
./dataset/images\left\left (16).jpg
./dataset/images\left\left (17).jpg
./dataset/images\left\left (18).jpg
./dataset/images\left\left (19).jpg
./dataset/images\left\left (2).jpg
./dataset/images\left\left (20).jpg
./dataset/images\left\left (21).jpg
./dataset/images\left\left (22).jpg
./dataset/images\left\left (23).jpg
./dataset/images\left\left (24).jpg
./dataset/images\left\left (25).jpg
./dataset/images\left\left (26).jpg
./dataset/images\left\left (27).jpg
./dataset/images\left\left (3).jpg
./dataset/images\left\left (4).jpg
./dataset/images\left\left (5).jpg
./dataset/images\left\left (6).jpg
./dataset/images\left\left (7).jpg
./dataset/images\left\left (8).jpg
./dataset/images\left\left (9).jpg
./dataset/images\palm\palm (1).jpg
./

## 2) load data

In [13]:
Image = []
path_images = './dataset_processed/images'
for mainDir, subDir, fileList in os.walk(path_images):
    for file in fileList:
        
        currentPath = os.path.join(mainDir, file)
        # print(currentPath)
        Image.append(cv2.imread(currentPath)[:, :, 0])
Image = np.array(Image)
dataset_size, H, W = Image.shape
# for FCNN model, the image need to be stretched into one dimension: (b, h, w)->(b, h*w)
Image = Image.reshape(dataset_size, -1)
print(Image.shape)

Label = []
path_labels = './dataset_processed/labels'
for file in os.listdir(path_labels):
    Label.append(np.loadtxt(os.path.join(path_labels, file)))
    #print(np.loadtxt(os.path.join(path_labels, file)))
#print(Label)
Label = np.array(list(itertools.chain.from_iterable(Label)))#去掉一层括号
#print(Label)
num_classes = int(np.max(Label))+1
print(Label.shape)

(78, 2304)
(78,)


## 3) build your own neural network based on fully connected layers.

Design the neural network architecture

In [16]:
from torch import nn
import torch.nn.functional as F

In [34]:
# class FCNNModel(nn.Module):
#     def __init__(self, *args):
#         super().__init__()
#         # code by yourself
    
#     def forward(self, x):
#         # code by yourself

class JYT_model(nn.Module):
    def __init__(self, input_layer_size, hidden_layer_size, num_classes):
        super().__init__()
        self.hidden = nn.Linear(input_layer_size, hidden_layer_size)
        self.out = nn.Linear(hidden_layer_size, num_classes)
        self.drop_out = nn.Dropout(0.5)
    
    def forward(self, x):
        x = F.relu(self.hidden(x));
        x = self.drop_out(x)
        x = self.out(x)
        return x

instantiate your model, set a optimizer and define a loss function.

In [38]:
model = JYT_model(input_layer_size=H*W, hidden_layer_size=int(H*W/2), num_classes=num_classes)
if torch.cuda.is_available():
    model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

In [44]:
for name, parameter in model.named_parameters():
    print(name, parameter, parameter.size())

hidden.weight Parameter containing:
tensor([[ 0.0141,  0.0123, -0.0120,  ..., -0.0120, -0.0017,  0.0006],
        [ 0.0074, -0.0161,  0.0079,  ..., -0.0005, -0.0084,  0.0043],
        [-0.0207, -0.0051,  0.0059,  ...,  0.0026, -0.0014,  0.0154],
        ...,
        [-0.0088,  0.0153,  0.0067,  ..., -0.0163, -0.0178, -0.0064],
        [-0.0096, -0.0161,  0.0124,  ..., -0.0147, -0.0077, -0.0179],
        [ 0.0046,  0.0115, -0.0196,  ...,  0.0178,  0.0019,  0.0058]],
       device='cuda:0', requires_grad=True) torch.Size([1152, 2304])
hidden.bias Parameter containing:
tensor([-0.0042, -0.0076, -0.0148,  ...,  0.0054, -0.0088, -0.0036],
       device='cuda:0', requires_grad=True) torch.Size([1152])
out.weight Parameter containing:
tensor([[ 0.0276, -0.0282, -0.0089,  ..., -0.0117, -0.0269,  0.0200],
        [-0.0033, -0.0037, -0.0251,  ...,  0.0286, -0.0097,  0.0013],
        [ 0.0094,  0.0138, -0.0053,  ...,  0.0143, -0.0068, -0.0291],
        [ 0.0044,  0.0113,  0.0231,  ...,  0.0050,  

## 4) train and test

Encapsulate data

In [47]:
dataset = utils_data.TensorDataset(torch.Tensor(Image), torch.LongTensor(Label))
split_ratio = 0.8
train_size = int(split_ratio * dataset_size)
test_size = dataset_size - train_size
train_set, test_set = utils_data.random_split(dataset, [train_size, test_size])
train_loader = utils_data.DataLoader(dataset=train_set, batch_size=8, shuffle=True)
test_loader = utils_data.DataLoader(dataset=test_set, batch_size=8, shuffle=True)
print('Data is ready!')

Data is ready!


The following is the training and testing process in detail.

In [54]:
best_accuracy = 0
for epoch in range(500):
    running_loss = 0.0
    train_acc = 0.0
    testing_loss = 0.0
    testing_acc = 0.0
    for step, (batch_image, batch_label) in enumerate(train_loader):
        model.train()
        if torch.cuda.is_available():
             batch_image, batch_label = batch_image.cuda(), batch_label.cuda()
        batch_output = model(batch_image)
        batch_loss = loss_func(batch_output, batch_label)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # train accuracy
        _, train_predicted = torch.max(batch_output.data, 1)
        train_acc += (train_predicted == batch_label).sum().item()

    train_acc /= train_size
    running_loss /= (step+1)
    with torch.no_grad():
        for test_data, test_label in test_loader:
            if torch.cuda.is_available():
                test_data, test_label = test_data.cuda(), test_label.cuda()
            test_output = model(test_data)
            test_loss = loss_func(test_output, test_label)
            
            
            testing_loss += test_loss.item()
            
            _, test_pred = torch.max(test_output.data, 1)
            testing_acc += (test_pred == test_label).sum().item()
        
        testing_acc /= test_size
        testing_loss /= (step+1)
        print("epoch:{}, testing_acc:{}, testing_loss:{}".format(epoch, testing_acc, testing_loss))
            


epoch:0, testing_acc:0.9375, testing_loss:274.5951232910156
epoch:1, testing_acc:0.875, testing_loss:348.9240074157715
epoch:2, testing_acc:0.875, testing_loss:414.30700302124023
epoch:3, testing_acc:0.875, testing_loss:437.20196533203125
epoch:4, testing_acc:0.875, testing_loss:473.1040954589844
epoch:5, testing_acc:0.875, testing_loss:474.00511169433594
epoch:6, testing_acc:0.875, testing_loss:302.4994201660156
epoch:7, testing_acc:0.875, testing_loss:489.2295227050781
epoch:8, testing_acc:0.9375, testing_loss:310.87945556640625
epoch:9, testing_acc:0.9375, testing_loss:193.82566833496094
epoch:10, testing_acc:0.875, testing_loss:236.76541137695312
epoch:11, testing_acc:0.875, testing_loss:357.8869171142578
epoch:12, testing_acc:0.875, testing_loss:353.21771240234375
epoch:13, testing_acc:0.9375, testing_loss:259.9726257324219
epoch:14, testing_acc:0.875, testing_loss:362.8655471801758
epoch:15, testing_acc:0.875, testing_loss:358.9603385925293
epoch:16, testing_acc:0.9375, testing_l

epoch:143, testing_acc:0.9375, testing_loss:64.23692321777344
epoch:144, testing_acc:0.9375, testing_loss:21.604290008544922
epoch:145, testing_acc:0.875, testing_loss:473.6740493774414
epoch:146, testing_acc:0.9375, testing_loss:768.3223876953125
epoch:147, testing_acc:0.9375, testing_loss:525.0159301757812
epoch:148, testing_acc:0.9375, testing_loss:635.3720703125
epoch:149, testing_acc:0.9375, testing_loss:1090.814453125
epoch:150, testing_acc:0.9375, testing_loss:718.93359375
epoch:151, testing_acc:0.875, testing_loss:529.3270034790039
epoch:152, testing_acc:0.875, testing_loss:468.2075424194336
epoch:153, testing_acc:0.9375, testing_loss:376.44915771484375
epoch:154, testing_acc:0.875, testing_loss:571.6861114501953
epoch:155, testing_acc:0.9375, testing_loss:609.376953125
epoch:156, testing_acc:0.875, testing_loss:672.1355743408203
epoch:157, testing_acc:0.8125, testing_loss:400.99830627441406
epoch:158, testing_acc:0.9375, testing_loss:565.615478515625
epoch:159, testing_acc:0.9

epoch:278, testing_acc:0.875, testing_loss:529.0968627929688
epoch:279, testing_acc:0.875, testing_loss:508.287109375
epoch:280, testing_acc:0.875, testing_loss:838.8486938476562
epoch:281, testing_acc:0.875, testing_loss:605.0911102294922
epoch:282, testing_acc:0.8125, testing_loss:466.2608947753906
epoch:283, testing_acc:0.875, testing_loss:648.2433013916016
epoch:284, testing_acc:0.9375, testing_loss:590.8512573242188
epoch:285, testing_acc:0.875, testing_loss:430.3374938964844
epoch:286, testing_acc:0.875, testing_loss:817.0520629882812
epoch:287, testing_acc:0.875, testing_loss:412.95008087158203
epoch:288, testing_acc:0.875, testing_loss:809.8721923828125
epoch:289, testing_acc:0.875, testing_loss:873.8425140380859
epoch:290, testing_acc:0.875, testing_loss:362.2657165527344
epoch:291, testing_acc:0.875, testing_loss:260.2716064453125
epoch:292, testing_acc:0.875, testing_loss:653.5154724121094
epoch:293, testing_acc:0.875, testing_loss:423.339599609375
epoch:294, testing_acc:0.8

epoch:421, testing_acc:0.875, testing_loss:649.3555908203125
epoch:422, testing_acc:0.875, testing_loss:689.1168518066406
epoch:423, testing_acc:0.875, testing_loss:757.2615966796875
epoch:424, testing_acc:0.875, testing_loss:320.79249572753906
epoch:425, testing_acc:0.875, testing_loss:544.8226013183594
epoch:426, testing_acc:0.875, testing_loss:660.5321044921875
epoch:427, testing_acc:0.875, testing_loss:589.3108673095703
epoch:428, testing_acc:0.875, testing_loss:505.06024169921875
epoch:429, testing_acc:0.875, testing_loss:640.0286254882812
epoch:430, testing_acc:0.875, testing_loss:596.6590576171875
epoch:431, testing_acc:0.875, testing_loss:700.2551727294922
epoch:432, testing_acc:0.875, testing_loss:658.4998168945312
epoch:433, testing_acc:0.9375, testing_loss:488.3329772949219
epoch:434, testing_acc:0.875, testing_loss:977.1179046630859
epoch:435, testing_acc:0.875, testing_loss:543.7259368896484
epoch:436, testing_acc:0.875, testing_loss:207.3296127319336
epoch:437, testing_ac

In [57]:
    # ----------test----------
    model.eval()
    test_acc = 0.0
    for test_image, test_label in test_loader:
        if torch.cuda.is_available():
            test_image, test_label = test_image.cuda(), test_label.cuda()
        test_output = model(test_image)
        _, predicted = torch.max(test_output.data, 1)
        test_acc += (predicted == test_label).sum().item()
    test_acc /= test_size

    print('epoch={:d}\ttrain loss={:.6f}\ttrain accuracy={:.3f}\ttest accuracy={:.3f}'.format(
        epoch, running_loss, train_acc, test_acc))

    if test_acc >= best_accuracy:
        if not os.path.exists('./trained_models'):
            os.makedirs('./trained_models')
        torch.save(model.state_dict(), './trained_models/JYT_model.pkl')
        best_accuracy = test_acc

epoch=499	train loss=0.000000	train accuracy=1.000	test accuracy=0.875
