# Task 2: Understand body language by gesture recognition with convolutional neural network

## 1. Do a literature search on Convolutional Neural Networks. Learn how to build a convolutional layer in PyTorch.

## 2. Referring to the guide in Task 1, build your own network for gesture classification using convolutional layers. Please see reference 4 in the manual to learn how to build convolutional layers in PyTorch.

## 3. Analyse and comment on the performance of the model. Make a comparison between the fully connected based and convolutional based models and comment on it.

In [21]:
## written by JYT
import torch
import os
import torch.utils.data as utils_data
import cv2
import numpy as np
import itertools
from torch import nn
import torch.nn.functional as F

### load data

In [12]:
# Read every file under path_images and keep only channel 0 of each decoded
# image, stacking the results into one (count, height, width) array.
Image = []
path_images = './dataset_processed/images'
for mainDir, subDir, files in os.walk(path_images):
    # os.walk lists entries in arbitrary, filesystem-dependent order. Sorting
    # the directory list in place (which steers the walk) and the file names
    # makes the image order reproducible from run to run.
    subDir.sort()
    for file in sorted(files):
        currentPath = os.path.join(mainDir, file)
        decoded = cv2.imread(currentPath)
        if decoded is None:
            # cv2.imread reports an unreadable or non-image file by returning
            # None; fail with the offending path instead of an opaque
            # "'NoneType' object is not subscriptable".
            raise IOError('cannot read image file: {}'.format(currentPath))
        Image.append(decoded[:, :, 0])
if not Image:
    raise RuntimeError('no images found under {}'.format(path_images))
Image = np.array(Image)
# Unpacking into three names requires all images to share one height/width.
dataset_size, H, W = Image.shape


print(Image.shape)

(78, 48, 48)


In [13]:
# Collect the labels stored in the text files under path_labels into one
# array and derive the number of classes from the largest label.
Label = []
path_labels = './dataset_processed/labels'
# os.listdir returns names in arbitrary order; sort so the label order is
# reproducible from run to run.
for file in sorted(os.listdir(path_labels)):
    # ndmin=1 keeps a file that holds a single value as a 1-element array;
    # the default would return a 0-d array, which cannot be iterated below.
    Label.append(np.loadtxt(os.path.join(path_labels, file), ndmin=1))


Label = np.array(list(itertools.chain.from_iterable(Label)))

# Labels are assumed to be 0-based integers, so the class count is max + 1.
num_classes = int(np.max(Label))+1
print(Label.shape)

(78,)


### generate dataloader 

In [14]:
# Pair every image with its label, split the pairs at random into a training
# part (80%) and a test part (the remainder), and wrap both parts in shuffled
# mini-batch loaders of 8 samples.
split_rate = 0.8
train_size = int(dataset_size*split_rate)
test_size = dataset_size-train_size

task2_dataset = utils_data.TensorDataset(
    torch.Tensor(Image),
    torch.Tensor(Label),
)
train_set, test_set = utils_data.random_split(
    task2_dataset,
    [train_size, test_size],
)

train_loader = utils_data.DataLoader(train_set, batch_size=8, shuffle=True)
test_loader = utils_data.DataLoader(test_set, batch_size=8, shuffle=True)
print('finish!')


finish!


### build my neural network

In [22]:
class JYT_model(nn.Module):
    """Small convolutional classifier for single-channel images.

    Three 3x3 convolution stages (1 -> 16 -> 32 -> 32 channels), each followed
    by ReLU; the first two stages also halve the spatial size with 2x2 max
    pooling. The flattened feature map feeds one linear layer that produces
    ``nums_class`` raw scores per sample.

    The linear layer is sized for a 32 x 12 x 12 feature map, i.e. a 48 x 48
    input after the two poolings.
    """

    def __init__(self, nums_class):
        """Build the layers; ``nums_class`` is the number of output scores."""
        super().__init__()

        # padding = (kernel_size - 1) / 2 keeps the spatial size unchanged
        # through each convolution, so only the pooling layers shrink it.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 48 -> 24
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 24 -> 12
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),  # feature map is 32 x 12 x 12 here
        )
        self.out = nn.Linear(32 * 12 * 12, nums_class)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (N, 1, H, W)."""
        features = self.conv3(self.conv2(self.conv1(x)))
        # Collapse channels and spatial axes into one vector per sample.
        flat = features.view(features.size(0), -1)
        return self.out(flat)

### instantiation 

In [39]:
# Create the classifier with one output score per class, move it to the GPU
# when CUDA is available, and start the best-accuracy tracker at zero.
best_accuracy = 0.0
net = JYT_model(num_classes)
net = net.cuda() if torch.cuda.is_available() else net

In [40]:
# Cross-entropy objective on the network's raw scores, minimised by Adam with
# a small weight-decay penalty.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=net.parameters(),
    lr=1e-2,
    weight_decay=1e-4,
)

### train and test

In [41]:
# Train for 500 epochs; after each epoch print the mean loss per batch and
# the fraction of training samples classified correctly.
for epoch in range(500):
    # Training mode is loop-invariant within an epoch, so set it once here
    # rather than on every batch.
    net.train()
    training_loss = 0.0
    training_acc = 0.0
    for img, label in train_loader:
        if torch.cuda.is_available():
            img, label = img.cuda(), label.cuda()
        # Insert the single-channel axis the first convolution expects.
        img = img.view(-1, 1, H, W)
        # CrossEntropyLoss takes integer class indices as targets.
        label = label.to(torch.long)

        output = net(img)
        batch_loss = loss_func(output, label)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        training_loss += batch_loss.item()
        # detach() replaces the legacy `.data` access: the predictions are
        # only counted, never differentiated.
        _, pred = torch.max(output.detach(), 1)
        training_acc += (pred == label).sum().item()

    # Average the loss over batches and the hit count over samples.
    training_loss /= len(train_loader)
    training_acc /= train_size

    print('epoch:{},\t training_loss:{:.3f}, \t traing_acc:{:.6f}'.format(epoch, 
                    training_loss, training_acc))
    
   


epoch:0,	 training_loss:68.915, 	 traing_acc:0.580645
epoch:1,	 training_loss:0.570, 	 traing_acc:0.822581
epoch:2,	 training_loss:0.396, 	 traing_acc:0.870968
epoch:3,	 training_loss:0.041, 	 traing_acc:0.983871
epoch:4,	 training_loss:0.006, 	 traing_acc:1.000000
epoch:5,	 training_loss:0.001, 	 traing_acc:1.000000
epoch:6,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:7,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:8,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:9,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:10,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:11,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:12,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:13,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:14,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:15,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:16,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:17,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:18,	 training_loss:0.000, 	 tra

epoch:154,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:155,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:156,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:157,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:158,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:159,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:160,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:161,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:162,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:163,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:164,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:165,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:166,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:167,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:168,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:169,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:170,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:171,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:172,

epoch:311,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:312,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:313,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:314,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:315,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:316,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:317,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:318,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:319,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:320,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:321,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:322,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:323,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:324,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:325,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:326,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:327,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:328,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:329,

epoch:468,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:469,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:470,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:471,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:472,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:473,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:474,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:475,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:476,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:477,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:478,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:479,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:480,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:481,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:482,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:483,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:484,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:485,	 training_loss:0.000, 	 traing_acc:1.000000
epoch:486,

In [42]:
    # ----------test----------
    # Measure accuracy on the test split, report it next to the latest
    # training statistics (epoch, training_loss and training_acc are read
    # from the training loop's variables), and save the weights whenever the
    # test accuracy matches or beats the best value recorded so far.
    net.eval()
    test_acc = 0.0

    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for test_image, test_label in test_loader:
            if torch.cuda.is_available():
                test_image, test_label = test_image.cuda(), test_label.cuda()
            test_image = test_image.view(-1, 1, H, W)
            # Compare integer class indices, as the training loop does.
            test_label = test_label.to(torch.long)
            test_output = net(test_image)
            _, predicted = torch.max(test_output, 1)
            test_acc += (predicted == test_label).sum().item()
    test_acc /= test_size

    print('epoch={:d}\ttrain loss={:.6f}\ttrain accuracy={:.3f}\ttest accuracy={:.3f}'.format(
        epoch, training_loss, training_acc, test_acc))

    if test_acc >= best_accuracy:
        # exist_ok=True creates the directory only if it is missing, without
        # a separate existence check.
        os.makedirs('./trained_models', exist_ok=True)
        torch.save(net.state_dict(), './trained_models/JYT_mode2.pkl')
        best_accuracy = test_acc

epoch=499	train loss=0.000001	train accuracy=1.000	test accuracy=0.875
