# Multisensory integration architecture for sign-language recognition
## Project for Neuro-Inspired Systems Engineering course at TUM

Authors of the project: Tatyana Klimenko and Cristina Gil

This script was written by Cristina Gil

# Part 1: modeling sign-language gestures

The following code aims to model from a basic perspective sign-language gestures of american sign language.
<img src="american_sign_language.png">


In [1]:
import numpy as np
import pandas as pd
from collections import *

In [2]:
# Letters
letters = {}

# Fingers: Index, Middle, Ring, Little
fingers = ['index','middle','ring','little']

# Joints
'''
Angles for fingers:  
    - MCP_fe: metacarpophalangeal flexion/extension 
    - MCP_aa: metacarpophalangeal abduction/adduction
    - PIP: Proximal-interphalangeal 

Angles for thumb:
    - TMC_fe: trapeziometacarpal flexion/extension
    - TMC_aa: trapeziometacarpal abduction/adduction
    - MCP_fe: metacarpophalangeal flexion/extension
'''
angles = {'MCP_fe','MCP_aa','PIP'}
angles_thumb = {'TMC_fe','TMC_aa','MCP_fe'}


# Initialize angles to 0 (Rest position of the hand)
default_params = defaultdict(dict)

for finger in fingers:
    for angle in angles:
        default_params[finger][angle] = 0
# Thumb
for angle in angles_thumb:
    default_params['thumb'][angle]=0

# Create a dataframe from default_parameters
df = pd.DataFrame.from_dict(default_params)
print(df)

# Transform the dataframe to a row vector
array = df.as_matrix().ravel()
array = array[~np.isnan(array)]

# Number of parameters
n_params = len(array)

        index  little  middle  ring  thumb
MCP_aa    0.0     0.0     0.0   0.0    NaN
MCP_fe    0.0     0.0     0.0   0.0    0.0
PIP       0.0     0.0     0.0   0.0    NaN
TMC_aa    NaN     NaN     NaN   NaN    0.0
TMC_fe    NaN     NaN     NaN   NaN    0.0


In [3]:
# Parametrize the letters

# Letter A
A = df.copy()
A.loc['MCP_fe':'PIP','index':'ring']=90
A.loc['TMC_aa','thumb']=-45
letters['A']=A

# Letter B
B = df.copy()
B.loc['TMC_fe','thumb']=180
letters['B']=B

# Letter D
D = df.copy()
D.loc['MCP_fe','little':'thumb']=45
D.loc['PIP','little':'ring']=60
D.loc['TMC_fe','thumb'] = 90
letters['D']=D

# Letter E
E = df.copy()
E.loc['MCP_fe','index':'thumb']=90
E.loc['PIP','index':'ring']=90
E.loc['TMC_aa','thumb']=-90
letters['E']=E

# Letter F
F = df.copy()
F.loc['MCP_fe','index']=30
F.loc['PIP','index']=90
F.loc['TMC_fe','thumb']=45
F.loc['MCP_fe','thumb']=90
letters['F']=F


In [4]:
# Print letters structure for checking
for key, value in letters.items():
    print(key, value)


A         index  little  middle  ring  thumb
MCP_aa    0.0     0.0     0.0   0.0    NaN
MCP_fe   90.0    90.0    90.0  90.0    0.0
PIP      90.0    90.0    90.0  90.0    NaN
TMC_aa    NaN     NaN     NaN   NaN  -45.0
TMC_fe    NaN     NaN     NaN   NaN    0.0
B         index  little  middle  ring  thumb
MCP_aa    0.0     0.0     0.0   0.0    NaN
MCP_fe    0.0     0.0     0.0   0.0    0.0
PIP       0.0     0.0     0.0   0.0    NaN
TMC_aa    NaN     NaN     NaN   NaN    0.0
TMC_fe    NaN     NaN     NaN   NaN  180.0
D         index  little  middle  ring  thumb
MCP_aa    0.0     0.0     0.0   0.0    NaN
MCP_fe    0.0    45.0    45.0  45.0   45.0
PIP       0.0    60.0    60.0  60.0    NaN
TMC_aa    NaN     NaN     NaN   NaN    0.0
TMC_fe    NaN     NaN     NaN   NaN   90.0
E         index  little  middle  ring  thumb
MCP_aa    0.0     0.0     0.0   0.0    NaN
MCP_fe   90.0    90.0    90.0  90.0   90.0
PIP      90.0    90.0    90.0  90.0    NaN
TMC_aa    NaN     NaN     NaN   NaN  -90.0
TMC

In [5]:
# Function for creatig samples
def create_samples(letter,n_samples,variance):
    # Convert to array and eliminate nan values
    array = letter.as_matrix().ravel()
    array = array[~np.isnan(array)]
    
    # Create samples and add gausian noise
    data = np.tile(array, (n_samples,1))
    noise = np.random.normal(0, variance, data.shape)
    params = data+noise
    
    return params

In [34]:
# Initialize variables
n_samples = 1000
variance = 5 #For noise: 5 degrees in all the angles  
n_letters = len(letters)

# Create the parameters structure to feed the neural network
params = np.zeros((n_letters*n_samples,n_params))
# labels = np.zeros(n_letters*n_samples)
labels = np.zeros((n_letters*n_samples,n_letters))
key_labels = {} # Dictionary to store the number assigned to each letter

l=0
for key, letter in letters.items():
    
    # Params
    new_params = create_samples(letter,n_samples,variance)
    params[(l*n_samples):(l*n_samples+n_samples),:] = new_params
    
    # Labels
    labels[(l*n_samples):(l*n_samples+n_samples),l]=1
    l+=1
    
    # Labels
    # labels[((l-1)*n_samples):((l-1)*n_samples+n_samples)]=l
    key_labels[key]=l

# Part 2: Setting the NN

In [7]:
import torch
import torch.nn as nn

## 1. Prepare the data

In [35]:
# Split the data in training set, validation set and test set
# Input data stored in params, labels stored in labels

# Create random list of intergers to mask the input data
a = np.arange(4999)
np.random.shuffle(a)


# Test set
mask_test = np.zeros(n_letters*n_samples, dtype=bool)
trues_test = a[:100]
mask_test[trues_test] = True

test_data = params[mask_test,:]
test_labels = labels[mask_test,:]

# Validation set
mask_val = np.zeros(n_letters*n_samples, dtype=bool)
trues_val = a[100:200]
mask_val[trues_val]=True

val_data = params[mask_val,:]
val_labels = labels[mask_val,:]

# Train set
mask_train = (~mask_test)*(~mask_val)
train_data = params[mask_train,:]
train_labels = labels[mask_train,:]


print("Train size: %i" % len(train_data))
print("Val size: %i" % len(val_data))
print("Test size: %i" % len(test_data))


Train size: 4800
Val size: 100
Test size: 100


(4800, 5)


## 2. Define the network

In [38]:
class twoLayerNet(nn.Module):
    
    def __init__(self, D_in, H, D_out):
        super(twoLayerNet, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    
    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred

In [39]:
# N is batch size; D_in is input dimension; # H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, train_data.shape[1], 100, n_letters
# print(N, D_in, H, D_out)

net = twoLayerNet(D_in, H, D_out)

64 15 100 5


## 3. Define a loss function and an optimizer

In [40]:
# Construct our loss function and an Optimizer
loss_fn = nn.MSELoss(reduction='mean') # reduction = 'sum'
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)

## 4. Train the network

In [44]:
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
print(x.shape, y.shape)
print(type(x))

x = torch.from_numpy(np.float32(train_data[:N,:]))
y = torch.from_numpy(np.float32(train_labels[:N,:]))
print(type(x))

print(x.shape, y.shape)



for t in range(500):
    
    # Forward pass
    # y_pred = net(torch.from_numpy(train_data))
    y_pred = net(x)
    
    # Compute and print loss
    # loss = loss_fn(y_pred, torch.from_numpy(train_labels))
    loss = loss_fn(y_pred,y)
    print(t, loss.item())
    
    # Zero gradients
    optimizer.zero_grad()
    
    # Backward pass
    loss.backward()
    
    # Update the weights
    optimizer.step()


torch.Size([64, 15]) torch.Size([64, 5])
<class 'torch.Tensor'>
<class 'torch.Tensor'>
torch.Size([64, 15]) torch.Size([64, 5])
0 362.082275390625
1 320497.59375
2 0.40497004985809326
3 0.4046461284160614
4 0.4043225049972534
5 0.40399906039237976
6 0.40367597341537476
7 0.4033530652523041
8 0.4030304551124573
9 0.40270811319351196
10 0.40238600969314575
11 0.40206414461135864
12 0.401742547750473
13 0.4014212191104889
14 0.40110015869140625
15 0.4007793664932251
16 0.40045878291130066
17 0.4001384973526001
18 0.39981845021247864
19 0.39949867129325867
20 0.3991791009902954
21 0.39885979890823364
22 0.39854079484939575
23 0.39822202920913696
24 0.39790353178977966
25 0.3975852429866791
26 0.39726722240448
27 0.39694955945014954
28 0.39663201570510864
29 0.396314799785614
30 0.3959977924823761
31 0.3956810534000397
32 0.39536458253860474
33 0.3950483202934265
34 0.39473235607147217
35 0.3944166302680969
36 0.39410117268562317
37 0.3937859833240509
38 0.39347097277641296
39 0.39315631985

In [26]:
print(type(x))

<class 'torch.Tensor'>
