In [1]:
from IPython.core.display import display, HTML
display(HTML('<style>.container { width:100% !important; }</style>'))

In [2]:
import pandas as pd
import numpy as np

from tqdm.notebook import tqdm
from ipywidgets import Video
from itertools import combinations

import re
import itertools
import lzma
import pickle

# Location Tree

In [3]:
# TODO add mean distance for multi location locations

In [4]:
LOCATION_TREE = dict({
    'ROOT': {
        'Body Related (Fixed)': {
            'Upper Body': {
                'Head': {
                    'High': [
                        'Forehead',
                        'Temple',
                        'Ear',
                        'Head',
                    ],
                    'Face Leave': [
                        'Face'
                    ],
                    'Low': [
                        'Mouth',
                        'Chin',
                        'Cheek',
                        'Eye',
                        'Nose',
                        'Upper Lip',
                    ],
                },
                'Upper Torso': [
                    'Shoulder',
                    'Neck',
                    'Chest',
                    'Shoulder Contra',
                    'Back',
                ],
            },
            'Lower Torso': [
                'Trunk',
                'Belly',
                'Chest Contra',
            ],
            'Sub Torso': [
                'Leg',
                 'Knee',
            ],
        },
        'Non Body Related': {
            'Non-neutral Space': {
                'Hand': [
                    'WH: Thumb Side',
                    'WH: Back',
                    'WH: Palm',
                    'WH: Front',
                    'WH: Finger Tips',
                    'WH: Ring Finger',
                    'WH: Web Space',
                    'WH',
                    'Wrist',
                ],
                'Arm': [
                    'Upper Arm',
                    'Lower Arm',
                    'Armpit',
                    'Arm',
                ],
            },
            'Neutral Space Leave': [
                'Neutral Space',
            ],
        },
    }
})

# Movement Tree

In [5]:
MOVEMENT_TREE = dict({
    'ROOT': {
        'None': [
            'None',
        ],
        'Diagonal': {
            'Vertical': {
                'Single': [
                    'Downwards',
                    'Upwards',
                    'Backwards and Downwards',
                ],
                'Double': [
                    'Downwards and Forwards',
                    'Upwards and Forwards',
                    'Upwards and Downwards',
                ],
            },
            'Diagonal Leave': [
                'Ipsilateral and Upwards',
                'Ipsilateral and Downwards',
                'Contralateral and Upwards',
                'Contralateral and Downwards',
                'Downwards and Ipsilateral',
                'Downwards and Contralateral',
            ],
            'Horizontal': {
                'X Leave': {
                    'Single': [
                        'Ipsilateral',
                        'Contralateral',
                        'Contralateral and Forwards',
                        'Ipsilateral and Forwards',
                        'Forwards and Ipsilateral',
                        'Backwards and Ipsilateral',
                    ],
                    'Double': [
                        'Ipsilateral and Contralateral',
                    ],
                },
                'Z': {
                    'Single': [
                        'Forwards',
                        'Backwards',
                    ],
                    'Double': [
                        'Backwards and Forwards',
                    ],
                },
            },
        },
        'Other': [
            'Variable',
            'to and fro',
            'Towards Location',
            'From Location',
            'Distal',
            'Downwards and Towards',
        ],
    }
})

# Handedness Tree

In [6]:
HANDEDNESS_TREE = dict({
    'ROOT': {
        '1': [
            '1',
        ],
        '2': [
            '2s',
            '2a',
            '2n',
        ],
    },
})

# Path Distance

In [7]:
def get_paths(tree, res=[]):
    ress = dict()
    if type(tree) is dict:
        for k, v in tree.items():
            res_new = res if 'leave' in k.lower() else res + [k.lower()]
            for lk, lv in get_paths(v, res=res_new).items():
                ress[lk] = lv
    elif type(tree) is list:
        for leave in tree:
            ress[leave.lower()] = res + [leave.lower()]
    else:
        return []
    
    return ress

LOCATION_TREE_PATHS = get_paths(LOCATION_TREE)
MOVEMENT_TREE_PATHS = get_paths(MOVEMENT_TREE)
HANDEDNESS_TREE_PATHS = get_paths(HANDEDNESS_TREE)

In [8]:
def get_mean_distance(aa, bb, paths, normalize=1, debug=False):
    if len(aa) == len(bb) and all([a == b for a, b in zip(aa, bb)]):
         return 0
        
    _sum = 0
    _count = 0
    for a, b in itertools.product(aa, bb):
        _sum += get_distance(a, b, paths, normalize=normalize)
        _count += 1
        
    return _sum / _count

In [9]:
def get_distance(a, b, paths, normalize=1, debug=False):
    if a == 'MISSING' or b == 'MISSING':
        return 0
    
    path_a = paths[a]
    path_b = paths[b]
    
    closest_node = 0 # root
    for node_a, node_b in zip(path_a, path_b):
        if debug:
            print(f'node_a: {node_a}, node_b: {node_b}')
        
        if node_a == node_b:
            closest_node += 1
            
    return (len(path_a) + len(path_b) - (closest_node * 2)) / normalize

# Maximum Path Length in Tree

In [10]:
def get_max_distance(paths):
    max_len = 0
    for (a, _), (b, _) in combinations(paths.items(), 2):
        distance = get_distance(a, b, paths)
        max_len = max(max_len, distance)
    
    return max_len

In [11]:
LOCATIONS_MAX_PATH_LEN = get_max_distance(LOCATION_TREE_PATHS)
MOVEMENTS_MAX_PATH_LEN = get_max_distance(MOVEMENT_TREE_PATHS)
HANDEDNESS_MAX_PATH_LEN = get_max_distance(HANDEDNESS_TREE_PATHS)
print(f'LOCATIONS_MAX_PATH_LEN: {LOCATIONS_MAX_PATH_LEN}')
print(f'MOVEMENTS_MAX_PATH_LEN: {MOVEMENTS_MAX_PATH_LEN}')
print(f'HANDEDNESS_MAX_PATH_LEN: {HANDEDNESS_MAX_PATH_LEN}')

LOCATIONS_MAX_PATH_LEN: 9.0
MOVEMENTS_MAX_PATH_LEN: 7.0
HANDEDNESS_MAX_PATH_LEN: 4.0


# Sign Shape Sets

In [12]:
def get_hand_shape_features(hand_shape):
    hand_shape_mapping = dict({
        # General
        'A': ['thumb'],
        'S': ['close'],
        'AS': ['close'],
        'E': ['close'],
        'bloem': ['close'],
        # All shapes
        'B1': ['all'],
        'øB': ['all'],
        'B': ['all'],
        'B-null': ['all'],
        'B^': ['all'],
        'B^-null': ['all'],
        'C1': ['all'],
        'OB': ['all', 'open'],
        'Q5': ['all', 'open'],
        'OB-spr': ['all', 'open'],
        'douche': ['all', 'open'],
        'CB': ['all', 'close'],
        'S': ['close'],
        'bloem': ['close'],
        'B^-null': ['all'],
        'C1': ['all'],
        '5': ['all', 'wide'],
        '4': ['all', 'wide'],
        'C-spr': ['all', 'curve', 'wide'],
        '5-claw': ['all', 'curve', 'wide'],
        'C': ['all', 'open', 'curve'],
        'C-null': ['all', 'open', 'curve'],
        'O': ['all', 'close', 'curve'],
        # One Shapes
        '1': ['one'],
        '1^': ['one'],
        'D': ['one'],
        'I': ['one', 'ulnar'],
        'L': ['one', 'thumb:out'],
        'Y': ['one', 'ulnar', 'thumb:out'],
        '5R': ['one', 'u:r'],
        '5M': ['one', 'r:u'],
        '5Rx': ['one', 'r:u', 'close'],
        '5Mx': ['one', 'u:r', 'close'],
        'X1': ['one', 'curve'],
        'X2': ['curve'],
        'bOB': ['one', 'open'],
        'open-T': ['one', 'open'],
        'Q': ['one', 'open'],
        'bCB': ['one', 'close'],
        'T': ['one', 'close'],
        'F': ['one', 'close'],
        'b0': ['one', 'close'],
        'T-null': ['one', 'close'],
        'bC': ['one', 'open', 'curve'],
        'bCB”': ['one', 'close', 'curve'],
        'b0': ['one', 'close'],
        'T-null': ['one', 'close'],
        # One => All Shapes
        'U': ['one:all'],
        'N': ['one:all'],
        'U-null': ['one:all'],
        'R': ['one:all'],
        'Q2': ['one:all', 'open'],
        'bOB-2': ['one:all', 'open'],
        'douche2': ['one:all', 'open'],
        'bCB2': ['one:all', 'close'],
        'bC2': ['one:all', 'open', 'curve'],
        'X2-null': ['one:all', 'open', 'curve'],
        'bC2-spr': ['one:all', 'open', 'curve'],
        'W-claw': ['one:all', 'open', 'curve'],
        'X2': ['curve'],
        'U-claw': ['one:all', 'curve'],
        'V-claw': ['one:all', 'curve', 'wide'],
        'bO2': ['one:all', 'close', 'curve'],
        'V': ['one:all', 'wide'],
        'K': ['one:all', 'wide'],
        'P': ['one:all', 'wide'],
        'PI': ['one:all', 'ulnar'],
        'W': ['one:all', 'thumb', 'wide'],
        # All => One Shapes
        'M': ['all:one'],
        '3': ['all:one', 'wide'],
        # Other
        'OTHER': [],
    })
    
    if hand_shape in hand_shape_mapping.keys():
        return set(hand_shape_mapping[hand_shape])
    else:
        return set([])

# Sign Similarity

In [13]:
SUBSET = 300

train = pd.read_excel(f'ngt_signbank_validation_filtered_{SUBSET}.xlsx')

In [14]:
display(train.info(max_cols=0))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Columns: 69 entries, ID to Notes
dtypes: bool(8), float64(4), int64(17), object(40)
memory usage: 145.4+ KB


None

# File Path

In [15]:
def get_file_path(row):
    name = row['Lemma ID Gloss (Dutch)'].replace(':', '_')
    signbank_id = row['Signbank ID']
    
    return f'C:/Users/markw/Documents/Master Stage Videos/NGTvideos/{name}-{signbank_id}.mp4'

train['file_path'] = train.apply(get_file_path, axis=1)

# Location Split

In [16]:
# Strong Hand
train['Location Split'] = train['Location'].apply(lambda e: tuple(re.split(' > | \+ ', str(e))))
train['Strong Hand Split'] = train['Strong Hand'].apply(lambda e: tuple(re.split(' > | \+ ', str(e))))
train['Weak Hand Split'] = train['Weak Hand'].apply(lambda e: tuple(re.split(' > | \+ ', str(e))))

In [17]:
display(train.head())

Unnamed: 0,ID,is_val,Person,folder_index,Signbank ID,Dataset,Lemma ID Gloss (Dutch),Lemma ID Gloss (English),Annotation ID Gloss (Dutch),Annotation ID Gloss (English),...,Simultaneous Morphology,Blend Morphology,Relations to other signs,Relations to foreign signs,Tags,Notes,file_path,Location Split,Strong Hand Split,Weak Hand Split
0,0,X,Mark,2457,1239,NGT,OUDERWETS,,OUDERWETS,OLD-FASHIONED,...,,,,,,,C:/Users/markw/Documents/Master Stage Videos/N...,"(Weak hand: palm,)","(N,)","(N,)"
1,1,X,Mark,366,2824,NGT,BAARD-A,,BAARD-A,BEARD-A,...,,,"variant:2825, variant:4521",,,,C:/Users/markw/Documents/Master Stage Videos/N...,"(Chin,)","(C_spread,)","(C_spread,)"
2,2,X,Mark,625,4536,NGT,BREKEN,,BREKEN-D,BREAK-D,...,,,,,project: FMU,,C:/Users/markw/Documents/Master Stage Videos/N...,"(Neutral space,)","(Baby_C,)","(Baby_C,)"
3,3,X,Mark,1039,1358,NGT,EXAMEN,,EXAMEN,EXAM,...,,,"homonym:1360, homonym:1369",,,,C:/Users/markw/Documents/Master Stage Videos/N...,"(Neutral space,)","(V_curved,)","(V_curved,)"
4,4,X,Mark,300,1398,NGT,AMSTEL-B,,AMSTEL-B,AMSTEL-B,...,,,variant:261,,,,C:/Users/markw/Documents/Master Stage Videos/N...,"(Weak hand: palm,)","(4,)","(C,)"


In [18]:
display(train.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 73 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   ID                                      300 non-null    int64  
 1   is_val                                  254 non-null    object 
 2   Person                                  254 non-null    object 
 3   folder_index                            300 non-null    int64  
 4   Signbank ID                             300 non-null    int64  
 5   Dataset                                 300 non-null    object 
 6   Lemma ID Gloss (Dutch)                  300 non-null    object 
 7   Lemma ID Gloss (English)                4 non-null      object 
 8   Annotation ID Gloss (Dutch)             300 non-null    object 
 9   Annotation ID Gloss (English)           300 non-null    object 
 10  Keywords (Dutch)                        298 non-null    object

None

In [19]:
def get_mean_distance_hand_shape(aa, bb):
    # Equal sets give equal results
    if len(aa) == len(bb) and all([a == b for a, b in zip(aa, bb)]):
         return 0
    
    _sum = 0
    _count = 0
    for a, b in itertools.product(aa, bb):
        _sum += get_distance_hand_shape(a, b)
        _count += 1
        
    return _sum / _count

def get_distance_hand_shape(a, b):
    a_features = get_hand_shape_features(a)
    b_features = get_hand_shape_features(b)
    
    if len(a_features.union(b_features)) == 0:
        return 1
    
    return 1 - (len(a_features.intersection(b_features)) / len(a_features.union(b_features)))

In [20]:
def map_location(location):
    res = []
    for l in location:
        l = str(l).lower()

        # Set missing
        if l == '-':
            res.append('ERROR')
        else:
            for a, b in [('weak hand', 'wh')]:
                l = l.replace(a, b)
                
            res.append(l)
        
    return tuple(res)

def map_movement(movement):
    movement = str(movement).lower()
    
    # Map variations
    mappings = {
        'up and down': 'upwards and downwards',
    }
    
    if movement in mappings:
        movement = mappings[movement]
    
    for a, b in [('-', 'none'), ('-', 'none'), ('nan', 'none'), ('>', 'and'), ('+', 'and'), ('/', ' and ')]:
        movement = movement.replace(a, b)
    
    return movement

In [21]:
def map_hand_shape_split(hand_shape_split):
    res = []
    for hand_shape in hand_shape_split:
        res.append(map_hand_shape(hand_shape))
    
    return res

def map_hand_shape(hand_shape):
    hand_shape = str(hand_shape)
    
    # Full match replacements
    mappings = [
        ('Beak', 'CB'), ('Baby_beak_open', 'bOB'), ('Baby_beak', 'bCB'),
        ('T_open', 'open-T'), ('Beak2', 'bCB2')
    ]
    
    for a, b in mappings:
        if hand_shape.lower() == a.lower():
            return b
    
    # String Replacements
    replacements = [
        ('_spread', '-spr'), ('Baby_', 'b'), ('_curved', '-claw'), ('bO', 'b0'),
        ('Money', 'bCB”'), ('5r', '5R'), ('5m', '5M'), ('Middle finger', 'OTHER'),
        ('B_bent', 'B^'), ('Other', 'OTHER'),
    ]
    
    for a, b in replacements:
        hand_shape = hand_shape.replace(a, b)
        
    # 2nd replace iteration
    for (a, b) in [('B-claw', 'bC'), ('1-claw', 'X1'), ('Beak_open', 'OB'), ('C2-spr', 'bC2-spr')]:
        hand_shape = hand_shape.replace(a, b)
        
    return hand_shape

In [54]:
def get_distance_matrix(df, debug=False):
    res = np.empty(shape=[len(df), len(df)], dtype=np.float32)
    res_detailed = np.empty(shape=[len(df), len(df), 5], dtype=np.float32)
    c = 0
    
    for row_idx, row in tqdm(df.iterrows(), total=len(train)):
        print(f'row_idx: {row_idx}')
        location = map_location(row['Location Split'])
        movement = map_movement(row['Movement Direction'])
        handedness = row['Handedness']
        strong_hand = map_hand_shape_split(row['Strong Hand Split'])
        weak_hand = map_hand_shape_split(row['Weak Hand Split'])
        for row_other_idx, row_other in df.iterrows():
            location_other = map_location(row_other['Location Split'])
            movement_other = map_movement(row_other['Movement Direction'])
            handedness_other = row_other['Handedness']
            strong_hand_other = map_hand_shape_split(row_other['Strong Hand Split'])
            weak_hand_other = map_hand_shape_split(row_other['Weak Hand Split'])
            # Location Distance
            try:
                print(location, location_other)
                location_distance = get_mean_distance(location, location_other, LOCATION_TREE_PATHS, normalize=LOCATIONS_MAX_PATH_LEN)
            except:
                location_distance = -1
                if debug:
                    print(f'LOCATION | "{location}"', f'"{location_other}"', c, row_other_idx)
                    c += 1

            # Movement Distance
            try:
                movement_distance = get_distance(movement, movement_other, MOVEMENT_TREE_PATHS, normalize=MOVEMENTS_MAX_PATH_LEN)
            except:
                movement_distance = -1
                if debug:
                    print(f'MOVEMENT | "{movement}"', f'"{movement_other}"', c, row_other_idx)
                    c += 1
                    
            # Handedness Distance
            try:
                handedness_distance = get_distance(handedness, handedness_other, HANDEDNESS_TREE_PATHS, normalize=HANDEDNESS_MAX_PATH_LEN)
            except:
                handedness_distance = -1
                if debug:
                    print(f'HANDEDNESS | "{handedness}"', f'"{handedness_other}"', c, row_other_idx)
                    c += 1
                    
            # Strong Hand
            try:
                strong_hand_distance = get_mean_distance_hand_shape(strong_hand, strong_hand_other)
            except:
                strong_hand_distance = -1
                if debug:
                    print(f'STRONG HAND | "{strong_hand}"', f'"{strong_hand_other}"', c, row_other_idx)
                    c += 1
                    
            # Weak Hand
            try:
                weak_hand_distance = get_mean_distance_hand_shape(weak_hand, weak_hand_other)
            except:
                weak_hand_distance = -1
                if debug:
                    print(f'WEAK HAND | "{weak_hand}"', f'"{weak_hand}"', c, row_other_idx)
                    c += 1
                
            if any([i == -1 for i in [location_distance, movement_distance, handedness_distance, strong_hand_distance, weak_hand_distance]]):
                print('Error')
                res[row_idx, row_other_idx] = np.inf
            else:
                if debug:
                    print(f'location_distance: {location_distance:.2f}, movement_distance: {movement_distance:.2f}, handedness_distance: {handedness_distance:.2f}, '
                          f' strong_hand_distance: {strong_hand_distance:.2f}, weak_hand_distance: {weak_hand_distance:.2f}')

                res[row_idx, row_other_idx] = 0.30 * handedness_distance + 0.25 * location_distance + 0.25 * movement_distance + 0.10 * strong_hand_distance + 0.10 * weak_hand_distance
                res_detailed[row_idx, row_other_idx] = handedness_distance, location_distance, movement_distance, strong_hand_distance, weak_hand_distance
                
                # Sanity check, distance can't be smaller than 0 or greater than 1
                if any([i > 1 or i < 0 for i in [location_distance, movement_distance, handedness_distance, strong_hand_distance, weak_hand_distance]]):
                    print(f'location_distance: {location_distance:.2f}, movement_distance: {movement_distance:.2f}, handedness_distance: {handedness_distance:.2f}, '
                          f' strong_hand_distance: {strong_hand_distance:.2f}, weak_hand_distance: {weak_hand_distance:.2f}')
                    
        if debug:
            break
    
    return res, res_detailed
                
train_distance_matrix, train_distance_matrix_detailed = get_distance_matrix(abc, debug=False)

  0%|          | 0/300 [00:00<?, ?it/s]

row_idx: 0
('wh: thumb side',) ('wh: thumb side',)
('wh: thumb side',) ('wh: back',)
row_idx: 1
('wh: back',) ('wh: thumb side',)
('wh: back',) ('wh: back',)


In [31]:
abc = pd.read_pickle('train_ngt_dxy_10fps.pkl')

In [56]:
train_distance_matrix_detailed.round(2)

array([[[0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.22, 0.  , 1.  , 1.  ]],

       [[0.  , 0.22, 0.  , 1.  , 1.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  ]]], dtype=float32)

In [39]:
abc = abc[(abc['sign_names']=='GROEN') | (abc['sign_names']=='GRIJS-B')].reset_index(drop=True)
display(abc.head())

Unnamed: 0,file_path,file_name,sign_names,label,is_studio,frame_count,folder_index,Signbank ID,left_dx_rolling1_10fps,left_dy_rolling1_10fps,...,right_dx_rolling4_10fps,right_dy_rolling4_10fps,left_dx_rolling5_10fps,left_dy_rolling5_10fps,right_dx_rolling5_10fps,right_dy_rolling5_10fps,Handedness,HandednessNorm,Handedness Predicted,keyframe_idxs
0,C:/Users/markw/Documents/Master Stage Videos/N...,GRIJS-B-2047.mp4,GRIJS-B,1261,True,40,1277,2047,"(0.0, 6.028900146484375, 12.958786010742188, 3...","(0.0, -30.025421142578125, -31.940948486328125...",...,"(0.0, -3.4742698669433594, -5.004915873209636,...","(0.0, -14.511947631835938, -20.674758911132812...","(0.0, 3.0144500732421875, 6.3292287190755205, ...","(0.0, -15.012710571289062, -20.65545654296875,...","(0.0, -3.4742698669433594, -5.004915873209636,...","(0.0, -14.511947631835938, -20.674758911132812...",2a,2.0,True,"(4, 16, 28, 40)"
1,C:/Users/markw/Documents/Master Stage Videos/N...,GROEN-2048.mp4,GROEN,1265,True,34,1281,2048,"(0.0, 0.0243072509765625, -1.024688720703125, ...","(0.0, -1.0111083984375, -2.01129150390625, -23...",...,"(0.0, 1.5037612915039062, 4.6633656819661455, ...","(0.0, 1.50555419921875, 3.329833984375, -5.240...","(0.0, 0.01215362548828125, -0.3334604899088542...","(0.0, -0.50555419921875, -1.0074666341145833, ...","(0.0, 1.5037612915039062, 4.6633656819661455, ...","(0.0, 1.50555419921875, 3.329833984375, -5.240...",2a,2.0,True,"(4, 14, 24, 34)"


In [45]:
abc['Location'] = ['Weak hand: thumb side', 'Weak hand: back']
abc['Strong Hand'] = ['1', 'N']
abc['Weak Hand'] = ['1', 'N']
abc['Movement Direction'] = ['To and fro', 'To and fro']

# Strong Hand
abc['Location Split'] = abc['Location'].apply(lambda e: tuple(re.split(' > | \+ ', str(e))))
abc['Strong Hand Split'] = abc['Strong Hand'].apply(lambda e: tuple(re.split(' > | \+ ', str(e))))
abc['Weak Hand Split'] = abc['Weak Hand'].apply(lambda e: tuple(re.split(' > | \+ ', str(e))))

In [28]:
def print_distances(distances):
    print(f'Handedness Distance: {distances[0]:.2f}, Location Distance: {distances[1]:.2f}, Movement Distance: {distances[2]:.2f}, Strong Hand Distance: {distances[3]:.2f}')

In [78]:
abc = pd.read_excel('ngt_signbank.xlsx')

In [79]:
abc.info(max_cols=0)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4157 entries, 0 to 4156
Columns: 65 entries, Signbank ID to Notes
dtypes: bool(8), float64(2), int64(15), object(40)
memory usage: 1.8+ MB


In [80]:
# pd.DataFrame(abc['Orientation Change']).info()
abc['Orientation Change'].value_counts()

-                           1196
Supination                   140
Flexion                       93
Rotation                      91
Pronation                     62
Ulnar flexion                 46
Extension                     40
Radial flexion                24
Radial and ulnar flexion      20
Pronation/supination          14
Extension/flexion              4
Extension > flexion            3
Name: Orientation Change, dtype: int64

In [26]:
train.loc[train['Annotation ID Gloss (Dutch)'] == 'PENSIOEN-B']

Unnamed: 0,ID,is_val,Person,folder_index,Signbank ID,Dataset,Lemma ID Gloss (Dutch),Lemma ID Gloss (English),Annotation ID Gloss (Dutch),Annotation ID Gloss (English),...,Simultaneous Morphology,Blend Morphology,Relations to other signs,Relations to foreign signs,Tags,Notes,file_path,Location Split,Strong Hand Split,Weak Hand Split
92,92,X,Mark-Bank,2535,581,NGT,PENSIOEN-B,,PENSIOEN-B,RETIREMENT-B,...,,,variant:580,,,,C:/Users/markw/Documents/Master Stage Videos/N...,"(Weak hand: back,)","(B,)","(B,)"


In [29]:
row_idx = np.random.randint(len(train))
row_idx = 92
print(f'row_idx: {row_idx}')
idxs = np.argsort(train_distance_matrix[row_idx])

print('=' * 25, ' ORIGINAL ', '='* 25)
sign = train.loc[row_idx, 'Lemma ID Gloss (Dutch)']
location = train.loc[row_idx, 'Location']
movement_direction = train.loc[row_idx, 'Movement Direction']
handedness = train.loc[row_idx, 'Handedness']
strong_hand = train.loc[row_idx, 'Strong Hand']
print(f'''
Sign: {sign}, Location: {location}, Movement Direction: {movement_direction}, \
Handedness: {handedness}, Strong Hand: {strong_hand}
''')
display(Video.from_file(train.loc[row_idx, 'file_path']))
print('=' * 25, ' ORIGINAL ', '='* 25)

for sort_idx, (idx, row) in enumerate(train.loc[idxs[:10]].iterrows()):
    sign = row['Lemma ID Gloss (Dutch)']
    location = row['Location']
    movement_direction = row['Movement Direction']
    handedness = row['Handedness']
    strong_hand = row['Strong Hand']
    distance = train_distance_matrix[row_idx, idxs[sort_idx]]
    print(f'''
Sign: {sign}, Location: {location}, Movement Direction: {movement_direction}, \
Handedness: {handedness}, Strong Hand: {strong_hand}, Distance: {distance:.2f}
    ''')
    print_distances(train_distance_matrix_detailed[row_idx, idxs[sort_idx]])
    display(Video.from_file(row['file_path']))
    print('='*50)

row_idx: 92

Sign: PENSIOEN-B, Location: Weak hand: back, Movement Direction: Downwards, Handedness: 2a, Strong Hand: B



Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0c\xb8moov\x00\x00\x00*iods\x00\x0…


Sign: PENSIOEN-B, Location: Weak hand: back, Movement Direction: Downwards, Handedness: 2a, Strong Hand: B, Distance: 0.00
    
Handedness Distance: 0.00, Location Distance: 0.00, Movement Distance: 0.00, Strong Hand Distance: 0.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0c\xb8moov\x00\x00\x00*iods\x00\x0…


Sign: PRINCIPE, Location: Weak hand: palm, Movement Direction: Downwards, Handedness: 2a, Strong Hand: B_bent, Distance: 0.06
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 0.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0b^moov\x00\x00\x00*iods\x00\x00\x…


Sign: PLAKKEN-A, Location: Weak hand: palm, Movement Direction: Downwards, Handedness: 2a, Strong Hand: B, Distance: 0.06
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 0.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0cpmoov\x00\x00\x00*iods\x00\x00\x…


Sign: LIJST-C, Location: Weak hand: palm, Movement Direction: Downwards, Handedness: 2a, Strong Hand: B, Distance: 0.06
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 0.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0cumoov\x00\x00\x00*iods\x00\x00\x…


Sign: MAAND-D, Location: Weak hand: palm, Movement Direction: Downwards, Handedness: 2a, Strong Hand: Baby_C, Distance: 0.16
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 1.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0b\tmoov\x00\x00\x00*iods\x00\x00\…


Sign: ENZOVOORTS-C, Location: Weak hand: finger tips, Movement Direction: Downwards, Handedness: 2a, Strong Hand: 4, Distance: 0.16
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 0.50


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0c\xffmoov\x00\x00\x00*iods\x00\x0…


Sign: DEUR-A, Location: Weak hand: back, Movement Direction: nan, Handedness: 2a, Strong Hand: B, Distance: 0.21
    
Handedness Distance: 0.00, Location Distance: 0.00, Movement Distance: 0.86, Strong Hand Distance: 0.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0ctmoov\x00\x00\x00*iods\x00\x00\x…


Sign: AMSTEL-B, Location: Weak hand: palm, Movement Direction: Upwards, Handedness: 2a, Strong Hand: 4, Distance: 0.24
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.29, Strong Hand Distance: 0.50


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0c\x94moov\x00\x00\x00*iods\x00\x0…


Sign: OUDERWETS, Location: Weak hand: palm, Movement Direction: Downwards, Handedness: 2a, Strong Hand: N, Distance: 0.26
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 1.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0c\x94moov\x00\x00\x00*iods\x00\x0…


Sign: REKENING-HOUDEN-MET, Location: Weak hand: thumb side, Movement Direction: Downwards, Handedness: 2a, Strong Hand: V, Distance: 0.26
    
Handedness Distance: 0.00, Location Distance: 0.22, Movement Distance: 0.00, Strong Hand Distance: 1.00


Video(value=b'\x00\x00\x00\x1cftypmp42\x00\x00\x00\x01isommp41avc1\x00\x00\x0b\x98moov\x00\x00\x00*iods\x00\x0…



# Add High Level Categories

In [82]:
movement_categories = [
    'none',
    'vertical',
    'diagonal',
    'horizontal',
    'other',
]

location_categegories = [
    'upper body',
    'lower torso',
    'sub torso',
    'non-neutral space',
    'neutral space',
    'other',
]

In [83]:
def get_location_cats(ee):
    res = []
    ee = map_location(ee)
    for e in ee:
        for node in reversed(LOCATION_TREE_PATHS[e]):
            if node.lower() in location_categegories:
                res.append(node)
                break
    return tuple(set(res))
        
train['location cat'] = train['Location Split'].apply(get_location_cats)
display(train['location cat'].value_counts())

(neutral space,)                   162
(upper body,)                       79
(non-neutral space,)                46
(lower torso,)                       5
(neutral space, upper body)          3
(sub torso,)                         3
(non-neutral space, upper body)      1
(lower torso, upper body)            1
Name: location cat, dtype: int64

In [84]:
def get_movement_cats(e):
    res = []
    e = map_movement(e)
    for node in reversed(MOVEMENT_TREE_PATHS[e]):
        if node.lower() in movement_categories:
            return tuple(set([node]))
        
train['movement cat'] = train['Movement Direction'].apply(get_movement_cats)
display(train['movement cat'].value_counts())

(horizontal,)    103
(none,)          101
(vertical,)       65
(other,)          17
(diagonal,)       14
Name: movement cat, dtype: int64

# Categories Count Dictionary

In [85]:
def get_cat_count_dict(categories, key):
    res = dict([(c, 0) for c in categories])
    for l in categories:
        for cats in train[key]:
            for c in cats:
                if c == l:
                    res[c] += 1
    return res

lc_count_dict = get_cat_count_dict(location_categegories, 'location cat')
mc_count_dict = get_cat_count_dict(movement_categories, 'movement cat')

In [86]:
def get_cat_count_tuple(cats, cat_count_dir):
    res = []
    for c in cats:
        res.append(cat_count_dir[c])
    return tuple(res)

In [87]:
train['location cat count'] = train['location cat'].apply(lambda cats: get_cat_count_tuple(cats, lc_count_dict))
train['movement cat count'] = train['movement cat'].apply(lambda cats: get_cat_count_tuple(cats, mc_count_dict))

In [88]:
train.to_pickle(f'ngt_signbank_validation_filtered_{SUBSET}_cats.pkl.xz')

# Save Similarities

In [89]:
np.save('train_distance_matrix_300.npy', train_distance_matrix)
np.save('train_distance_matrix_detailed_300.npy', train_distance_matrix_detailed)

In [90]:
# Train Similarity Detailed DataFrame
train_similarity_detailed_dict = {
    'handedness_distance': train_distance_matrix_detailed[:,:,0],
    'location_distance': train_distance_matrix_detailed[:,:,1],
    'movement_distance': train_distance_matrix_detailed[:,:,2],
    'strong_hand_distance': train_distance_matrix_detailed[:,:,3],
    'weak_hand_distance': train_distance_matrix_detailed[:,:,4],
}

In [91]:
with lzma.open(f'train_distance_matrix_detailed_300_dict.xr', 'wb') as f:
    pickle.dump(train_similarity_detailed_dict, f)