In [68]:

import os
import sys
import inspect
import pandas as pd

from evaluate import evaluate

## Models trained on Thumbset 

#### Best models ablated by encoder type and soft/no soft labels 

In [2]:
%%capture 
# Evaluate the four ThumbSet-trained autoregressive checkpoints (GNN / LSTM
# encoder, with and without soft labels) with beam_k=6.
# The complete compute() result is kept because the following cells read
# both its ['stats']['full'] entry and the whole ['stats'] mapping.
thumbset_beam6_checkpoints = (
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#gnn:ar.pth",
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#soft(gnn:ar).pth",
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#lstm:ar.pth",
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#soft(lstm:ar).pth",
)
gnn, gnn_soft, lstm, lstm_soft = [
    evaluate(checkpoint, beam_k=6).compute()
    for checkpoint in thumbset_beam6_checkpoints
]

In [3]:
# One row per ThumbSet-trained model, holding its ['stats']['full'] metrics.
# `only_thumbset` is read again when the ablation table is assembled.
table = {
    label: run['stats']['full']
    for label, run in (
        ("ArThumbLSTM-soft", lstm_soft),
        ("ArThumbLSTM", lstm),
        ("ArThumbGNN-soft", gnn_soft),
        ("ArThumbGNN", gnn),
    )
}
only_thumbset = pd.DataFrame(table).T
only_thumbset

Unnamed: 0,gmr,hmr,smr,rmr,cpr
ArThumbLSTM-soft,64.082979,70.032417,83.606837,76.197673,1.044268
ArThumbLSTM,64.376029,70.5933,84.455241,76.580648,1.009667
ArThumbGNN-soft,65.236691,71.035931,84.815295,76.625344,0.966779
ArThumbGNN,64.698163,70.155646,84.749871,76.601972,0.980424


#### Ablation by hand (full / right / left) for the GNN and LSTM models

In [4]:
# Same models as the previous table, but keeping every entry of 'stats'
# (the rendered output shows 'full', 'right' and 'left' rows per model).
table = {
    label: run['stats']
    for label, run in (
        ("ArThumbLSTM-soft", lstm_soft),
        ("ArThumbLSTM", lstm),
        ("ArThumbGNN-soft", gnn_soft),
        ("ArThumbGNN", gnn),
    )
}
# Stack the per-model frames vertically; the model label becomes the outer index level.
per_model_frames = {
    label: pd.DataFrame(stats).T for label, stats in table.items()
}
pd.concat(per_model_frames, axis=0)

Unnamed: 0,Unnamed: 1,gmr,hmr,smr,rmr,cpr
ArThumbLSTM-soft,full,64.082979,70.032417,83.606837,76.197673,1.044268
ArThumbLSTM-soft,right,59.75654,65.048497,82.433585,73.368057,1.068056
ArThumbLSTM-soft,left,69.124774,76.213902,85.637848,79.752799,1.026578
ArThumbLSTM,full,64.376029,70.5933,84.455241,76.580648,1.009667
ArThumbLSTM,right,59.563759,65.529533,82.712194,73.370709,1.05479
ArThumbLSTM,left,69.360201,76.283783,86.409522,80.022446,0.966937
ArThumbGNN-soft,full,65.236691,71.035931,84.815295,76.625344,0.966779
ArThumbGNN-soft,right,63.519726,70.118263,86.749809,77.145793,0.984018
ArThumbGNN-soft,left,66.701942,71.789332,82.549761,75.828254,0.9679
ArThumbGNN,full,64.698163,70.155646,84.749871,76.601972,0.980424


#### ablation of beam decoder

In [5]:
%%capture 
# Re-evaluate the same four ThumbSet checkpoints with beam_k=1 and keep only
# the ['stats']['full'] metrics.
# NOTE(review): this rebinds gnn / gnn_soft / lstm / lstm_soft from the complete
# compute() result to the 'full' metrics only, so the earlier cells that index
# these names with ['stats'] will presumably fail if re-run after this cell.
thumbset_beam1_checkpoints = (
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#gnn:ar.pth",
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#soft(gnn:ar).pth",
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#lstm:ar.pth",
    "best_official_base#augmented_noisy_random_seq2seq#seq2seq_noisy#soft(lstm:ar).pth",
)
gnn, gnn_soft, lstm, lstm_soft = [
    evaluate(checkpoint, beam_k=1).compute()['stats']['full']
    for checkpoint in thumbset_beam1_checkpoints
]

In [6]:
# Tabulate the beam_k=1 metrics computed in the preceding cell, one row per model.
table = dict(
    [
        ("ArThumbLSTM-soft", lstm_soft),
        ("ArThumbLSTM", lstm),
        ("ArThumbGNN-soft", gnn_soft),
        ("ArThumbGNN", gnn),
    ]
)
pd.DataFrame(table).T

Unnamed: 0,gmr,hmr,smr,rmr,cpr
ArThumbLSTM-soft,62.678863,69.066792,82.969975,75.191119,1.010059
ArThumbLSTM,62.830024,68.909288,82.821988,74.49036,1.012468
ArThumbGNN-soft,62.44784,68.125594,82.244365,73.581235,0.958181
ArThumbGNN,61.21457,66.113227,81.254813,72.448937,0.95044


#### ablation data augmentation

In [7]:
%%capture 
# Evaluate the checkpoints whose names carry 'no_augmented' (data-augmentation
# ablation) with beam_k=1, keeping only the ['stats']['full'] metrics.
no_augmentation_checkpoints = (
    "best_ablation_base_augmentation#no_augmented_noisy_random_seq2seq#seq2seq_noisy#gnn:ar.pth",
    "best_ablation_base_augmentation#no_augmented_noisy_random_seq2seq#seq2seq_noisy#soft(gnn:ar).pth",
    "best_ablation_base_augmentation#no_augmented_noisy_random_seq2seq#seq2seq_noisy#lstm:ar.pth",
    "best_ablation_base_augmentation#no_augmented_noisy_random_seq2seq#seq2seq_noisy#soft(lstm:ar).pth",
)
gnn, gnn_soft, lstm, lstm_soft = [
    evaluate(checkpoint, beam_k=1).compute()['stats']['full']
    for checkpoint in no_augmentation_checkpoints
]

In [8]:
# Tabulate the no-augmentation metrics from the preceding cell, one row per model.
row_labels = ("ArThumbLSTM-soft", "ArThumbLSTM", "ArThumbGNN-soft", "ArThumbGNN")
row_metrics = (lstm_soft, lstm, gnn_soft, gnn)
table = dict(zip(row_labels, row_metrics))
pd.DataFrame(table).T

Unnamed: 0,gmr,hmr,smr,rmr,cpr
ArThumbLSTM-soft,62.143064,67.817249,82.106669,73.430414,1.00338
ArThumbLSTM,58.677356,64.380924,78.419827,69.656638,0.98459
ArThumbGNN-soft,53.044024,57.148342,72.539399,62.367956,0.88164
ArThumbGNN,53.783114,59.063806,73.528206,63.665884,0.905592


#### ablation autoregressive

In [9]:
%%capture 
# Autoregressive ablation: evaluate the two ':fc' checkpoints with
# alias_architecture='fc', keeping only the ['stats']['full'] metrics.
# Only gnn_soft and lstm are rebound here; gnn and lstm_soft keep whatever
# value an earlier cell assigned to them.
fc_decoder_checkpoints = (
    "best_ablation_base_autoregressive#augmented_noisy_random_seq2seq#seq2seq_noisy#soft(gnn:fc).pth",
    "best_ablation_base_autoregressive#augmented_noisy_random_seq2seq#seq2seq_noisy#lstm:fc.pth",
)
gnn_soft, lstm = [
    evaluate(checkpoint, alias_architecture='fc').compute()['stats']['full']
    for checkpoint in fc_decoder_checkpoints
]

In [10]:
# Only the LSTM and the soft-label GNN were evaluated with the 'fc' architecture
# in the preceding cell, so only those two rows are tabulated.
table = dict([("ArThumbLSTM", lstm), ("ArThumbGNN-soft", gnn_soft)])
pd.DataFrame(table).T

Unnamed: 0,gmr,hmr,smr,rmr,cpr
ArThumbLSTM,27.385242,29.790501,36.357534,30.588727,0.314443
ArThumbGNN-soft,61.560856,65.956026,81.263443,69.248868,0.783834


## FINETUNING

#### Best models ablated by encoder type and soft/no soft labels 

In [11]:
%%capture 
# Checkpoint file names of the fine-tuned models, grouped by the two
# "frozen" / "no-frozen" variants (file names end in _1 / _0 respectively).
# Only the 'lh' checkpoint is listed; the matching 'rh' name is derived below.
models = {
    "frozen": {
        "ArThumbLSTM-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(lstm:ar)_1.pth',
        "ArThumbLSTM": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:ar_1.pth', 
        "ArThumbGNN-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(gnn:ar)_1.pth',
        "ArThumbGNN": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#gnn:ar_1.pth'
    },
    "no-frozen":{
        "ArThumbLSTM-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(lstm:ar)_0.pth',
        "ArThumbLSTM": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:ar_0.pth', 
        "ArThumbGNN-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(gnn:ar)_0.pth',
        "ArThumbGNN": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#gnn:ar_0.pth' 
    }
}

# Build the results in a brand-new nested dict. The previous
# `table = models.copy()` was a shallow copy: the inner dicts were shared, so
# writing results into `table` silently replaced the checkpoint paths stored
# in `models` while it was being iterated.
# Each entry holds the whole 'stats' mapping produced with beam_k=6.
table = {
    encoder: {
        model: evaluate(
            lh.replace('lh', 'rh'), lh,  # (rh checkpoint, lh checkpoint)
            beam_k=6
        ).compute()['stats']
        for model, lh in checkpoints.items()
    }
    for encoder, checkpoints in models.items()
}

In [12]:
# Side-by-side view of the "frozen" results: one column group per model,
# one row per entry of its 'stats' mapping.
frozen_frames = {
    model_name: pd.DataFrame(stats).T
    for model_name, stats in table['frozen'].items()
}
finetuning_results_frozen = pd.concat(frozen_frames, axis=1)
finetuning_results_frozen.round(2)

Unnamed: 0_level_0,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM,ArThumbLSTM,ArThumbLSTM,ArThumbLSTM,ArThumbLSTM,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN,ArThumbGNN,ArThumbGNN,ArThumbGNN,ArThumbGNN
Unnamed: 0_level_1,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr
full,65.32,71.62,85.21,77.69,1.02,65.34,71.73,85.49,77.8,1.01,65.99,72.43,85.71,77.59,0.94,66.34,72.19,86.31,78.02,0.96
right,61.84,68.25,85.05,76.04,1.04,60.41,66.91,84.07,74.75,1.04,62.88,69.66,86.01,76.65,0.96,63.4,69.21,86.55,77.2,0.98
left,69.2,75.9,86.12,79.95,1.01,70.69,77.3,87.57,81.36,1.01,69.18,75.34,85.56,78.58,0.94,69.09,75.27,86.19,78.81,0.95


In [13]:
# Side-by-side view of the "no-frozen" results: one column group per model,
# one row per entry of its 'stats' mapping.
no_frozen_frames = {
    model_name: pd.DataFrame(stats).T
    for model_name, stats in table['no-frozen'].items()
}
finetuning_results = pd.concat(no_frozen_frames, axis=1)
finetuning_results.round(2)

Unnamed: 0_level_0,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM-soft,ArThumbLSTM,ArThumbLSTM,ArThumbLSTM,ArThumbLSTM,ArThumbLSTM,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN-soft,ArThumbGNN,ArThumbGNN,ArThumbGNN,ArThumbGNN,ArThumbGNN
Unnamed: 0_level_1,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr
full,65.07,71.58,85.27,77.92,1.02,64.01,69.95,84.32,76.4,1.05,66.84,72.62,86.83,78.55,0.99,66.48,72.75,86.47,78.5,0.97
right,60.99,67.38,84.69,75.7,1.03,59.87,65.55,83.84,74.21,1.07,62.77,68.87,86.15,76.6,0.99,63.09,69.62,85.99,77.12,0.99
left,69.53,76.67,86.43,80.56,1.0,68.78,75.34,85.7,79.31,1.03,70.92,76.32,87.58,80.37,0.99,70.17,76.34,87.37,80.06,0.96


## The same table as above, but without beam decoding

In [14]:
%%capture 
# Same fine-tuned checkpoints as the beam_k=6 evaluation, grouped by the
# "frozen" / "no-frozen" variants (file names end in _1 / _0 respectively).
# Only the 'lh' checkpoint is listed; the matching 'rh' name is derived below.
models = {
    "frozen": {
        "ArThumbLSTM-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(lstm:ar)_1.pth',
        "ArThumbLSTM": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:ar_1.pth', 
        "ArThumbGNN-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(gnn:ar)_1.pth',
        "ArThumbGNN": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#gnn:ar_1.pth'
    },
    "no-frozen":{
        "ArThumbLSTM-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(lstm:ar)_0.pth',
        "ArThumbLSTM": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:ar_0.pth', 
        "ArThumbGNN-soft": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(gnn:ar)_0.pth',
        "ArThumbGNN": 'best_lh_official#nakamura_augmented_seq2seq_separated#finetuning_separated#gnn:ar_0.pth' 
    }
}

# Build the results in a brand-new nested dict. The previous
# `table = models.copy()` was a shallow copy: the inner dicts were shared, so
# writing results into `table` silently replaced the checkpoint paths stored
# in `models` while it was being iterated.
# Each entry holds only the ['stats']['full'] metrics produced with beam_k=1.
table = {
    encoder: {
        model: evaluate(
            lh.replace('lh', 'rh'), lh,  # (rh checkpoint, lh checkpoint)
            beam_k=1
        ).compute()['stats']['full']
        for model, lh in checkpoints.items()
    }
    for encoder, checkpoints in models.items()
}


In [15]:
# Stack the "frozen" and "no-frozen" beam_k=1 results vertically; the variant
# name becomes the outer index level and the model name the inner one.
beam1_frames = {
    variant: pd.DataFrame(per_model).T for variant, per_model in table.items()
}
ablation_beam_decoder = pd.concat(beam1_frames, axis=0)
ablation_beam_decoder

Unnamed: 0,Unnamed: 1,gmr,hmr,smr,rmr,cpr
frozen,ArThumbLSTM-soft,63.420746,69.399688,83.605792,75.69963,1.015895
frozen,ArThumbLSTM,64.251577,70.726135,84.881778,76.749882,1.021898
frozen,ArThumbGNN-soft,63.704248,69.705066,83.566685,74.91705,0.946713
frozen,ArThumbGNN,63.578682,68.808781,83.699848,74.734472,0.950548
no-frozen,ArThumbLSTM-soft,62.91832,69.037796,83.128953,75.397212,1.0004
no-frozen,ArThumbLSTM,62.908694,68.533019,83.24988,75.191578,1.019288
no-frozen,ArThumbGNN-soft,64.698887,70.116245,84.942785,75.925227,0.954638
no-frozen,ArThumbGNN,64.498522,70.189606,84.93997,75.851965,0.954637


### GNN model and LSTM model trained without domain adaptation


In [16]:
%%capture 
# 'lh' checkpoints of the two models evaluated in this section (file names end
# in _-1); the matching 'rh' name is derived by substituting 'lh' -> 'rh'.
models = {
    "ArThumbLSTM": 'best_lh_official2#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:ar_-1.pth',
    "ArThumbGNN": 'best_lh_official2#nakamura_augmented_seq2seq_separated#finetuning_separated#gnn:ar_-1.pth'
}

# Map every model name to its ['stats']['full'] metrics obtained with beam_k=1.
table = {
    model: evaluate(
        lh.replace('lh', 'rh'), lh,
        beam_k=1
    ).compute()['stats']['full']
    for model, lh in models.items()
}



In [17]:
# One row per model; `only_PIG` is read again when the ablation table is assembled.
only_PIG = pd.DataFrame(table).T
only_PIG

Unnamed: 0,gmr,hmr,smr,rmr,cpr
ArThumbLSTM,26.818374,29.568957,37.37936,31.478006,0.43668
ArThumbGNN,60.52714,65.311447,80.379531,69.929908,0.872351


### ABLATION DECODER

In [18]:
%%capture 
# Fine-tuned ':fc' checkpoints, grouped by the "frozen" / "no-frozen" variants
# (file names end in _1 / _0 respectively).
# Only the 'lh' checkpoint is listed; the matching 'rh' name is derived below.
models = {
    "frozen": {
        "ArThumbLSTM": 'best_lh_ablation_base_autoregressive_finetuning#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:fc_1.pth', 
        "ArThumbGNN-soft": 'best_lh_ablation_base_autoregressive_finetuning#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(gnn:fc)_1.pth',
    },
    "no-frozen":{
        "ArThumbLSTM": 'best_lh_ablation_base_autoregressive_finetuning#nakamura_augmented_seq2seq_separated#finetuning_separated#lstm:fc_0.pth', 
        "ArThumbGNN-soft": 'best_lh_ablation_base_autoregressive_finetuning#nakamura_augmented_seq2seq_separated#finetuning_separated#soft(gnn:fc)_0.pth'
    }
}

# Build the results in a brand-new nested dict. The previous
# `table = models.copy()` was a shallow copy: the inner dicts were shared, so
# writing results into `table` silently replaced the checkpoint paths stored
# in `models` while it was being iterated.
# Each entry holds only the ['stats']['full'] metrics.
table = {
    encoder: {
        model: evaluate(
            lh.replace('lh', 'rh'), lh,  # (rh checkpoint, lh checkpoint)
            alias_architecture="fc"
        ).compute()['stats']['full']
        for model, lh in checkpoints.items()
    }
    for encoder, checkpoints in models.items()
}


In [19]:
# Stack the "frozen" and "no-frozen" 'fc' results vertically; the variant name
# becomes the outer index level and the model name the inner one.
fc_frames = {
    variant: pd.DataFrame(per_model).T for variant, per_model in table.items()
}
ablation_decoder = pd.concat(fc_frames, axis=0)
ablation_decoder

Unnamed: 0,Unnamed: 1,gmr,hmr,smr,rmr,cpr
frozen,ArThumbLSTM,27.385242,29.790501,36.357534,30.588727,0.314443
frozen,ArThumbGNN-soft,61.590995,65.968255,81.286494,69.27797,0.785787
no-frozen,ArThumbLSTM,27.385242,29.790501,36.357534,30.588727,0.314443
no-frozen,ArThumbGNN-soft,63.554797,68.089687,83.563469,71.386255,0.830266


#### ABLATION data-augmentation

In [20]:
%%capture 
# Fine-tuned 'noaug' checkpoints, grouped by the "frozen" / "no-frozen"
# variants (file names end in _1 / _0 respectively).
# Only the 'lh' checkpoint is listed; the matching 'rh' name is derived below.
models = {
    "frozen": {
        "ArThumbLSTM": 'best_lh_ablation_base_augmentation_finetuning#nakamura_no_augmented_seq2seq_separated#finetuning_separated#noauglstm:ar_1.pth', 
        "ArThumbGNN-soft": 'best_lh_ablation_base_augmentation_finetuning#nakamura_no_augmented_seq2seq_separated#finetuning_separated#noaugsoft(gnn:ar)_1.pth',
    },
    "no-frozen":{
        "ArThumbLSTM": 'best_lh_ablation_base_augmentation_finetuning#nakamura_no_augmented_seq2seq_separated#finetuning_separated#noauglstm:ar_0.pth', 
        "ArThumbGNN-soft": 'best_lh_ablation_base_augmentation_finetuning#nakamura_no_augmented_seq2seq_separated#finetuning_separated#noaugsoft(gnn:ar)_0.pth'
    }
}

# Build the results in a brand-new nested dict. The previous
# `table = models.copy()` was a shallow copy: the inner dicts were shared, so
# writing results into `table` silently replaced the checkpoint paths stored
# in `models` while it was being iterated.
# Each entry holds only the ['stats']['full'] metrics produced with beam_k=6.
table = {
    encoder: {
        model: evaluate(
            lh.replace('lh', 'rh'), lh,  # (rh checkpoint, lh checkpoint)
            alias_architecture="ar",
            beam_k=6,
        ).compute()['stats']['full']
        for model, lh in checkpoints.items()
    }
    for encoder, checkpoints in models.items()
}

In [21]:
# Stack the "frozen" and "no-frozen" no-augmentation results vertically; the
# variant name becomes the outer index level and the model name the inner one.
noaug_frames = {
    variant: pd.DataFrame(per_model).T for variant, per_model in table.items()
}
ablation_augmentation = pd.concat(noaug_frames, axis=0)
ablation_augmentation

Unnamed: 0,Unnamed: 1,gmr,hmr,smr,rmr,cpr
frozen,ArThumbLSTM,64.692389,70.724903,84.006834,75.967938,1.012891
frozen,ArThumbGNN-soft,57.403008,61.991601,76.427949,66.663674,0.893929
no-frozen,ArThumbLSTM,65.177536,71.402118,85.149655,76.914269,1.042029
no-frozen,ArThumbGNN-soft,60.356269,65.82174,80.443958,70.973047,0.944246


# NAKAMURA BASELINES

## HMM-based baselines

### trained on PIG dataset

In [22]:
%%capture 
# Location of the stored predictions of each HMM baseline.
models = {
    'HMM1': 'official_results_HMM/PIG/HMM1/test_output',
    'HMM2': 'official_results_HMM/PIG/HMM2/test_output',
    'HMM3': 'official_results_HMM/PIG/HMM3/test_output'
}
# Score the stored predictions and keep the whole 'stats' mapping per baseline.
results = {
    hmm_type: evaluate(f"official#PIG#{hmm_type}#0").compute_from_files(output_path)['stats']
    for hmm_type, output_path in models.items()
}

In [23]:
# Side-by-side view of the HMM baselines: one column group per baseline,
# one row per entry of its 'stats' mapping.
hmm_frames = {
    hmm_name: pd.DataFrame(stats).T for hmm_name, stats in results.items()
}
hmm_results = pd.concat(hmm_frames, axis=1)
hmm_results.round(2)

Unnamed: 0_level_0,HMM1,HMM1,HMM1,HMM1,HMM1,HMM2,HMM2,HMM2,HMM2,HMM2,HMM3,HMM3,HMM3,HMM3,HMM3
Unnamed: 0_level_1,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr
full,61.77,67.66,81.09,73.15,0.89,63.78,69.49,83.59,76.12,0.98,63.63,69.4,83.05,75.42,0.94
right,58.34,65.08,81.1,72.04,0.91,60.65,66.72,83.92,75.17,1.02,60.54,66.86,83.38,74.42,0.95
left,65.08,70.17,81.1,74.12,0.89,66.3,71.88,82.98,76.61,0.95,66.24,71.7,82.41,75.91,0.95


In [73]:
%%capture 
# Location of the stored predictions under 'official_results_HMM/naka'.
# NOTE(review): this cell rebinds `models` and `results` from the previous HMM
# cell and reuses the same "official#PIG#..." evaluation name — confirm intent.
models = {
    'HMM1': 'official_results_HMM/naka/Res_HMM1',
    'HMM2': 'official_results_HMM/naka/Res_HMM2',
    'HMM3': 'official_results_HMM/naka/Res_HMM3'
}
# Score the stored predictions and keep the whole 'stats' mapping per baseline.
results = {
    hmm_type: evaluate(f"official#PIG#{hmm_type}#0").compute_from_files(output_path)['stats']
    for hmm_type, output_path in models.items()
}

In [74]:
# Side-by-side view of the HMM baselines: one column group per baseline,
# one row per entry of its 'stats' mapping. This rebinds `hmm_results`,
# which the "First Table" cells read.
hmm_naka_frames = {
    hmm_name: pd.DataFrame(stats).T for hmm_name, stats in results.items()
}
hmm_results = pd.concat(hmm_naka_frames, axis=1)
hmm_results.round(2)

Unnamed: 0_level_0,HMM1,HMM1,HMM1,HMM1,HMM1,HMM2,HMM2,HMM2,HMM2,HMM2,HMM3,HMM3,HMM3,HMM3,HMM3
Unnamed: 0_level_1,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr
full,61.77,67.66,81.09,73.15,0.89,63.78,69.49,83.59,76.12,0.98,63.63,69.4,83.05,75.42,0.94
right,58.34,65.08,81.1,72.04,0.91,60.65,66.72,83.92,75.17,1.02,60.54,66.86,83.38,74.42,0.95
left,65.08,70.17,81.1,74.12,0.89,66.3,71.88,82.98,76.61,0.95,66.24,71.7,82.41,75.91,0.95


### Trained on Thumbset dataset

In [72]:
%%capture 
# Location of the stored predictions of each HMM baseline for ThumbSet.
output = {
    'HMM1': 'official_results_HMM/ThumbSet/HMM1',
    'HMM2': 'official_results_HMM/ThumbSet/HMM2',
    'HMM3': 'official_results_HMM/ThumbSet/HMM3'
}
# Score the stored predictions and keep the whole 'stats' mapping per baseline.
# This rebinds `results`, which previously held the PIG scores.
results = {
    hmm_type: evaluate(f"official#ThumbSet#{hmm_type}#0").compute_from_files(output_path)['stats']
    for hmm_type, output_path in output.items()
}

In [25]:
# Side-by-side view of whatever `results` currently holds (unrounded).
# NOTE(review): the saved output of this cell equals the PIG table digit for
# digit, so it may be stale — re-run after the ThumbSet evaluation to confirm.
pd.concat(
    {hmm_name: pd.DataFrame(stats).T for hmm_name, stats in results.items()},
    axis=1,
)

Unnamed: 0_level_0,HMM1,HMM1,HMM1,HMM1,HMM1,HMM2,HMM2,HMM2,HMM2,HMM2,HMM3,HMM3,HMM3,HMM3,HMM3
Unnamed: 0_level_1,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr
full,61.772802,67.661373,81.085877,73.147281,0.894376,63.782128,69.490986,83.59367,76.121843,0.980442,63.626609,69.400962,83.051283,75.424242,0.944073
right,58.344865,65.078334,81.09664,72.042244,0.905541,60.646635,66.720136,83.919112,75.168445,1.01721,60.541365,66.860791,83.382152,74.420723,0.951814
left,65.082201,70.174935,81.10283,74.119523,0.893736,66.299342,71.881363,82.97706,76.60536,0.950188,66.238776,71.700458,82.413279,75.90972,0.949349


### DEEP-BASED baselines

In [26]:
%%capture 
# Deep baselines: each one is evaluated from a pair of checkpoints
# (the 'right#...' file first, the 'left#...' file second) and keeps the whole
# 'stats' mapping. Note that `lstm` is rebound here.
ff_checkpoint_pair = (
    "baseline_official_models/right#PIG#classification#nakamura_baseline_ff.pth",
    "baseline_official_models/left#PIG#classification#nakamura_baseline_ff.pth",
)
lstm_checkpoint_pair = (
    "baseline_official_models/right#PIG#classification#nakamura_baseline_lstm.pth",
    "baseline_official_models/left#PIG#classification#nakamura_baseline_lstm.pth",
)

fc = evaluate(
    *ff_checkpoint_pair, alias_architecture='windowed_nakamura'
).compute()['stats']

lstm = evaluate(
    *lstm_checkpoint_pair, alias_architecture='windowed_nakamura_original'
).compute()['stats']

In [27]:
# Side-by-side view of the two deep baselines: one column group per baseline,
# one row per entry of its 'stats' mapping.
results = dict([("LSTM-baseline", lstm), ("FF-baseline", fc)])
baseline_frames = {
    baseline: pd.DataFrame(stats).T for baseline, stats in results.items()
}
dnn_nakamura_results = pd.concat(baseline_frames, axis=1)
dnn_nakamura_results

Unnamed: 0_level_0,LSTM-baseline,LSTM-baseline,LSTM-baseline,LSTM-baseline,LSTM-baseline,FF-baseline,FF-baseline,FF-baseline,FF-baseline,FF-baseline
Unnamed: 0_level_1,gmr,hmr,smr,rmr,cpr,gmr,hmr,smr,rmr,cpr
full,62.706237,67.342165,82.687373,71.089658,0.925258,61.440486,66.312495,81.341563,69.820678,0.843684
right,58.446861,63.074706,81.580026,67.491235,0.928684,56.890664,61.768745,79.982022,66.567403,0.852543
left,67.510308,72.171179,84.560512,75.261245,0.902508,66.379208,71.407713,83.332069,73.585441,0.818459


# First Table

In [28]:
# Start the first table with the rounded HMM1 scores ({metric: {row: value}}).
hmm1_rounded = hmm_results.round(2)['HMM1']
first_table = {'HMM1': hmm1_rounded.to_dict()}
hmm1_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,61.77,67.66,81.09,73.15,0.89
right,58.34,65.08,81.1,72.04,0.91
left,65.08,70.17,81.1,74.12,0.89


In [29]:
# Add the rounded HMM2 scores to the first table and display them.
hmm2_rounded = hmm_results.round(2)['HMM2']
first_table['HMM2'] = hmm2_rounded.to_dict()
hmm2_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,63.78,69.49,83.59,76.12,0.98
right,60.65,66.72,83.92,75.17,1.02
left,66.3,71.88,82.98,76.61,0.95


In [30]:
# Add the rounded HMM3 scores to the first table and display them.
hmm3_rounded = hmm_results.round(2)['HMM3']
first_table['HMM3'] = hmm3_rounded.to_dict()
hmm3_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,63.63,69.4,83.05,75.42,0.94
right,60.54,66.86,83.38,74.42,0.95
left,66.24,71.7,82.41,75.91,0.95


In [31]:
# Add the rounded feed-forward baseline scores to the first table and display them.
ff_baseline_rounded = dnn_nakamura_results.round(2)['FF-baseline']
first_table['FF-baseline'] = ff_baseline_rounded.to_dict()
ff_baseline_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,61.44,66.31,81.34,69.82,0.84
right,56.89,61.77,79.98,66.57,0.85
left,66.38,71.41,83.33,73.59,0.82


In [32]:
# Add the rounded LSTM baseline scores to the first table and display them.
lstm_baseline_rounded = dnn_nakamura_results.round(2)['LSTM-baseline']
first_table['LSTM-baseline'] = lstm_baseline_rounded.to_dict()
lstm_baseline_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,62.71,67.34,82.69,71.09,0.93
right,58.45,63.07,81.58,67.49,0.93
left,67.51,72.17,84.56,75.26,0.9


In [33]:
# Add the rounded ArThumbGNN-soft scores, taken from the "no-frozen" results
# frame (`finetuning_results`), to the first table and display them.
gnn_soft_rounded = finetuning_results.round(2)['ArThumbGNN-soft']
first_table['ArThumbGNN-soft'] = gnn_soft_rounded.to_dict()
gnn_soft_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,66.84,72.62,86.83,78.55,0.99
right,62.77,68.87,86.15,76.6,0.99
left,70.92,76.32,87.58,80.37,0.99


In [34]:
# Add the rounded ArThumbLSTM scores, taken from the "frozen" results frame,
# to the first table under the 'ArThumbLSTM-frozen' label and display them.
lstm_frozen_rounded = finetuning_results_frozen.round(2)['ArThumbLSTM']
first_table['ArThumbLSTM-frozen'] = lstm_frozen_rounded.to_dict()
lstm_frozen_rounded

Unnamed: 0,gmr,hmr,smr,rmr,cpr
full,65.34,71.73,85.49,77.8,1.01
right,60.41,66.91,84.07,74.75,1.04
left,70.69,77.3,87.57,81.36,1.01


In [35]:
# Print the first table as tab-separated rows: model name, then the five
# metrics for 'right', 'left' and 'full' in that order. The list is written
# best-model-first and walked backwards. Every row keeps a trailing tab.
first_table_rows = ['ArThumbGNN-soft', 'ArThumbLSTM-frozen', 'HMM3','HMM2', 'LSTM-baseline', 'HMM1', 'FF-baseline']
for name_model in first_table_rows[::-1]:
    row_cells = [
        f'{first_table[name_model][metric][hand]}'
        for hand in ('right', 'left', 'full')
        for metric in ('gmr', 'hmr', 'smr', 'rmr', 'cpr')
    ]
    print(name_model, *row_cells, sep='\t', end='\t\n')

FF-baseline	56.89	61.77	79.98	66.57	0.85	66.38	71.41	83.33	73.59	0.82	61.44	66.31	81.34	69.82	0.84	
HMM1	58.34	65.08	81.1	72.04	0.91	65.08	70.17	81.1	74.12	0.89	61.77	67.66	81.09	73.15	0.89	
LSTM-baseline	58.45	63.07	81.58	67.49	0.93	67.51	72.17	84.56	75.26	0.9	62.71	67.34	82.69	71.09	0.93	
HMM2	60.65	66.72	83.92	75.17	1.02	66.3	71.88	82.98	76.61	0.95	63.78	69.49	83.59	76.12	0.98	
HMM3	60.54	66.86	83.38	74.42	0.95	66.24	71.7	82.41	75.91	0.95	63.63	69.4	83.05	75.42	0.94	
ArThumbLSTM-frozen	60.41	66.91	84.07	74.75	1.04	70.69	77.3	87.57	81.36	1.01	65.34	71.73	85.49	77.8	1.01	
ArThumbGNN-soft	62.77	68.87	86.15	76.6	0.99	70.92	76.32	87.58	80.37	0.99	66.84	72.62	86.83	78.55	0.99	


##  Table 2

In [53]:
# Accumulator for the ablation table: {experiment name: {model label: {'gmr', 'cpr'}}}.
table_ablation = dict()

In [54]:
# Reference rows of the ablation table: ArThumbGNN-soft from the "no-frozen"
# frame and ArThumbLSTM from the "frozen" frame, 'full' row only.
proposed_no_frozen = finetuning_results.round(2).to_dict()
proposed_frozen = finetuning_results_frozen.round(2).to_dict()

gnn_gmr = proposed_no_frozen[('ArThumbGNN-soft', 'gmr')]['full']
gnn_cpr = proposed_no_frozen[('ArThumbGNN-soft', 'cpr')]['full']

lstm_gmr = proposed_frozen[('ArThumbLSTM', 'gmr')]['full']
lstm_cpr = proposed_frozen[('ArThumbLSTM', 'cpr')]['full']

table_ablation["Models proposed"] = {
    'ArThumbGNN-soft': dict(gmr=gnn_gmr, cpr=gnn_cpr),
    'ArThumbLSTM-frozen': dict(gmr=lstm_gmr, cpr=lstm_cpr),
}

In [55]:
# Soft-label ablation: flip the soft-label setting of each proposed model
# (GNN without soft labels, LSTM with soft labels), 'full' row only.
soft_no_frozen = finetuning_results.round(2).to_dict()
soft_frozen = finetuning_results_frozen.round(2).to_dict()

gnn_gmr = soft_no_frozen[('ArThumbGNN', 'gmr')]['full']
gnn_cpr = soft_no_frozen[('ArThumbGNN', 'cpr')]['full']

lstm_gmr = soft_frozen[('ArThumbLSTM-soft', 'gmr')]['full']
lstm_cpr = soft_frozen[('ArThumbLSTM-soft', 'cpr')]['full']

table_ablation["Soft labels ablation"] = {
    'ArThumbGNN': dict(gmr=gnn_gmr, cpr=gnn_cpr),
    'ArThumbLSTM-soft-frozen': dict(gmr=lstm_gmr, cpr=lstm_cpr),
}

In [64]:
# 'fc' decoder ablation for the two proposed configurations
# (no-frozen ArThumbGNN-soft, frozen ArThumbLSTM).
# The rounded frame is converted once into {(variant, model): {metric: value}};
# the former stand-alone conversion statement was never displayed (it was not
# the last expression of the cell) and the conversion was repeated per lookup.
fc_rounded = ablation_decoder.round(2).T.to_dict()

gnn_gmr = fc_rounded[('no-frozen', 'ArThumbGNN-soft')]['gmr']
gnn_cpr = fc_rounded[('no-frozen', 'ArThumbGNN-soft')]['cpr']

lstm_gmr = fc_rounded[('frozen', 'ArThumbLSTM')]['gmr']
lstm_cpr = fc_rounded[('frozen', 'ArThumbLSTM')]['cpr']

table_ablation["Without Ar decoder"] = {
    'FFThumbGNN-soft': {
        'gmr': gnn_gmr,
        'cpr': gnn_cpr
    },
    'FFThumbLSTM-frozen': {
        'gmr': lstm_gmr,
        'cpr': lstm_cpr
    }
}

In [57]:
# beam_k=1 ablation for the two proposed configurations
# (no-frozen ArThumbGNN-soft, frozen ArThumbLSTM).
# The rounded frame is converted once into {(variant, model): {metric: value}}.
# Two discarded statements were removed; one of them read the unrelated
# `ablation_decoder` frame, which made this cell depend on it for no reason.
beam1_rounded = ablation_beam_decoder.round(2).T.to_dict()

gnn_gmr = beam1_rounded[('no-frozen', 'ArThumbGNN-soft')]['gmr']
gnn_cpr = beam1_rounded[('no-frozen', 'ArThumbGNN-soft')]['cpr']

lstm_gmr = beam1_rounded[('frozen', 'ArThumbLSTM')]['gmr']
lstm_cpr = beam1_rounded[('frozen', 'ArThumbLSTM')]['cpr']

# Label spelled 'ArThumbLSTM-frozen' (was 'ARThumbLSTM-frozen') so it matches
# the label used for the same model in "Models proposed".
table_ablation["Without beam decoding"] = {
    'ArThumbGNN-soft': {
        'gmr': gnn_gmr,
        'cpr': gnn_cpr
    },
    'ArThumbLSTM-frozen': {
        'gmr': lstm_gmr,
        'cpr': lstm_cpr
    }
}

In [58]:
# Data-augmentation ablation for the two proposed configurations
# (no-frozen ArThumbGNN-soft, frozen ArThumbLSTM).
# The rounded frame is converted once into {(variant, model): {metric: value}};
# the former stand-alone conversion statement was never displayed (it was not
# the last expression of the cell) and the conversion was repeated per lookup.
noaug_rounded = ablation_augmentation.round(2).T.to_dict()

gnn_gmr = noaug_rounded[('no-frozen', 'ArThumbGNN-soft')]['gmr']
gnn_cpr = noaug_rounded[('no-frozen', 'ArThumbGNN-soft')]['cpr']

lstm_gmr = noaug_rounded[('frozen', 'ArThumbLSTM')]['gmr']
lstm_cpr = noaug_rounded[('frozen', 'ArThumbLSTM')]['cpr']

# Label spelled 'ArThumbLSTM-frozen' (was 'ARThumbLSTM-frozen') so it matches
# the label used for the same model in "Models proposed".
table_ablation["Without data augmentation"] = {
    'ArThumbGNN-soft': {
        'gmr': gnn_gmr,
        'cpr': gnn_cpr
    },
    'ArThumbLSTM-frozen': {
        'gmr': lstm_gmr,
        'cpr': lstm_cpr
    }
}

In [59]:
# Freeze ablation: flip the frozen / no-frozen variant of each proposed model
# (GNN-soft from the "frozen" frame, LSTM from the "no-frozen" frame),
# 'full' row only.
freeze_frozen = finetuning_results_frozen.round(2).to_dict()
freeze_no_frozen = finetuning_results.round(2).to_dict()

gnn_gmr = freeze_frozen[('ArThumbGNN-soft', 'gmr')]['full']
gnn_cpr = freeze_frozen[('ArThumbGNN-soft', 'cpr')]['full']

lstm_gmr = freeze_no_frozen[('ArThumbLSTM', 'gmr')]['full']
lstm_cpr = freeze_no_frozen[('ArThumbLSTM', 'cpr')]['full']

table_ablation["Freeze encoder ablation"] = {
    'ArThumbGNN-soft-frozen': dict(gmr=gnn_gmr, cpr=gnn_cpr),
    'ArThumbLSTM': dict(gmr=lstm_gmr, cpr=lstm_cpr),
}

In [60]:
# "Only PIG training" rows, read from `only_PIG`.
# The rounded frame is converted once into {metric: {model: value}}; the former
# stand-alone conversion statement was never displayed and the conversion was
# repeated for each of the four lookups.
only_pig_rounded = only_PIG.round(2).to_dict()
table_ablation["Only PIG training"] = {
    'ArThumbGNN': {
        'gmr': only_pig_rounded['gmr']['ArThumbGNN'],
        'cpr': only_pig_rounded['cpr']['ArThumbGNN']
    },
    'ArThumbLSTM': {
        'gmr': only_pig_rounded['gmr']['ArThumbLSTM'],
        'cpr': only_pig_rounded['cpr']['ArThumbLSTM']
    }
}

In [61]:
# "Only ThumbSet training" rows, read from `only_thumbset`.
# The rounded frame is converted once into {metric: {model: value}}; the former
# stand-alone conversion statement (and its commented-out duplicate) was never
# displayed and the conversion was repeated for each of the four lookups.
only_thumbset_rounded = only_thumbset.round(2).to_dict()
table_ablation["Only ThumbSet training"] = {
    'ArThumbGNN-soft': {
        'gmr': only_thumbset_rounded['gmr']['ArThumbGNN-soft'],
        'cpr': only_thumbset_rounded['cpr']['ArThumbGNN-soft']
    },
    'ArThumbLSTM': {
        'gmr': only_thumbset_rounded['gmr']['ArThumbLSTM'],
        'cpr': only_thumbset_rounded['cpr']['ArThumbLSTM']
    }
}

In [66]:
# Order in which the ablation experiments are printed.
order_ablation = [
    'Models proposed',
    'Without data augmentation',
    'Without Ar decoder',
    'Without beam decoding',
    'Soft labels ablation',
    'Freeze encoder ablation',
    'Only PIG training',
    'Only ThumbSet training',
]

# Reference scores: the two proposed models, against which every row is compared.
proposed_rows = table_ablation["Models proposed"]
gmr_gnn = proposed_rows['ArThumbGNN-soft']['gmr']
cpr_gnn = proposed_rows['ArThumbGNN-soft']['cpr']
gmr_lstm = proposed_rows['ArThumbLSTM-frozen']['gmr']
cpr_lstm = proposed_rows['ArThumbLSTM-frozen']['cpr']
best = {
    'GNN': dict(gmr=gmr_gnn, cpr=cpr_gnn),
    'LSTM': dict(gmr=gmr_lstm, cpr=cpr_lstm),
}

# For each experiment print its name, then one tab-separated row per model:
# label, gmr, gmr difference, cpr, cpr difference (each followed by a tab).
# A model is compared with the GNN reference when its label contains "GNN",
# otherwise with the LSTM reference.
for ablation_experiment_name in order_ablation:
    print(ablation_experiment_name)
    for model_name, model_results in table_ablation[ablation_experiment_name].items():
        reference = best["GNN" if "GNN" in model_name else "LSTM"]
        row_fields = []
        for metric in ('gmr', 'cpr'):
            diff = model_results[metric] - reference[metric]
            row_fields.append(f'{model_results[metric]}\t{diff:.2f}\t')
        print(f'{model_name}\t' + ''.join(row_fields))
    

Models proposed
ArThumbGNN-soft	66.84	0.00	0.99	0.00	
ArThumbLSTM-frozen	65.34	0.00	1.01	0.00	
Without data augmentation
ArThumbGNN-soft	60.36	-6.48	0.94	-0.05	
ARThumbLSTM-frozen	64.69	-0.65	1.01	0.00	
Without Ar decoder
FFThumbGNN-soft	63.55	-3.29	0.83	-0.16	
FFThumbLSTM-frozen	27.39	-37.95	0.31	-0.70	
Without beam decoding
ArThumbGNN-soft	64.7	-2.14	0.95	-0.04	
ARThumbLSTM-frozen	64.25	-1.09	1.02	0.01	
Soft labels ablation
ArThumbGNN	66.48	-0.36	0.97	-0.02	
ArThumbLSTM-soft-frozen	65.32	-0.02	1.02	0.01	
Freeze encoder ablation
ArThumbGNN-soft-frozen	65.99	-0.85	0.94	-0.05	
ArThumbLSTM	64.01	-1.33	1.05	0.04	
Only PIG training
ArThumbGNN	60.53	-6.31	0.87	-0.12	
ArThumbLSTM	26.82	-38.52	0.44	-0.57	
Only ThumbSet training
ArThumbGNN-soft	65.24	-1.60	0.97	-0.02	
ArThumbLSTM	64.38	-0.96	1.01	0.00	
