In [1]:
import os
import pickle
import numpy as np

from edist import sed

In [2]:
def get_metrics(y_true, y_pred):
    assert len(y_true) == len(y_pred)
    
    wer = 1 - np.mean([prediction == target for prediction, target in zip(y_pred, y_true)]).item()
    edit_distances = [sed.standard_sed(prediction, target) for prediction, target in zip(y_pred, y_true)]
    
    edit_distance = np.mean(edit_distances).item()
    normalised_edit_distance = np.mean(
        [distance / len(target) for distance, target in zip(edit_distances, y_true)]
    ).item()
    
    return {
        "wer": wer,
        "ed": edit_distance,
        "ned": normalised_edit_distance
    }

In [3]:
def parse_filename(filename: str):
    filename = filename.split(".")[0]
    
    filename = filename.replace("non-autoregressive-lstm", "non_autoregressive_lstm")
    filename = filename.replace("non-autoregressive-fixed", "non_autoregressive_fixed")
    filename = filename.replace("non-autoregressive-position", "non_autoregressive_position")
    
    entries = filename.split("-")[1:]
    entries = [tuple(entry.split("=")) for entry in entries]
    
    return {key: value for key, value in entries}

In [4]:
predictions_path = "results/evaluation/predictions/"
results = dict()

for prediction_file_name in os.listdir(predictions_path):
    if not prediction_file_name.endswith(".pickle"):
        continue
    
    with open(os.path.join(predictions_path, prediction_file_name), "rb") as pf:
        predictions, targets = pickle.load(pf)
        predictions = [prediction.prediction for prediction in predictions]
        predictions = [
            [symbol for symbol in prediction if not (symbol.startswith("<") or symbol.endswith(">"))]
            for prediction in predictions
        ]
    
    entry_name = parse_filename(prediction_file_name)
    entry_name = tuple([entry_name[key] for key in sorted(entry_name.keys())])
    results[entry_name] = get_metrics(targets, predictions)

In [5]:
entry_name = parse_filename(prediction_file_name)
tuple([key for key in sorted(entry_name.keys())])

('augment_mask_languages', 'augment_shuffle', 'dataset', 'model', 'trial')

In [6]:
list(results.keys())[:3]

[('True', 'False', 'felekesemitic', 'non_autoregressive_fixed', '2'),
 ('True', 'False', 'mannburmish', 'non_autoregressive_position', '3'),
 ('True', 'False', 'kesslersignificance', 'autoregressive', '4')]

In [7]:
dataset_names = list(sorted(set([key[2] for key in results])))

In [8]:
model_names = list(sorted(set([key[3] for key in results])))

In [9]:
model_names

['autoregressive',
 'non_autoregressive_fixed',
 'non_autoregressive_lstm',
 'non_autoregressive_position']

In [10]:
dataset_names

['bantubvd',
 'beidazihui',
 'birchallchapacuran',
 'bodtkhobwa',
 'davletshinaztecan',
 'felekesemitic',
 'hattorijaponic',
 'kesslersignificance',
 'listsamplesize',
 'luangthongkumkaren',
 'mannburmish']

In [11]:
round(3.2345, 2)

3.23

In [12]:
def make_dataset_rows(dataset_name: str, metric: str):
    def make_row(shuffle: str, mask: str):
        shuffle = str(shuffle)
        mask = str(mask)
        
        min_values = []
        median_values = []
        
        for model_name in model_names:
            trial_metrics = []
            
            for trial in range(1, 6):
                metrics = results[(mask, shuffle, dataset_name, model_name, str(trial))]
                metric_value = metrics[metric]
                trial_metrics.append(metric_value)
            
            min_values.append(min(trial_metrics))
            median_values.append(np.median(trial_metrics).item())
        
        row = []
        best_min_value = min(min_values)
        best_median_value = min(median_values)
        
        for min_value, median_value in zip(min_values, median_values):
            if min_value == best_min_value:
                min_value = f"{{\\color{{red}} \\textbf{{{round(min_value, 2)}}}}}"
                # min_value = str(round(min_value, 2))
            else:
                min_value = str(round(min_value, 2))
            
            if median_value == best_median_value:
                median_value = f"{{\\cellcolor{{blue!25}} {round(median_value, 2)}}}"
                # median_value = str(round(median_value, 2))
            else:
                median_value = str(round(median_value, 2))
            
            row.append(f"{min_value} & {median_value}")
        
        return " & ".join(row)
    
    rows = []
    rows.append(f"{{\\small {dataset_name}}}" + " & " + make_row(False, False))
    rows.append("\\quad + shuffle & " + make_row(True, False))
    rows.append("\\quad + mask & " + make_row(False, True))
    rows.append("\\quad + both & " + make_row(True, True))
    
    return "\\\\ \n".join(rows)

In [13]:
def make_table(metric: str):
    table = ""
    table += "\\begin{tabular}{" + 4 * 2 * "c" + "} \n"
    table += " & " + " & ".join([f"\\multicolumn{{2}}{{c}}{{{model_name}}}" for model_name in model_names])
    table += " \\\\ \n"
    table += " & " + " & ".join(["$\\min$. & med."] * 4) + " \\\\ \n \\midrule \n"
    
    for dataset_name in dataset_names:
        table += make_dataset_rows(dataset_name, metric) + "\\\\ \n \midrule \n"
    
    return table

In [14]:
table = make_table("ed")

In [15]:
print(table)

\begin{tabular}{cccccccc} 
 & \multicolumn{2}{c}{autoregressive} & \multicolumn{2}{c}{non_autoregressive_fixed} & \multicolumn{2}{c}{non_autoregressive_lstm} & \multicolumn{2}{c}{non_autoregressive_position} \\ 
 & $\min$. & med. & $\min$. & med. & $\min$. & med. & $\min$. & med. \\ 
 \midrule 
{\small bantubvd} & 0.96 & 1.0 & 0.89 & {\cellcolor{blue!25} 0.92} & {\color{red} \textbf{0.88}} & 0.99 & 0.97 & 1.01\\ 
\quad + shuffle & 0.94 & 1.02 & {\color{red} \textbf{0.82}} & {\cellcolor{blue!25} 0.93} & 0.91 & 0.95 & 0.98 & 1.0\\ 
\quad + mask & 0.9 & 0.93 & 0.85 & 0.9 & 0.85 & 0.91 & {\color{red} \textbf{0.84}} & {\cellcolor{blue!25} 0.88}\\ 
\quad + both & 0.91 & 0.96 & {\color{red} \textbf{0.81}} & {\cellcolor{blue!25} 0.9} & 0.86 & 0.9 & 0.83 & 0.93\\ 
 \midrule 
{\small beidazihui} & 0.49 & 0.51 & 0.48 & {\cellcolor{blue!25} 0.51} & {\color{red} \textbf{0.45}} & 0.53 & 0.5 & 0.52\\ 
\quad + shuffle & {\color{red} \textbf{0.49}} & 0.53 & 0.53 & 0.55 & 0.51 & 0.56 & 0.51 & {\cellcolo

## Get best hyperparameters

In [16]:
import pandas as pd

from cognate_prediction_experiment import get_hyperparameters

In [17]:
hyperparameters = get_hyperparameters()
model_names = list(sorted(hyperparameters.keys()))

hyperparameter_table = pd.DataFrame.from_records(
    [
        {
            key: (round(val, 4) if isinstance(val, float) else val)
            for key, val in hyperparameters[model_name].items()
        } 
        for model_name in model_names
    ],
    index=model_names
).T

In [18]:
print(hyperparameter_table.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  autoregressive &  non-autoregressive-fixed &  non-autoregressive-lstm &  non-autoregressive-position \\
\midrule
batch\_size  &         16.0000 &                    5.0000 &                  22.0000 &                      22.0000 \\
dropout     &          0.3726 &                    0.2831 &                   0.2933 &                       0.3884 \\
epochs      &         28.0000 &                   26.0000 &                  37.0000 &                      37.0000 \\
gamma       &          0.9504 &                    0.9116 &                   0.9034 &                       0.9445 \\
hidden\_size &        367.0000 &                  157.0000 &                 353.0000 &                     176.0000 \\
lr          &          0.0015 &                    0.0013 &                   0.0016 &                       0.0027 \\
num\_layers  &          1.0000 &                    1.0000 &                   1.0000 &                       1.0000 \\
\bottomrule
\

  print(hyperparameter_table.to_latex())


## Get Ranking

In [19]:
ranking_scores = {model_type.replace("-", "_"): 0 for model_type in  model_names}

for dataset_name in dataset_names:
    for do_mask in [str(False), str(True)]:
        for do_shuffle in [str(False), str(True)]:
            for i, model_name_1 in enumerate(model_names):
                model_name_1 = model_name_1.replace("-", "_")
                for model_name_2 in model_names[i+1:]:
                    model_name_2 = model_name_2.replace("-", "_")
                    
                    for trial_1 in range(1, 6):
                        for trial_2 in range(1, 6):
                            score_1 = results[(do_shuffle, do_mask, dataset_name, model_name_1, str(trial_1))]
                            score_1 = score_1["ed"]
                            
                            score_2 = results[(do_shuffle, do_mask, dataset_name, model_name_2, str(trial_2))]
                            score_2 = score_2["ed"]
                            
                            if score_1 < score_2:
                                ranking_scores[model_name_1] += 1
                                ranking_scores[model_name_2] -= 1
                            elif score_1 > score_2:
                                ranking_scores[model_name_1] -= 1
                                ranking_scores[model_name_2] += 1


In [20]:
ranking_scores

{'autoregressive': -1124,
 'non_autoregressive_fixed': 593,
 'non_autoregressive_lstm': 726,
 'non_autoregressive_position': -195}

## Data Augmentation Effect

In [21]:
from itertools import product

In [22]:
def params_to_method_name(shuffle, mask):
    if shuffle and mask:
        return "both"
    elif shuffle and not mask:
        return "shuffle"
    elif not shuffle and mask:
        return "mask"
    else:
        return "none"

In [23]:
abs_gains = {
    params_to_method_name(shuffle, mask): {model_name.replace("-", "_"): [] for model_name in model_names}
    for shuffle, mask in product([False, True], [False, True])
}

perc_gains = {
    params_to_method_name(shuffle, mask): {model_name.replace("-", "_"): [] for model_name in model_names}
    for shuffle, mask in product([False, True], [False, True])
}


for dataset_name in dataset_names:
    for do_mask in [False, True]:
        for do_shuffle in [False, True]:
            for model_name in model_names:
                model_name = model_name.replace("-", "_")
                
                baseline_score = np.mean(
                    [results[(str(False), str(False), dataset_name, model_name, str(trial))]["ed"]
                     for trial in range(1, 6)
                    ]
                )
                augmentation_score = np.mean(
                    [results[(str(do_mask), str(do_shuffle), dataset_name, model_name, str(trial))]["ed"]
                     for trial in range(1, 6)
                    ]
                )
                
                diff = augmentation_score - baseline_score
                perc = ((baseline_score - augmentation_score) / baseline_score) * 100
                
                abs_gains[params_to_method_name(do_shuffle, do_mask)][model_name].append(diff)
                perc_gains[params_to_method_name(do_shuffle, do_mask)][model_name].append(perc)
                        

In [24]:
augmentation_techniques = list(sorted(abs_gains.keys()))
augmentation_table = dict()

for technique in augmentation_techniques:
    augmentation_table[technique + "+ abs"] = dict()
    # augmentation_table[technique + "+ perc"] = dict()

for technique in augmentation_techniques:
    for model_name in model_names:
        augmentation_table[technique + "+ abs"][model_name] = np.mean(
            abs_gains[technique][model_name.replace("-", "_")]
        )
        # augmentation_table[technique + "+ perc"][model_name] = np.mean(
        #    perc_gains[technique][model_name.replace("-", "_")]
        #)

augmentation_table = pd.DataFrame.from_dict(augmentation_table).T.round(3)

In [25]:
print(augmentation_table.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  autoregressive &  non-autoregressive-fixed &  non-autoregressive-lstm &  non-autoregressive-position \\
\midrule
both+ abs    &          -0.007 &                    -0.012 &                   -0.062 &                       -0.090 \\
mask+ abs    &          -0.006 &                    -0.060 &                   -0.077 &                       -0.108 \\
none+ abs    &           0.000 &                     0.000 &                    0.000 &                        0.000 \\
shuffle+ abs &           0.038 &                    -0.003 &                   -0.014 &                       -0.026 \\
\bottomrule
\end{tabular}



  print(augmentation_table.to_latex())


## Examples

In [26]:
import os

In [27]:
pred_file_name = "cognates-model=non-autoregressive-lstm-dataset=listsamplesize-augment_shuffle=False"
pred_file_name = pred_file_name + "-augment_mask_languages=True-trial=1.pickle"

pred_file_name = f"results/evaluation/predictions/{pred_file_name}"

with open(pred_file_name, "rb") as pf:
    predictions = pickle.load(pf)

In [28]:
mother_pred = predictions[0][6]

In [29]:
mother_pred

TransducerPrediction(prediction=['<SOS>', 'm', 'ɔ', 'd', 'ə', 'r', '<EOS>'], alignment=[AlignmentPosition(symbol=['<SOS>', '<SOS>', '<SOS>'], actions=[<actions.Substitution object at 0x7f79c097c490>], predictions=['<SOS>']), AlignmentPosition(symbol=['m', 'm', 'm'], actions=[<actions.Substitution object at 0x7f79c07cf280>], predictions=['m']), AlignmentPosition(symbol=['ʌ', 'ɛ', 'ʊ'], actions=[<actions.Substitution object at 0x7f79c07e89a0>], predictions=['ɔ']), AlignmentPosition(symbol=['ð', '-', 't'], actions=[<actions.Substitution object at 0x7f79c07e86a0>], predictions=['d']), AlignmentPosition(symbol=['ə', '-', 'ə'], actions=[<actions.Substitution object at 0x7f79c07e8970>], predictions=['ə']), AlignmentPosition(symbol=['r', 'ʀ', 'r'], actions=[<actions.Substitution object at 0x7f79c07e8610>], predictions=['r']), AlignmentPosition(symbol=['<EOS>', '<EOS>', '<EOS>'], actions=[<actions.Substitution object at 0x7f79c07e8220>], predictions=['<EOS>'])])

In [30]:
pred = ""
source = ["", "", ""]

for position in mother_pred.alignment[1:-1]:
    pred = pred + " & " + " ".join(position.predictions)
    
    for i, symbol in enumerate(position.symbol):
        source[i] = source[i] + " & " + symbol

print(" & m & uː & d & ə & r")
print(pred)
print()
for s in source:
    print(s)

 & m & uː & d & ə & r
 & m & ɔ & d & ə & r

 & m & ʌ & ð & ə & r
 & m & ɛ & - & - & ʀ
 & m & ʊ & t & ə & r


In [31]:
horn_pred = predictions[0][122]

pred = ""
source = ["", "", "", ""]

for position in horn_pred.alignment[1:-1]:
    pred = pred + " & " + " ".join(position.predictions)
    
    for i, symbol in enumerate(position.symbol):
        source[i] = source[i] + " & " + symbol

print(" & b & a & ʀ &  b")
print(pred)
print()
for s in source:
    print(s)

 & b & a & ʀ &  b
 & b & ɑ̃ & ʀ &  & t

 & b & aː & r & - & t
 & b & ɪ & - & ə & d
 & b & aː & r & - & t



## Multilingual

In [38]:
def get_targets(dataset_name: str):
    with open(f"data/{dataset_name}/solutions-0.10.tsv") as tf:
        test_targets = []
        for line in tf:
            entries = line.strip().split("\t")
            entries = [entry.strip() for entry in entries if entry.strip()]
            if len(entries) == 2:
                test_targets.append(entries[1].split())
        return test_targets

In [39]:
predictions_path = "results/multilingual/predictions/"
results = dict()

for prediction_file_name in os.listdir(predictions_path):
    if not prediction_file_name.endswith(".pickle"):
        continue
    
    with open(os.path.join(predictions_path, prediction_file_name), "rb") as pf:
        predictions, _ = pickle.load(pf)
        predictions = [prediction.prediction for prediction in predictions]
        predictions = [
            [symbol for symbol in prediction if not (symbol.startswith("<") or symbol.endswith(">"))]
            for prediction in predictions
        ]
    
    entry_name = parse_filename(prediction_file_name)
    entry_name = tuple([entry_name[key] for key in sorted(entry_name.keys())])
    targets = get_targets(entry_name[2])
    
    results[entry_name] = get_metrics(targets, predictions)

In [59]:
table = pd.DataFrame(
    index=dataset_names,
    columns=pd.MultiIndex.from_tuples(list(product(model_names, ["min", "median"])))
)

for dataset_name in dataset_names:
    for model_name in model_names:
        scores = [
            results[("True", "False", dataset_name, model_name.replace("-", "_"), str(trial))]["ed"]
            for trial in range(1, 6)
        ]
        table.loc[dataset_name][(model_name, "min")] = round(np.min(scores).item(), 2)
        table.loc[dataset_name][(model_name, "median")] = round(np.median(scores).item(), 2)

In [57]:
table.round(3)

Unnamed: 0_level_0,autoregressive,autoregressive,non-autoregressive-fixed,non-autoregressive-fixed,non-autoregressive-lstm,non-autoregressive-lstm,non-autoregressive-position,non-autoregressive-position
Unnamed: 0_level_1,min,median,min,median,min,median,min,median
bantubvd,0.859,0.935,0.819,0.839,0.758,0.819,0.81,0.903
beidazihui,0.472,0.504,0.571,0.581,0.445,0.455,0.578,0.591
birchallchapacuran,1.701,1.75,1.505,1.565,1.451,1.554,1.647,1.652
bodtkhobwa,0.395,0.405,0.448,0.452,0.37,0.376,0.459,0.478
davletshinaztecan,1.926,2.046,1.861,1.917,1.852,1.935,1.88,2.046
felekesemitic,1.426,1.45,1.361,1.421,1.474,1.487,1.363,1.382
hattorijaponic,0.843,0.904,0.846,0.868,0.7,0.768,0.843,0.907
kesslersignificance,2.475,2.545,2.434,2.515,2.525,2.545,2.434,2.535
listsamplesize,2.119,2.175,2.222,2.247,2.098,2.186,2.216,2.232
luangthongkumkaren,0.316,0.368,0.411,0.438,0.306,0.322,0.474,0.503


In [60]:
print(table.to_latex())

\begin{tabular}{lllllllll}
\toprule
{} & \multicolumn{2}{l}{autoregressive} & \multicolumn{2}{l}{non-autoregressive-fixed} & \multicolumn{2}{l}{non-autoregressive-lstm} & \multicolumn{2}{l}{non-autoregressive-position} \\
{} &            min & median &                      min & median &                     min & median &                         min & median \\
\midrule
bantubvd            &           0.86 &   0.94 &                     0.82 &   0.84 &                    0.76 &   0.82 &                        0.81 &    0.9 \\
beidazihui          &           0.47 &    0.5 &                     0.57 &   0.58 &                    0.45 &   0.46 &                        0.58 &   0.59 \\
birchallchapacuran  &            1.7 &   1.75 &                     1.51 &   1.57 &                    1.45 &   1.55 &                        1.65 &   1.65 \\
bodtkhobwa          &            0.4 &    0.4 &                     0.45 &   0.45 &                    0.37 &   0.38 &                        0.46 &  

  print(table.to_latex())
