In [1]:
import pickle as pkl
import numpy as np
import torch
"""
for dataset in ['iwslt14', 'multi30k']:
    logs = pkl.load(open('data/{dataset}_logs.pkl'.format(dataset=dataset), 'rb'))

    num_datapoints = len(logs['sequences'])

    iterations = logs['normal_A'].keys()
    dicts = logs['sequences']
    for key in logs:
        if key == 'sequences':
            continue
        for iteration in iterations:
            for datapoint_idx in range(num_datapoints):
                dicts[datapoint_idx][(key, iteration, 'alpha')] = logs[key][iteration][datapoint_idx]['alpha']
                dicts[datapoint_idx][(key, iteration, 'beta')] = logs[key][iteration][datapoint_idx]['beta']
                dicts[datapoint_idx]['split'] = logs[key][iteration][datapoint_idx]['split']
    pkl.dump(dicts, open('data/{dataset}_logs_rz.pkl'.format(dataset=dataset), 'wb'))
    """
from utils import *

def max_acc_iter(name, metas, key):
    max_acc = -1.0
    max_acc_iter = None
    for key_ in metas:
        if key_[0] == name and key_[2] == key:
            if metas[key_] > max_acc:
                max_acc = metas[key_]
                max_acc_iter = key_[1]
    return max_acc_iter

def passing_idx(A1s, A2):
    for i in range(len(A1s)):
        if A1s[i] > A2:
            return i
    return None

def corrs_iter(dicts, key1, keys2, corr_metric, reverse=False):
    corrs = []
    baselines = []
    for key2 in keys2:
        if reverse:
                vals = corr_metric.eval_corr(dicts, key2, key1)
        else:
            vals = corr_metric.eval_corr(dicts, key1, key2)
        corrs.append(vals['correlation'])
        baselines.append(vals['baseline'])
    return corrs, baselines

def acc_iter(metas, keys):
    accs = []
    for key in keys:
        accs.append(metas[key])
    return accs

def max_corr(dicts, key1, keys2, metric, reverse=False):
    return max(corrs_iter(dicts, key1, keys2, metric, reverse=reverse)[0])

def impute_beta(dicts, beta_matrix, key_name):
    for item in dicts:
        betas = []
        for tok_trg in item['trg'][1:]:
            beta = []
            for tok_src in item['src']:
                beta.append(beta_matrix[tok_src][tok_trg])
            betas.append(beta)
        betas = np.array(betas)
        item[key_name] = betas

In [2]:
dataset = 'multi30k'
dat = pkl.load(open('outputs/{dataset}_logs.pkl'.format(dataset=dataset), 'rb'))
all_dicts = dat['data']
subset = 'val'
acc_metric = 'val_bleu'
dicts = [d for d in all_dicts if d['split'] == subset]
iterations = sorted(list(set([key[1] for key in dat['metas']])))

In [3]:
embed_beta = pkl.load(open('outputs/{dataset}embedding256translation.pkl'.format(dataset=dataset), 'rb'))
impute_beta(dicts, embed_beta, 'embed_beta')

In [4]:
metric = TopPercentMatch(p=5)
normalA_iter = max_acc_iter('normal_A', dat['metas'], acc_metric)
normalB_iter = max_acc_iter('normal_B', dat['metas'], acc_metric)
uniform_iter = max_acc_iter('uniform', dat['metas'], acc_metric)

In [5]:
gold_alpha_key = ('normal_A', normalA_iter, 'alpha')
gold_grad_key = ('normal_A', normalA_iter, 'grad')

normal_keys = [('normal_B', iter_, 'alpha') for iter_ in iterations]
acc_keys = [('normal_B', iter_, acc_metric) for iter_ in iterations]

alpha_corrs, alpha_baseline = corrs_iter(dicts, gold_alpha_key, normal_keys, metric)
alpha_perfs = acc_iter(dat['metas'], acc_keys)

avg_corr = np.array(alpha_corrs)
avg_perf = np.array(alpha_perfs)
baseline = alpha_baseline[0]

In [11]:
print(dicts[0][gold_grad_key].shape)
print(dicts[0][gold_alpha_key].shape)

(14, 14, 41)
(14, 14)


In [7]:
beta_unif_keys = [('uniform', iter_, 'beta') for iter_ in iterations]
beta_corr_unif = max_corr(dicts, gold_alpha_key, beta_unif_keys, metric)
beta_corr_grad = max_corr(dicts, gold_grad_key, beta_unif_keys, metric)
beta_corr_px = max_corr(dicts, 'embed_beta', beta_unif_keys, metric)

KeyboardInterrupt: 

In [None]:
best_acc = dat['metas'][('normal_A', normalA_iter, acc_metric)]
idx_unif = passing_idx(avg_corr, beta_corr_unif)
idx_grad = passing_idx(avg_corr, beta_corr_grad)
idx_px = passing_idx(avg_corr, beta_corr_px)
def print_perf(idx):
    if idx is None:
        return None
    else:
        return avg_perf[idx]
print(list(zip(iterations, avg_corr)))
print(idx_unif, idx_grad, idx_px)
print(beta_corr_unif, beta_corr_px, beta_corr_grad, baseline, print_perf(idx_unif), print_perf(idx_px), print_perf(idx_grad), best_acc)



In [31]:
# metrics.append(SpearmanRankCorr()) comment out because this takes a lot of time to evaluate
key1, key2 = ('normal_A', normalA_iter, 'alpha'), ('normal_B', normalB_iter, 'beta')
print(key1, key2)
for metric in metrics:
    print(metric.eval_corr(dicts, key1, key2))

('normal_A', 98000, 'alpha') ('normal_B', 98000, 'beta')
{'name': 'top 1 match', 'correlation': 0.4906809735378769, 'baseline': 0.029517236240347983}
{'name': 'top 3 match', 'correlation': 0.630424455807975, 'baseline': 0.08793696034872998}
{'name': 'top 5 match', 'correlation': 0.6835303322435521, 'baseline': 0.1448337850802433}
{'name': 'top 5% match', 'correlation': 0.6016337403712707, 'baseline': 0.06422257616825476}


In [32]:
key1, key2 = ('normal_A', iterations[-1], 'alpha'), ('uniform', iterations[-1], 'beta')
print(key1, key2)
for metric in metrics:
    print(metric.eval_corr(dicts, key1, key2))

('normal_A', 98000, 'alpha') ('uniform', 98000, 'beta')
{'name': 'top 1 match', 'correlation': 0.26383882415402243, 'baseline': 0.031557083112117065}
{'name': 'top 3 match', 'correlation': 0.3738182393979192, 'baseline': 0.0881372192881959}
{'name': 'top 5 match', 'correlation': 0.43500433118171405, 'baseline': 0.14561619209955198}
{'name': 'top 5% match', 'correlation': 0.34707202801762277, 'baseline': 0.06557315971349}


In [33]:
key1, key2 = ('uniform', iterations[-1], 'beta'), 'embed256_beta'
print(key1, key2)
for metric in metrics:
    print(metric.eval_corr(dicts, key1, key2))

('uniform', 98000, 'beta') embed256_beta
{'name': 'top 1 match', 'correlation': 0.34697888432484797, 'baseline': 0.040349847710062316}
{'name': 'top 3 match', 'correlation': 0.41936084798017903, 'baseline': 0.09972895185402521}
{'name': 'top 5 match', 'correlation': 0.47952701632808936, 'baseline': 0.1557176255809838}
{'name': 'top 5% match', 'correlation': 0.39109173722301394, 'baseline': 0.07727200752601038}


In [34]:
key1, key2 = ('uniform', iterations[-1], 'beta'), 'IBM_beta'
print(key1, key2)
for metric in metrics:
    print(metric.eval_corr(dicts, key1, key2))

('uniform', 98000, 'beta') IBM_beta


KeyError: 'IBM_beta'