# Perplexity

This notebook only gathers and tabulates the results of earlier runs. The main calculation can be found in `iw_perplexity.py`.

In [1]:
import sys; sys.path.append("../")

In [2]:
from dataset_wrappper import NewsData
from utils_train import transfer_batch_to_device
import os
from run_validation import load_model_for_eval
from pathlib import Path
import matplotlib.pyplot as plt
from transformers import RobertaTokenizerFast
import torch
import torch.nn.functional as F
from utils_evaluation import tokenizer_batch_decode
import numpy as np
import pickle
import copy
import tqdm

In [3]:
# Device string; not referenced by any of the cells visible in this notebook section
DEVICE = "cuda:0"
# Directory the perplexity CSV tables are written to
RESULT_DIR = Path("result-files")
# Selects which checkpoint file is looked up per run: `checkpoint-<CHECKPOINT_TYPE>.pth`
CHECKPOINT_TYPE = "best"

In [4]:
def get_clean_name(run_name):
    """Turn a raw run name into a short display label.

    The run name is split on "-"; the fifth field is expected to carry the
    latent size (e.g. ``latent32``) and, for non-autoencoder runs, the seventh
    field the free-bits value (e.g. ``0.25``).

    Args:
        run_name: Run directory name, e.g.
            ``"2021-02-02-PTB-latent32-FB-0.25-run-13:16:32"``.

    Returns:
        A label of the form ``"NZ-<latent size> | FB-<value>"`` or
        ``"NZ-<latent size> | autoencoder"``.

    Raises:
        IndexError: If the run name has fewer "-"-separated fields than expected.
    """
    import re

    fields = run_name.split("-")

    # Take the whole trailing number instead of the last two characters, so
    # that latent sizes with one or three-plus digits are not truncated.
    latent_field = fields[4]
    trailing_digits = re.search(r"\d+$", latent_field)
    latent_size = trailing_digits.group() if trailing_digits else latent_field[-2:]

    if "autoencoder" in run_name:
        FB = "autoencoder"
    else:
        FB = fields[6]
        # Pad values such as "0.5" / "1.0" to two decimals ("0.50" / "1.00")
        if len(FB) == 3:
            FB += "0"
        FB = "FB-" + FB

    return f"NZ-{latent_size} | {FB}"

In [5]:
# Map every entry of ../Runs whose name contains "PTB" to the path of its
# checkpoint file (which checkpoint is chosen by CHECKPOINT_TYPE).
PTB_run_name_paths = {
    run_dir: Path("../Runs") / run_dir / f"checkpoint-{CHECKPOINT_TYPE}.pth"
    for run_dir in os.listdir("../Runs")
    if "PTB" in run_dir
}

# 1 Importance-weighted perplexity calculation
Useful blog post on this matter: https://bjlkeng.github.io/posts/importance-sampling-and-estimating-marginal-likelihood-in-variational-autoencoders/

Calculation has been moved to `iw_perplexity.py`

In [6]:
# Get all results from different cuda devices and combine them per evaluation
# mode and run name.
# NOTE: pickle.load can execute arbitrary code; only point this at trusted files.

d = Path("result-files/ppl-results-lisa")

all_results = {"teacherforced":{}, "autoregressive":{}}

# Sorted so the order in which partial results are merged does not depend on
# the (arbitrary) order in which the file system lists the directory.
for f in sorted(os.listdir(d)):
    if not (d / f).is_file():
        # Skip sub-directories (e.g. `.ipynb_checkpoints`), which cannot be unpickled
        continue

    # Context manager so the file handle is closed right after loading
    with open(d / f, "rb") as handle:
        r = pickle.load(handle)

    # The file name is split on "_": field 1 is the mode, field 2 the run name
    # (everything before ".pickle").
    parts = f.split("_")
    mode = parts[1]
    name = parts[2].split(".pickle")[0]

    if name in all_results[mode]:
        # A previous file already contributed to this run: append to it
        merged = all_results[mode][name]
        for k, v in r.items():
            if isinstance(merged[k], list):
                merged[k].extend(v)
            else:
                merged[k] = np.concatenate([merged[k], v], axis=0)
    else:
        all_results[mode][name] = r

print("teacherforced")
for k in sorted(all_results["teacherforced"]):
    print(k)

print("\nautoregressive")
for k in sorted(all_results["autoregressive"]):
    print(k)

teacherforced
2021-02-02-PTB-latent32-FB-0.25-run-13:16:32
2021-02-02-PTB-latent32-FB-0.75-run-12:44:32
2021-02-02-PTB-latent64-FB-0.25-run-13:17:02
2021-02-02-PTB-latent64-FB-0.75-run-13:16:36
2021-02-03-PTB-latent32-FB-0.00-run-14:32:09
2021-02-03-PTB-latent32-FB-0.5-run-09:31:02
2021-02-03-PTB-latent32-FB-1.0-run-11:43:17
2021-02-03-PTB-latent32-FB-1.50-run-12:13:36
2021-02-03-PTB-latent32-autoencoder-run-17:30:41
2021-02-03-PTB-latent64-FB-0.00-run-17:14:10
2021-02-03-PTB-latent64-FB-0.50-run-12:29:58
2021-02-03-PTB-latent64-FB-1.0-run-13:06:00
2021-02-03-PTB-latent64-FB-1.50-run-13:22:14
2021-02-03-PTB-latent64-autoencoder-run-18:25:57

autoregressive
2021-02-02-PTB-latent32-FB-0.25-run-13:16:32
2021-02-02-PTB-latent32-FB-0.75-run-12:44:32
2021-02-02-PTB-latent64-FB-0.25-run-13:17:02
2021-02-02-PTB-latent64-FB-0.75-run-13:16:36
2021-02-03-PTB-latent32-FB-0.00-run-14:32:09
2021-02-03-PTB-latent32-FB-0.5-run-09:31:02
2021-02-03-PTB-latent32-FB-1.0-run-11:43:17
2021-02-03-PTB-latent3

In [7]:
import pandas as pd

def add_string_cols(df):
    """Add LaTeX-formatted "mean \\tiny{(std)}" string columns and sort by -ELBO.

    Adds the columns ``"Rate"`` and ``"-ELBO"`` to ``df`` (the passed frame is
    modified in place) and returns a copy sorted ascending by ``"-elbo_mean"``.

    Args:
        df: DataFrame with the numeric columns ``rate_mean``, ``rate_std``,
            ``-elbo_mean`` and ``-elbo_std``.

    Returns:
        The frame including the two string columns, sorted by ``-elbo_mean``.
    """
    mean_fmt = '{:,.2f}'.format
    # Raw string: in a normal literal "\t" is a TAB character, which turned the
    # LaTeX command `\tiny` into "<TAB>iny".
    std_fmt = r' \tiny{{({:,.2f})}}'.format

    df["Rate"] = df["rate_mean"].map(mean_fmt) + df["rate_std"].map(std_fmt)
    df["-ELBO"] = df["-elbo_mean"].map(mean_fmt) + df["-elbo_std"].map(std_fmt)
    df = df.sort_values('-elbo_mean')
    return df


# For each evaluation mode: summarise the "PPL" values of every run as
# mean / standard deviation, display the table and write it to a CSV file.
for mode in ["teacherforced", "autoregressive"]:
    ppl_results = {}

    for run_name, run_result in all_results[mode].items():
        mean_ppl, std_ppl = np.mean(run_result["PPL"]), np.std(run_result["PPL"])
        ppl_results[get_clean_name(run_name)] = {"ppl_mean": mean_ppl, "ppl_std": std_ppl}

    # One row per run, columns `ppl_mean` / `ppl_std`
    df = pd.DataFrame(ppl_results).transpose()
    # Raw string: in a normal literal "\t" is a TAB character, which turned the
    # LaTeX command `\tiny` into "<TAB>iny" in the written CSV.
    df["Perplexity"] = df["ppl_mean"].map('{:.2e}'.format) + df["ppl_std"].map(r' \tiny{{({:.2e})}}'.format)
    df = df.sort_values("ppl_mean")
    df = df.reset_index()
    display(df[["index", "Perplexity"]])

    if mode == "teacherforced":
        df.to_csv(RESULT_DIR / "perplexity_results_TF.csv")
    else:
        df.to_csv(RESULT_DIR / "perplexity_results_AR.csv")

Unnamed: 0,index,Perplexity
0,NZ-32 | FB-0.00,3.20e+01 \tiny{(3.76e+00)}
1,NZ-64 | FB-0.00,3.20e+01 \tiny{(3.76e+00)}
2,NZ-32 | FB-0.25,3.35e+01 \tiny{(4.24e+00)}
3,NZ-64 | FB-0.25,3.45e+01 \tiny{(4.35e+00)}
4,NZ-32 | FB-0.50,3.62e+01 \tiny{(4.60e+00)}
5,NZ-32 | FB-0.75,4.10e+01 \tiny{(5.28e+00)}
6,NZ-64 | FB-0.50,4.32e+01 \tiny{(5.62e+00)}
7,NZ-32 | FB-1.00,5.07e+01 \tiny{(6.94e+00)}
8,NZ-64 | FB-0.75,6.46e+01 \tiny{(9.32e+00)}
9,NZ-32 | FB-1.50,6.81e+01 \tiny{(9.80e+00)}


Unnamed: 0,index,Perplexity
0,NZ-64 | FB-0.25,6.18e+03 \tiny{(9.63e+02)}
1,NZ-32 | FB-0.25,6.69e+03 \tiny{(9.33e+02)}
2,NZ-32 | FB-0.50,8.03e+03 \tiny{(1.40e+03)}
3,NZ-64 | FB-0.50,9.76e+03 \tiny{(1.83e+03)}
4,NZ-32 | FB-0.75,1.08e+04 \tiny{(1.98e+03)}
5,NZ-32 | FB-1.00,1.15e+04 \tiny{(2.45e+03)}
6,NZ-64 | FB-0.75,1.33e+04 \tiny{(3.17e+03)}
7,NZ-32 | FB-1.50,1.47e+04 \tiny{(3.38e+03)}
8,NZ-64 | FB-1.00,1.49e+04 \tiny{(3.50e+03)}
9,NZ-64 | FB-1.50,2.64e+04 \tiny{(6.64e+03)}
