In [1]:
%load_ext autoreload

In [2]:
import json
import pickle
import pandas as pd
from glob import glob
from pathlib import Path
import scipy.sparse as sp
from tqdm.notebook import tqdm
from gensim.models.word2vec import Word2Vec

import sys
sys.path.append('../../')

import src.test.util as test_util

# Imports

In [3]:
# All inputs are addressed relative to the directory two levels above the notebook.
base_dir = Path("../..")
data_dir = base_dir / "data"
nyt_model_dir = base_dir / "model" / "nyt-data"

path_to_test_data = data_dir / "test"
path_to_tppmi_model = data_dir / "ppmi-matrices" / "nyt-data"
path_to_twec_model = nyt_model_dir / "cade" / "model"
path_to_static_model = nyt_model_dir / "static"

# Setup

## Testsets

### Testset 1

Based on publicly recorded knowledge that lists, for each year, the name associated with a particular role, such as U.S. president, U.K. prime minister, NFL Super Bowl champion team, and so on.

In [4]:
# Load testset 1. read_csv runs with its defaults, so the first row of the file
# is taken as the header row.
# NOTE(review): confirm testset_1.csv really starts with a header line; otherwise
# the first test case is consumed as column names.
test_data_1_all = pd.read_csv(path_to_test_data / "testset_1.csv")

In [5]:
# Overwrite the column labels in place with the names used by the rest of the
# notebook; this requires the frame to have exactly two columns.
test_data_1_all.columns = ['truth', 'equivalent']

In [6]:
# Order the rows by the 'truth' column (ascending); the sorted frame replaces test_data_1_all.
test_data_1_all = test_data_1_all.sort_values(by='truth', ascending=True)

In [7]:
# Distinct 'truth' entries of the complete testset; these are used as the test cases below.
test_cases_1_all = test_data_1_all['truth'].unique()

In [8]:
# Report row count and number of distinct test cases for the complete testset.
summary_lines = [
    "Testset",
    f"Testcases (all): {len(test_data_1_all)}",
    f"Testcases (unique): {len(test_cases_1_all)}",
]
print("\n".join(summary_lines))

Testset
Testcases (all): 11027
Testcases (unique): 499


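Now we split the testset into static and dynamic test cases, as done by Di Carlo et al. in their paper "Training Temporal Word Embeddings with a Compass". A pair is static when the query and its equivalent are the same word (in different years), and dynamic when the two words differ.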

In [9]:
# Separate each entry into its word and its year. Entries look like "<word>-<year>",
# so we split once from the right: a word that itself contains a hyphen then stays
# intact instead of being truncated at its first hyphen.
split_truth = test_data_1_all['truth'].str.rsplit('-', n=1, expand=True)
split_equivalent = test_data_1_all['equivalent'].str.rsplit('-', n=1, expand=True)

# A pair is "static" when both sides carry the same word and "dynamic" otherwise.
# The dynamic mask is the exact complement, so every row lands in exactly one split.
static_mask = split_truth[0] == split_equivalent[0]
dynamic_mask = ~static_mask

# Apply the masks to obtain the two disjoint sub-testsets.
test_data_1_static = test_data_1_all[static_mask]
test_data_1_dynamic = test_data_1_all[dynamic_mask]

In [10]:
# Distinct 'truth' entries within each split. The same 'truth' entry can occur in both
# splits, since the split is made per (truth, equivalent) row.
test_cases_1_static = test_data_1_static['truth'].unique()
test_cases_1_dynamic = test_data_1_dynamic['truth'].unique()

In [11]:
# Report row count and number of distinct test cases for the static split.
summary_lines = [
    "Static",
    f"Testcases (all): {len(test_data_1_static)}",
    f"Testcases (unique): {len(test_cases_1_static)}",
]
print("\n".join(summary_lines))

Static
Testcases (all): 2937
Testcases (unique): 443


In [12]:
# Report row count and number of distinct test cases for the dynamic split.
summary_lines = [
    "Dynamic",
    f"Testcases (all): {len(test_data_1_dynamic)}",
    f"Testcases (unique): {len(test_cases_1_dynamic)}",
]
print("\n".join(summary_lines))

Dynamic
Testcases (all): 8090
Testcases (unique): 499


## Models

### TWEC

In [13]:
# Paths of all TWEC model files whose names end in "_data.model".
# glob does not guarantee any particular order; the models are sorted by year after loading.
model_filenames_cade = glob(str(path_to_twec_model / "*_data.model"))

In [14]:
# Load each model file and key it as "model_<year>", where <year> is the four
# characters directly preceding "_data" in the file path.
models_cade = {}
for model_file in tqdm(model_filenames_cade):
    year = model_file.split('_data')[0][-4:]
    models_cade[f"model_{year}"] = Word2Vec.load(model_file)

  0%|          | 0/27 [00:00<?, ?it/s]

In [15]:
# Rebuild the dictionary so that its keys are in ascending year order
# (the year is the numeric part after the underscore in "model_<year>").
models_cade = dict(sorted(models_cade.items(), key=lambda item: int(item[0].split('_')[1])))

In [16]:
# Display the model keys, which are now in ascending year order.
models_cade.keys()

dict_keys(['model_1990', 'model_1991', 'model_1992', 'model_1993', 'model_1994', 'model_1995', 'model_1996', 'model_1997', 'model_1998', 'model_1999', 'model_2000', 'model_2001', 'model_2002', 'model_2003', 'model_2004', 'model_2005', 'model_2006', 'model_2007', 'model_2008', 'model_2009', 'model_2010', 'model_2011', 'model_2012', 'model_2013', 'model_2014', 'model_2015', 'model_2016'])

Create a dictionary of test cases that contains every test word along with its embedding in the respective year

In [17]:
# TWEC test-case dictionary over all test cases (the recorded run reported 2 test cases missing from the vocab).
test_case_dict_cade_all = test_util.create_test_case_dict_cade(test_cases_1_all, models_cade)

2 Testcases are not in the vocab of the model(s)


In [18]:
# TWEC test-case dictionary over the static test cases (the recorded run reported 2 test cases missing from the vocab).
test_case_dict_cade_static = test_util.create_test_case_dict_cade(test_cases_1_static, models_cade)

2 Testcases are not in the vocab of the model(s)


In [19]:
# TWEC test-case dictionary over the dynamic test cases (the recorded run reported 2 test cases missing from the vocab).
test_case_dict_cade_dynamic = test_util.create_test_case_dict_cade(test_cases_1_dynamic, models_cade)

2 Testcases are not in the vocab of the model(s)


Retrieve most similar words for each testword in each year

In [20]:
# Similarity lookups with the yearly TWEC models for all test cases (497 entries in the recorded run).
similarities_cade_all = test_util.get_similarities_of_models(models_cade, test_case_dict_cade_all)

  0%|          | 0/497 [00:00<?, ?it/s]

In [21]:
# Similarity lookups with the yearly TWEC models for the static test cases (441 entries in the recorded run).
similarities_cade_static = test_util.get_similarities_of_models(models_cade, test_case_dict_cade_static)

  0%|          | 0/441 [00:00<?, ?it/s]

In [22]:
# Similarity lookups with the yearly TWEC models for the dynamic test cases (497 entries in the recorded run).
similarities_cade_dynamic = test_util.get_similarities_of_models(models_cade, test_case_dict_cade_dynamic)

  0%|          | 0/497 [00:00<?, ?it/s]

### Static Word2Vec

In [23]:
# Load the single static Word2Vec model that serves as the baseline in the results below.
model_static = Word2Vec.load(str(path_to_static_model / "w2v_model.model"))

In [24]:
# Static-model test-case dictionary over all test cases (the recorded run reported 0 missing from the vocab).
test_case_dict_static_all = test_util.create_test_case_dict_static(model_static, test_cases_1_all)

0 Testcases are not in the vocab of the model


In [25]:
# Static-model test-case dictionary over the static test cases (the recorded run reported 0 missing from the vocab).
test_case_dict_static_static = test_util.create_test_case_dict_static(model_static, test_cases_1_static)

0 Testcases are not in the vocab of the model


In [26]:
# Static-model test-case dictionary over the dynamic test cases (the recorded run reported 0 missing from the vocab).
test_case_dict_static_dynamic = test_util.create_test_case_dict_static(model_static, test_cases_1_dynamic)

0 Testcases are not in the vocab of the model


In [27]:
# Similarity lookups with the static model for all test cases (499 entries in the recorded run).
similarities_static_all = test_util.get_similarities_of_models_static(model_static, test_case_dict_static_all)

  0%|          | 0/499 [00:00<?, ?it/s]

In [28]:
# Similarity lookups with the static model for the static test cases (443 entries in the recorded run).
similarities_static_static = test_util.get_similarities_of_models_static(model_static, test_case_dict_static_static)

  0%|          | 0/443 [00:00<?, ?it/s]

In [29]:
# Similarity lookups with the static model for the dynamic test cases (499 entries in the recorded run).
similarities_static_dynamic = test_util.get_similarities_of_models_static(model_static, test_case_dict_static_dynamic)

  0%|          | 0/499 [00:00<?, ?it/s]

### TPPMI

In [162]:
%autoreload 2

from src.packages.TPPMI.ppmi_model import PPMIModel
from src.packages.TPPMI.tppmi_model import TPPMIModel
import src.test.util as test_util

In [31]:
# Each sub-directory stores the PPMI data built with the respective number of context words.
sub_dirs = ["500", "1000", "2000", "4000", "6000"]

# Gather the matrix files (.npz) and the pickled word lists (.pkl) of every
# sub-directory, then order both lists lexicographically by path.
ppmi_data_files = []
words_files = []
for sub_dir in sub_dirs:
    size_dir = path_to_tppmi_model / sub_dir
    ppmi_data_files.extend(glob(str(size_dir / "*.npz")))
    words_files.extend(glob(str(size_dir / "*.pkl")))

ppmi_data_files.sort()
words_files.sort()

Split context-words from timestamped-vocabularies

In [32]:
# Partition the pickle files: paths containing "context-words" hold the shared
# context-word lists, all remaining paths are treated as per-year vocabularies.
context_words_files = []
ppmi_vocab_files = []
for words_file in words_files:
    if "context-words" in words_file:
        context_words_files.append(words_file)
    else:
        ppmi_vocab_files.append(words_file)

In [33]:
# List the context-word files that were found. The order is lexicographic by path,
# which is why "500" appears after "4000" in the recorded output.
for context_words_file in context_words_files:
    print(context_words_file)

../../data/ppmi-matrices/nyt-data/1000/context-words.pkl
../../data/ppmi-matrices/nyt-data/2000/context-words.pkl
../../data/ppmi-matrices/nyt-data/4000/context-words.pkl
../../data/ppmi-matrices/nyt-data/500/context-words.pkl
../../data/ppmi-matrices/nyt-data/6000/context-words.pkl


In [34]:
# Get ppmi-matrices and vocab
#
# Every file was collected directly from one of the size directories
# ("500", "1000", ...), so the name of its parent directory identifies the
# context-word setting unambiguously. This replaces substring tests on the whole
# path, which can be confused by other digits in the path and which silently
# dropped anything unmatched into the 2000 bucket.
ppmi_matrices_by_size = {size: {} for size in ("500", "1000", "2000", "4000", "6000")}

# The vocab and matrix lists are paired purely by position after sorting, so a
# count mismatch would silently mis-pair or truncate; fail loudly instead.
if len(ppmi_vocab_files) != len(ppmi_data_files):
    raise ValueError(
        f"Found {len(ppmi_vocab_files)} vocabulary files but {len(ppmi_data_files)} matrix files"
    )

for vocab_file, data_file in zip(ppmi_vocab_files, ppmi_data_files):
    size = Path(vocab_file).parent.name
    if Path(data_file).parent.name != size:
        raise ValueError(f"Vocabulary/matrix pair spans two directories: {vocab_file} vs. {data_file}")
    if size not in ppmi_matrices_by_size:
        raise ValueError(f"Unexpected context-word directory '{size}' for {vocab_file}")

    ppmi_matrix = sp.load_npz(data_file)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
    with open(vocab_file, "rb") as f:
        vocab = pickle.load(f)

    # The year is the four characters following "ppmi-" in the file name. Using the
    # file name (not the full path) keeps this independent of directory names.
    key = Path(vocab_file).name.split("ppmi-")[1][0:4]
    ppmi_matrices_by_size[size][key] = {"ppmi_matrix": ppmi_matrix, "vocab": vocab}

ppmi_matrices_500 = ppmi_matrices_by_size["500"]
ppmi_matrices_1000 = ppmi_matrices_by_size["1000"]
ppmi_matrices_2000 = ppmi_matrices_by_size["2000"]
ppmi_matrices_4000 = ppmi_matrices_by_size["4000"]
ppmi_matrices_6000 = ppmi_matrices_by_size["6000"]

# Get common context-words (one pickled list per size directory)
context_words_by_size = {}
for context_words_file in context_words_files:
    with open(context_words_file, "rb") as f:
        context_words_by_size[Path(context_words_file).parent.name] = pickle.load(f)

# A missing context-word file surfaces here as a KeyError naming the size,
# instead of as a NameError in a later cell.
context_words_500 = context_words_by_size["500"]
context_words_1000 = context_words_by_size["1000"]
context_words_2000 = context_words_by_size["2000"]
context_words_4000 = context_words_by_size["4000"]
context_words_6000 = context_words_by_size["6000"]

In [35]:
# Display the year keys that were loaded for the 500-context-word setting.
ppmi_matrices_500.keys()

dict_keys(['1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016'])

Create ppmi_model objects

In [36]:
def _build_ppmi_models(ppmi_matrices, context_words):
    """Construct one normalized PPMIModel per year key from the loaded matrix/vocab data."""
    models = {}
    for year, ppmi_data in ppmi_matrices.items():
        models[year] = PPMIModel.construct_from_data(
            ppmi_data["ppmi_matrix"], ppmi_data["vocab"], context_words, normalize=True
        )
    return models


ppmi_models_500 = _build_ppmi_models(ppmi_matrices_500, context_words_500)
ppmi_models_1000 = _build_ppmi_models(ppmi_matrices_1000, context_words_1000)
ppmi_models_2000 = _build_ppmi_models(ppmi_matrices_2000, context_words_2000)
ppmi_models_4000 = _build_ppmi_models(ppmi_matrices_4000, context_words_4000)
ppmi_models_6000 = _build_ppmi_models(ppmi_matrices_6000, context_words_6000)

In [37]:
# One temporal model per context-word setting, all built with the same options.
tppmi_options = {"dates": "years", "smooth": False}

tppmi_model_500 = TPPMIModel(ppmi_models_500, **tppmi_options)
tppmi_model_1000 = TPPMIModel(ppmi_models_1000, **tppmi_options)
tppmi_model_2000 = TPPMIModel(ppmi_models_2000, **tppmi_options)
tppmi_model_4000 = TPPMIModel(ppmi_models_4000, **tppmi_options)
tppmi_model_6000 = TPPMIModel(ppmi_models_6000, **tppmi_options)

Create test-dictionaries for all test cases

In [38]:
# TPPMI (500 context words), all test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_500_all = test_util.create_test_case_dict_tppmi(tppmi_model_500, test_cases_1_all)

19 Testcases are not in the vocab of the model


In [39]:
# TPPMI (1000 context words), all test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_1000_all = test_util.create_test_case_dict_tppmi(tppmi_model_1000, test_cases_1_all)

19 Testcases are not in the vocab of the model


In [40]:
# TPPMI (2000 context words), all test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_2000_all = test_util.create_test_case_dict_tppmi(tppmi_model_2000, test_cases_1_all)

19 Testcases are not in the vocab of the model


In [41]:
# TPPMI (4000 context words), all test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_4000_all = test_util.create_test_case_dict_tppmi(tppmi_model_4000, test_cases_1_all)

19 Testcases are not in the vocab of the model


In [42]:
# TPPMI (6000 context words), all test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_6000_all = test_util.create_test_case_dict_tppmi(tppmi_model_6000, test_cases_1_all)

19 Testcases are not in the vocab of the model


Create test-dictionaries for static test cases

In [44]:
# TPPMI (500 context words), static test cases; the recorded run reported 15 test cases missing from the vocab.
test_case_dict_tppmi_500_static = test_util.create_test_case_dict_tppmi(tppmi_model_500, test_cases_1_static)

15 Testcases are not in the vocab of the model


In [45]:
# TPPMI (1000 context words), static test cases; the recorded run reported 15 test cases missing from the vocab.
test_case_dict_tppmi_1000_static = test_util.create_test_case_dict_tppmi(tppmi_model_1000, test_cases_1_static)

15 Testcases are not in the vocab of the model


In [46]:
# TPPMI (2000 context words), static test cases; the recorded run reported 15 test cases missing from the vocab.
test_case_dict_tppmi_2000_static = test_util.create_test_case_dict_tppmi(tppmi_model_2000, test_cases_1_static)

15 Testcases are not in the vocab of the model


In [47]:
# TPPMI (4000 context words), static test cases; the recorded run reported 15 test cases missing from the vocab.
test_case_dict_tppmi_4000_static = test_util.create_test_case_dict_tppmi(tppmi_model_4000, test_cases_1_static)

15 Testcases are not in the vocab of the model


In [48]:
# TPPMI (6000 context words), static test cases; the recorded run reported 15 test cases missing from the vocab.
test_case_dict_tppmi_6000_static = test_util.create_test_case_dict_tppmi(tppmi_model_6000, test_cases_1_static)

15 Testcases are not in the vocab of the model


In [49]:
# Disabled: no 8000-context-word model (tppmi_model_8000) is built in the cells above.
#test_case_dict_tppmi_8000_static = test_util.create_test_case_dict_tppmi(tppmi_model_8000, test_cases_1_static)

Create test-dictionaries for dynamic test cases

In [50]:
# TPPMI (500 context words), dynamic test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_500_dynamic = test_util.create_test_case_dict_tppmi(tppmi_model_500, test_cases_1_dynamic)

19 Testcases are not in the vocab of the model


In [51]:
# TPPMI (1000 context words), dynamic test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_1000_dynamic = test_util.create_test_case_dict_tppmi(tppmi_model_1000, test_cases_1_dynamic)

19 Testcases are not in the vocab of the model


In [52]:
# TPPMI (2000 context words), dynamic test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_2000_dynamic = test_util.create_test_case_dict_tppmi(tppmi_model_2000, test_cases_1_dynamic)

19 Testcases are not in the vocab of the model


In [53]:
# TPPMI (4000 context words), dynamic test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_4000_dynamic = test_util.create_test_case_dict_tppmi(tppmi_model_4000, test_cases_1_dynamic)

19 Testcases are not in the vocab of the model


In [54]:
# TPPMI (6000 context words), dynamic test cases; the recorded run reported 19 test cases missing from the vocab.
test_case_dict_tppmi_6000_dynamic = test_util.create_test_case_dict_tppmi(tppmi_model_6000, test_cases_1_dynamic)

19 Testcases are not in the vocab of the model


#### Calculate similarities

Calculate similarities for all testcases

In [56]:
# Similarity lookups, TPPMI with 500 context words, all test cases (480 entries in the recorded run).
similarities_tppmi_500_all = test_util.get_similarites_of_models_tppmi(tppmi_model_500, test_case_dict_tppmi_500_all)

  0%|          | 0/480 [00:00<?, ?it/s]

In [57]:
# Similarity lookups, TPPMI with 1000 context words, all test cases (480 entries in the recorded run).
similarities_tppmi_1000_all = test_util.get_similarites_of_models_tppmi(tppmi_model_1000, test_case_dict_tppmi_1000_all)

  0%|          | 0/480 [00:00<?, ?it/s]

In [58]:
# Similarity lookups, TPPMI with 2000 context words, all test cases (480 entries in the recorded run).
similarities_tppmi_2000_all = test_util.get_similarites_of_models_tppmi(tppmi_model_2000, test_case_dict_tppmi_2000_all)

  0%|          | 0/480 [00:00<?, ?it/s]

In [59]:
# Similarity lookups, TPPMI with 4000 context words, all test cases (480 entries in the recorded run).
similarities_tppmi_4000_all = test_util.get_similarites_of_models_tppmi(tppmi_model_4000, test_case_dict_tppmi_4000_all)

  0%|          | 0/480 [00:00<?, ?it/s]

In [60]:
# Similarity lookups, TPPMI with 6000 context words, all test cases (480 entries in the recorded run).
similarities_tppmi_6000_all = test_util.get_similarites_of_models_tppmi(tppmi_model_6000, test_case_dict_tppmi_6000_all)

  0%|          | 0/480 [00:00<?, ?it/s]

Calculate similarities for static testcases

In [62]:
# Similarity lookups, TPPMI with 500 context words, static test cases (428 entries in the recorded run).
similarities_tppmi_500_static = test_util.get_similarites_of_models_tppmi(tppmi_model_500, test_case_dict_tppmi_500_static)

  0%|          | 0/428 [00:00<?, ?it/s]

In [63]:
# Similarity lookups, TPPMI with 1000 context words, static test cases (428 entries in the recorded run).
similarities_tppmi_1000_static = test_util.get_similarites_of_models_tppmi(tppmi_model_1000, test_case_dict_tppmi_1000_static)

  0%|          | 0/428 [00:00<?, ?it/s]

In [64]:
# Similarity lookups, TPPMI with 2000 context words, static test cases (428 entries in the recorded run).
similarities_tppmi_2000_static = test_util.get_similarites_of_models_tppmi(tppmi_model_2000, test_case_dict_tppmi_2000_static)

  0%|          | 0/428 [00:00<?, ?it/s]

In [65]:
# Similarity lookups, TPPMI with 4000 context words, static test cases (428 entries in the recorded run).
similarities_tppmi_4000_static = test_util.get_similarites_of_models_tppmi(tppmi_model_4000, test_case_dict_tppmi_4000_static)

  0%|          | 0/428 [00:00<?, ?it/s]

In [66]:
# Similarity lookups, TPPMI with 6000 context words, static test cases (428 entries in the recorded run).
similarities_tppmi_6000_static = test_util.get_similarites_of_models_tppmi(tppmi_model_6000, test_case_dict_tppmi_6000_static)

  0%|          | 0/428 [00:00<?, ?it/s]

Calculate similarities for dynamic testcases

In [68]:
# Similarity lookups, TPPMI with 500 context words, dynamic test cases (480 entries in the recorded run).
similarities_tppmi_500_dynamic = test_util.get_similarites_of_models_tppmi(tppmi_model_500, test_case_dict_tppmi_500_dynamic)

  0%|          | 0/480 [00:00<?, ?it/s]

In [69]:
# Similarity lookups, TPPMI with 1000 context words, dynamic test cases (480 entries in the recorded run).
similarities_tppmi_1000_dynamic = test_util.get_similarites_of_models_tppmi(tppmi_model_1000, test_case_dict_tppmi_1000_dynamic)

  0%|          | 0/480 [00:00<?, ?it/s]

In [70]:
# Similarity lookups, TPPMI with 2000 context words, dynamic test cases (480 entries in the recorded run).
similarities_tppmi_2000_dynamic = test_util.get_similarites_of_models_tppmi(tppmi_model_2000, test_case_dict_tppmi_2000_dynamic)

  0%|          | 0/480 [00:00<?, ?it/s]

In [71]:
# Similarity lookups, TPPMI with 4000 context words, dynamic test cases (480 entries in the recorded run).
similarities_tppmi_4000_dynamic = test_util.get_similarites_of_models_tppmi(tppmi_model_4000, test_case_dict_tppmi_4000_dynamic)

  0%|          | 0/480 [00:00<?, ?it/s]

In [72]:
# Similarity lookups, TPPMI with 6000 context words, dynamic test cases (480 entries in the recorded run).
similarities_tppmi_6000_dynamic = test_util.get_similarites_of_models_tppmi(tppmi_model_6000, test_case_dict_tppmi_6000_dynamic)

  0%|          | 0/480 [00:00<?, ?it/s]

# Experiment

To examine the quality of embedding alignment, we create a task to query equivalences across years.

For example, given obama-2012, we want to query its equivalent word in 2002. As we know obama is the U.S. president in 2012; its equivalent in 2002 is bush, who was the U.S. president at that time. In this way, we create two testsets.

All results are rounded to three decimal places.

In [94]:
# Evaluation set-up shared by all metric cells below.
cutoffs = [1, 3, 5, 10]
list_of_types = ["static", "dynamic", "all"]


def _pair_with_test_data(sims_static, sims_dynamic, sims_all):
    """Pair each similarity result with its test frame, in the order of list_of_types."""
    return [
        [sims_static, test_data_1_static],
        [sims_dynamic, test_data_1_dynamic],
        [sims_all, test_data_1_all],
    ]


def _empty_scores():
    """Return one fresh, empty score dictionary per test-case type."""
    return {test_type: {} for test_type in list_of_types}


# [similarities, test data] pairs per model.
list_of_data_cade = _pair_with_test_data(
    similarities_cade_static, similarities_cade_dynamic, similarities_cade_all)
list_of_data_tppmi_500 = _pair_with_test_data(
    similarities_tppmi_500_static, similarities_tppmi_500_dynamic, similarities_tppmi_500_all)
list_of_data_tppmi_1000 = _pair_with_test_data(
    similarities_tppmi_1000_static, similarities_tppmi_1000_dynamic, similarities_tppmi_1000_all)
list_of_data_tppmi_2000 = _pair_with_test_data(
    similarities_tppmi_2000_static, similarities_tppmi_2000_dynamic, similarities_tppmi_2000_all)
list_of_data_tppmi_4000 = _pair_with_test_data(
    similarities_tppmi_4000_static, similarities_tppmi_4000_dynamic, similarities_tppmi_4000_all)
list_of_data_tppmi_6000 = _pair_with_test_data(
    similarities_tppmi_6000_static, similarities_tppmi_6000_dynamic, similarities_tppmi_6000_all)
list_of_data_static = _pair_with_test_data(
    similarities_static_static, similarities_static_dynamic, similarities_static_all)

# Test-case type -> [similarities, test data], per model.
config_dict_cade = dict(zip(list_of_types, list_of_data_cade))
config_dict_tppmi_500 = dict(zip(list_of_types, list_of_data_tppmi_500))
config_dict_tppmi_1000 = dict(zip(list_of_types, list_of_data_tppmi_1000))
config_dict_tppmi_2000 = dict(zip(list_of_types, list_of_data_tppmi_2000))
config_dict_tppmi_4000 = dict(zip(list_of_types, list_of_data_tppmi_4000))
config_dict_tppmi_6000 = dict(zip(list_of_types, list_of_data_tppmi_6000))
config_dict_static = dict(zip(list_of_types, list_of_data_static))

# Score containers, filled by the metric cells: test-case type -> {metric name: value}.
scores_cade = _empty_scores()
scores_static = _empty_scores()
scores_tppmi_500 = _empty_scores()
scores_tppmi_1000 = _empty_scores()
scores_tppmi_2000 = _empty_scores()
scores_tppmi_4000 = _empty_scores()
scores_tppmi_6000 = _empty_scores()

## Mean Reciprocal Rank (@10)

The Mean Reciprocal Rank (MRR) is a statistical measure used to evaluate the performance of a system that returns a ranked list of responses to queries. It is the average of the reciprocal ranks of the first correct answer for each query, where the reciprocal rank is the inverse of the rank at which the first relevant answer is found.
Here it is evaluated at a cutoff of 10 (MRR@10).

### TWEC

In [95]:
# MRR of TWEC for every test-case type, rounded to three decimals.
# NOTE(review): no cutoff is passed here; the "@10" in the score name presumably
# reflects the default of calculate_rank_metric -- confirm.
for test_type, (similarities, test_data) in tqdm(config_dict_cade.items()):
    mrr = test_util.calculate_rank_metric(similarities, test_data, metric='MRR')
    scores_cade[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

### Static Word2Vec

In [96]:
# MRR of the static Word2Vec baseline for every test-case type, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_static.items()):
    mrr = test_util.calculate_rank_metric_static(similarities, test_data, metric='MRR')
    scores_static[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

### TPPMI

In [97]:
# MRR of TPPMI (500 context words) for every test-case type, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_500.items()):
    mrr = test_util.calculate_rank_metric(similarities, test_data, metric='MRR')
    scores_tppmi_500[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [98]:
# MRR of TPPMI (1000 context words) for every test-case type, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_1000.items()):
    mrr = test_util.calculate_rank_metric(similarities, test_data, metric='MRR')
    scores_tppmi_1000[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [99]:
# MRR of TPPMI (2000 context words) for every test-case type, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_2000.items()):
    mrr = test_util.calculate_rank_metric(similarities, test_data, metric='MRR')
    scores_tppmi_2000[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [100]:
# MRR of TPPMI (4000 context words) for every test-case type, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_4000.items()):
    mrr = test_util.calculate_rank_metric(similarities, test_data, metric='MRR')
    scores_tppmi_4000[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [101]:
# MRR of TPPMI (6000 context words) for every test-case type, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_6000.items()):
    mrr = test_util.calculate_rank_metric(similarities, test_data, metric='MRR')
    scores_tppmi_6000[test_type]["mrr@10"] = round(mrr, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

## Mean Precision (@K)

As introduced by Yao et al. (2018), MP@K is defined as follows: consider the K words most similar to the query embedding for the given year. The Precision@K for a particular test i, denoted P@K[i], equals 1 if the target word appears within this set of K words and 0 otherwise. MP@K is the mean of P@K[i] over all tests.

### TWEC

In [103]:
# MP@K of TWEC for every test-case type and every cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_cade.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric(similarities, test_data, metric='MP', k=cutoff)
        scores_cade[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

### Static Word2Vec

In [104]:
# MP@K of the static Word2Vec baseline for every test-case type and cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_static.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric_static(similarities, test_data, metric='MP', k=cutoff)
        scores_static[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

### TPPMI

In [105]:
# MP@K of TPPMI (500 context words) for every test-case type and cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_500.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric(similarities, test_data, metric='MP', k=cutoff)
        scores_tppmi_500[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [106]:
# MP@K of TPPMI (1000 context words) for every test-case type and cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_1000.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric(similarities, test_data, metric='MP', k=cutoff)
        scores_tppmi_1000[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [107]:
# MP@K of TPPMI (2000 context words) for every test-case type and cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_2000.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric(similarities, test_data, metric='MP', k=cutoff)
        scores_tppmi_2000[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [108]:
# MP@K of TPPMI (4000 context words) for every test-case type and cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_4000.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric(similarities, test_data, metric='MP', k=cutoff)
        scores_tppmi_4000[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

In [109]:
# MP@K of TPPMI (6000 context words) for every test-case type and cutoff, rounded to three decimals.
for test_type, (similarities, test_data) in tqdm(config_dict_tppmi_6000.items()):
    for cutoff in cutoffs:
        precision = test_util.calculate_rank_metric(similarities, test_data, metric='MP', k=cutoff)
        scores_tppmi_6000[test_type][f"mp@{cutoff}"] = round(precision, 3)

  0%|          | 0/3 [00:00<?, ?it/s]

## Results

In [111]:
# Turn every score dictionary into a table with one row per test-case type
# and one column per metric.
score_table_cade = pd.DataFrame(scores_cade).transpose()
score_table_static = pd.DataFrame(scores_static).transpose()
score_table_tppmi_500 = pd.DataFrame(scores_tppmi_500).transpose()
score_table_tppmi_1000 = pd.DataFrame(scores_tppmi_1000).transpose()
score_table_tppmi_2000 = pd.DataFrame(scores_tppmi_2000).transpose()
score_table_tppmi_4000 = pd.DataFrame(scores_tppmi_4000).transpose()
score_table_tppmi_6000 = pd.DataFrame(scores_tppmi_6000).transpose()

# Show each table under its heading, with the baseline last.
titled_score_tables = [
    ("Scores of TWEC", score_table_cade),
    ("Scores of TPPMI (500 context-words)", score_table_tppmi_500),
    ("Scores of TPPMI (1000 context-words)", score_table_tppmi_1000),
    ("Scores of TPPMI (2000 context-words)", score_table_tppmi_2000),
    ("Scores of TPPMI (4000 context-words)", score_table_tppmi_4000),
    ("Scores of TPPMI (6000 context-words)", score_table_tppmi_6000),
    ("Scores of Static Word2Vec (Baseline)", score_table_static),
]
for title, table in titled_score_tables:
    print(title)
    display(table)

Scores of TWEC


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.587,0.501,0.656,0.708,0.746
dynamic,0.346,0.268,0.398,0.451,0.512
all,0.394,0.316,0.448,0.501,0.556


Scores of TPPMI (500 context-words)


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.254,0.198,0.287,0.333,0.39
dynamic,0.094,0.063,0.106,0.134,0.178
all,0.131,0.094,0.15,0.181,0.227


Scores of TPPMI (1000 context-words)


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.361,0.281,0.405,0.476,0.548
dynamic,0.169,0.117,0.197,0.24,0.299
all,0.209,0.15,0.241,0.29,0.353


Scores of TPPMI (2000 context-words)


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.442,0.349,0.504,0.564,0.639
dynamic,0.188,0.13,0.218,0.261,0.337
all,0.244,0.178,0.283,0.33,0.406


Scores of TPPMI (4000 context-words)


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.428,0.332,0.49,0.557,0.632
dynamic,0.187,0.122,0.225,0.273,0.345
all,0.238,0.168,0.28,0.331,0.402


Scores of TPPMI (6000 context-words)


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.429,0.327,0.497,0.569,0.64
dynamic,0.192,0.129,0.229,0.276,0.337
all,0.247,0.176,0.29,0.344,0.406


Scores of Static Word2Vec (Baseline)


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,1.0,1.0,1.0,1.0,1.0
dynamic,0.147,0.0,0.24,0.372,0.463
all,0.374,0.266,0.442,0.539,0.606


In [92]:
# Disabled cell: the whole body is a string literal (output suppressed by the trailing
# semicolon), so nothing here is executed. The draft merges the score tables into one
# multi-indexed table, but it only covers TWEC, TPPMI 500/1000/2000 and the static
# baseline -- the 4000 and 6000 tables are not included.
'''
score_table_cade['Model'] = 'TWEC'
score_table_tppmi_500['Model'] = 'TPPMI (500 context-words)'
score_table_tppmi_1000['Model'] = 'TPPMI (1000 context-words)'
score_table_tppmi_2000['Model'] = 'TPPMI (2000 context-words)'
score_table_static['Model'] = 'Static Word2Vec (Baseline)'

merged_score_table = pd.concat([score_table_cade, score_table_tppmi_500,
                                score_table_tppmi_1000, score_table_tppmi_2000, score_table_static], ignore_index=False)

merged_score_table.set_index(['Model', merged_score_table.index], inplace=True)
model_order = ['TWEC', 'TPPMI (500 context-words)', 'TPPMI (1000 context-words)', 'TPPMI (2000 context-words)', 'Static Word2Vec (Baseline)']
merged_score_table = merged_score_table.reindex(model_order, level='Model')
merged_score_table = merged_score_table.round(3)

pd.set_option('display.max_rows', 1000)  # Replace None with a large number if the table is too long
pd.set_option('display.max_columns', 1000)

merged_score_table''';

----------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------

In [116]:
# Directory that receives one CSV per model; created on demand.
score_table_dir = Path("../../data/results/nyt-data")
score_table_dir.mkdir(parents=True, exist_ok=True)

# (file name, table) pairs: TWEC first, then the TPPMI variants, then the static baseline.
score_table_exports = [
    ("score_table_cade.csv", score_table_cade),
    ('score_table_tppmi_500.csv', score_table_tppmi_500),
    ('score_table_tppmi_1000.csv', score_table_tppmi_1000),
    ('score_table_tppmi_2000.csv', score_table_tppmi_2000),
    ('score_table_tppmi_4000.csv', score_table_tppmi_4000),
    ('score_table_tppmi_6000.csv', score_table_tppmi_6000),
    ('score_table_static.csv', score_table_static),
]

# The index (test-case type) is written as the first column of every file.
for file_name, table in score_table_exports:
    table.to_csv(score_table_dir / file_name, index=True)

# Scores from Disk

In [163]:
# Load the score tables back from the results directory written by the export cell.
score_tables = test_util.load_score_tables(score_table_dir)

In [164]:
# Display the names under which the loaded tables are stored.
score_tables.keys()

dict_keys(['score_table_cade', 'score_table_static', 'score_table_tppmi_1000', 'score_table_tppmi_2000', 'score_table_tppmi_4000', 'score_table_tppmi_500', 'score_table_tppmi_6000'])

In [167]:
# Show every loaded table under a heading derived from its name: the text after
# the last "table_" with its first letter capitalized (e.g. "Cade", "Tppmi_500").
for table_name, loaded_table in score_tables.items():
    model_label = table_name.rpartition('table_')[2].capitalize()
    print(f"Scores for the model: {model_label}")
    display(loaded_table)

Scores for the model: Cade


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.587,0.501,0.656,0.708,0.746
dynamic,0.346,0.268,0.398,0.451,0.512
all,0.394,0.316,0.448,0.501,0.556


Scores for the model: Static


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,1.0,1.0,1.0,1.0,1.0
dynamic,0.147,0.0,0.24,0.372,0.463
all,0.374,0.266,0.442,0.539,0.606


Scores for the model: Tppmi_1000


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.361,0.281,0.405,0.476,0.548
dynamic,0.169,0.117,0.197,0.24,0.299
all,0.209,0.15,0.241,0.29,0.353


Scores for the model: Tppmi_2000


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.442,0.349,0.504,0.564,0.639
dynamic,0.188,0.13,0.218,0.261,0.337
all,0.244,0.178,0.283,0.33,0.406


Scores for the model: Tppmi_4000


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.428,0.332,0.49,0.557,0.632
dynamic,0.187,0.122,0.225,0.273,0.345
all,0.238,0.168,0.28,0.331,0.402


Scores for the model: Tppmi_500


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.254,0.198,0.287,0.333,0.39
dynamic,0.094,0.063,0.106,0.134,0.178
all,0.131,0.094,0.15,0.181,0.227


Scores for the model: Tppmi_6000


Unnamed: 0,mrr@10,mp@1,mp@3,mp@5,mp@10
static,0.429,0.327,0.497,0.569,0.64
dynamic,0.192,0.129,0.229,0.276,0.337
all,0.247,0.176,0.29,0.344,0.406
