Testing temporal word embeddings against the ChiWUG COMPARE score.

In [21]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from gensim.models.word2vec import Word2Vec

# Load the ChiWUG statistics table (tab-separated despite the .csv suffix).
df = pd.read_csv('chi_wug/stats_groupings.csv', sep='\t')

# Keep only the two columns the evaluation uses.
gold_scores = df[['lemma', 'COMPARE']]

# (lemma, COMPARE) tuples ordered by ascending COMPARE value.
lemma = gold_scores.to_records(index=False).tolist()
lemma.sort(key=lambda pair: pair[1])

# Frame that word_compare() extends with one similarity column per model.
to_csv = gold_scores.copy()


def cosine_sim(x, y):
    """Return the cosine similarity of vectors ``x`` and ``y``.

    Computed as the dot product of the two vectors divided by the product
    of their Euclidean norms.
    """
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    return np.dot(x, y) / norm_product


def word_compare(model1, model2, lemma, name_type=None):
    """Correlate cross-model cosine similarity with gold COMPARE scores.

    For every ``(word, COMPARE)`` pair in ``lemma`` whose word is present in
    both models, the cosine similarity between the word's vector in
    ``model1`` and its vector in ``model2`` is computed.  The Spearman rank
    correlation between the gold scores and those similarities is printed
    and returned.  Words missing from either model are reported with a
    ``Skip:`` line and ignored.

    Parameters
    ----------
    model1, model2 : objects exposing ``.wv``
        ``.wv`` must support ``word in wv`` and ``wv[word]``.
    lemma : iterable of (word, COMPARE) pairs
        ``COMPARE`` must be convertible with ``float()``.
    name_type : str, optional
        When truthy, each similarity is also written to column ``name_type``
        of the module-level ``to_csv`` DataFrame, in the rows whose
        ``lemma`` equals the word.  ``to_csv`` must exist in that case.

    Returns
    -------
    tuple
        ``(spear_coef, p_value)``; both are NaN when fewer than two words
        are shared by the models, since a rank correlation is undefined.
    """
    word_compare_std = []  # gold COMPARE scores of the words actually used
    word_compare_pre = []  # predicted similarities, aligned with the above
    for word, COMPARE in lemma:
        # Membership test on the KeyedVectors itself: `.wv.vocab` was removed
        # in gensim 4.0, whereas `in` works on both gensim 3.x and 4.x.
        if word in model1.wv and word in model2.wv:
            word_compare_std.append(float(COMPARE))
            cos_sim = cosine_sim(model1.wv[word], model2.wv[word])
            word_compare_pre.append(cos_sim)
            if name_type:
                to_csv.loc[to_csv['lemma'] == word, name_type] = cos_sim
        else:
            print('Skip:', word)

    if len(word_compare_std) < 2:
        # Not enough shared words to rank; report NaN instead of relying on
        # how spearmanr treats degenerate input.
        spear_coef = p_value = float('nan')
    else:
        spear_coef, p_value = stats.spearmanr(word_compare_std, word_compare_pre)
    print("Spearman Score: " + str(spear_coef))
    print("P value: ", str(p_value))
    return spear_coef, p_value

In [14]:
# Evaluate every (alignment method, architecture) combination on all lemmas.
for model_name in ['compass', 'alignment']:
    for model_type in ['sgns', 'cbow']:
        name_type = '{}-{}'.format(model_name, model_type)
        print('Testing {} + {}'.format(model_name, model_type))
        print('============================')

        # Both time slices of one configuration live in the same directory.
        model_dir = '../../{}/2-slices/model-{}/'.format(model_name, model_type)
        model_path_before = model_dir + '1954-1978.model'
        model_path_after = model_dir + '1979-2003.model'

        model_before = Word2Vec.load(model_path_before)
        model_after = Word2Vec.load(model_path_after)

        word_compare(model_before, model_after, lemma, name_type)
        print('............................')

Testing compass + sgns
Spearman Score: 0.47956839227017584
P value:  0.0017411354807959107
............................
Testing compass + cbow
Spearman Score: 0.4079756152593865
P value:  0.008968792241685229
............................
Testing alignment + sgns
Spearman Score: 0.5125967769461888
P value:  0.0007202625509275671
............................
Testing alignment + cbow
Spearman Score: 0.43818908078687563
P value:  0.004683429482687941
............................


In [17]:
# Write the gold scores plus the per-model similarity columns to disk.
to_csv.to_csv(
    path_or_buf='temporal_eval.csv',
    index=False,
)

The models perform especially poorly on one-character "words"; this may be due to the nature of Chinese, in which the boundary between characters and words can be unclear. Therefore, we also remove the one-character test examples and evaluate on the rest.

In [22]:
# Keep only the lemmas that are longer than one character.
lemma_no_ch = [(word, score) for word, score in lemma if len(word) > 1]

for model_name in ['compass', 'alignment']:
    for model_type in ['sgns', 'cbow']:
        name_type = '{}-{}'.format(model_name, model_type)
        print('Testing {} + {}'.format(model_name, model_type))
        print('============================')

        # Both time slices of one configuration live in the same directory.
        model_dir = '../../{}/2-slices/model-{}/'.format(model_name, model_type)
        model_path_before = model_dir + '1954-1978.model'
        model_path_after = model_dir + '1979-2003.model'

        model_before = Word2Vec.load(model_path_before)
        model_after = Word2Vec.load(model_path_after)

        word_compare(model_before, model_after, lemma_no_ch, name_type)
        print('............................')

Testing compass + sgns
Spearman Score: 0.7147749176265942
P value:  1.924946927725772e-05
............................
Testing compass + cbow
Spearman Score: 0.6391895240384947
P value:  0.00025073629738325105
............................
Testing alignment + sgns
Spearman Score: 0.7585925370979563
P value:  2.8997465686242916e-06
............................
Testing alignment + cbow
Spearman Score: 0.6684925570599681
P value:  0.0001009879916808052
............................


Finally, we test on the one-character examples only.

In [23]:
# Keep only the lemmas that consist of exactly one character.
lemma_only_ch = [(word, score) for word, score in lemma if len(word) == 1]

for model_name in ['compass', 'alignment']:
    for model_type in ['sgns', 'cbow']:
        name_type = '{}-{}'.format(model_name, model_type)
        print('Testing {} + {}'.format(model_name, model_type))
        print('============================')

        # Both time slices of one configuration live in the same directory.
        model_dir = '../../{}/2-slices/model-{}/'.format(model_name, model_type)
        model_path_before = model_dir + '1954-1978.model'
        model_path_after = model_dir + '1979-2003.model'

        model_before = Word2Vec.load(model_path_before)
        model_after = Word2Vec.load(model_path_after)

        word_compare(model_before, model_after, lemma_only_ch, name_type)
        print('............................')

Testing compass + sgns
Spearman Score: 0.04895104895104895
P value:  0.879918578878675
............................
Testing compass + cbow
Spearman Score: 0.2517482517482518
P value:  0.42991878503288405
............................
Testing alignment + sgns
Spearman Score: 0.06993006993006995
P value:  0.8290236405313092
............................
Testing alignment + cbow
Spearman Score: 0.20979020979020982
P value:  0.5128409431818262
............................
