Testing temporal word embeddings against the ChiWUG COMPARE score.

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from gensim.models.word2vec import Word2Vec

# Load the ChiWUG statistics table (tab-separated).
df = pd.read_csv('chi_wug/stats_groupings.csv', sep='\t')

# Only the lemma and its gold COMPARE score are used below.
gold = df[['lemma', 'COMPARE']]

# (lemma, COMPARE) tuples, ordered by ascending COMPARE score.
lemma = gold.to_records(index=False).tolist()
lemma.sort(key=lambda pair: pair[1])

# Independent copy that collects one similarity column per model variant.
to_csv = gold.copy()


def cosine_sim(x, y):
    """Return the cosine similarity of the vectors ``x`` and ``y``.

    This is the dot product of the two vectors divided by the product of
    their Euclidean norms.
    """
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    return np.dot(x, y) / norm_product


def word_compare(model1, model2, lemma, name_type=None, results=None):
    """Correlate cross-model cosine similarity with the gold COMPARE scores.

    For every ``(word, COMPARE)`` pair whose word has a vector in both
    models, the cosine similarity between the word's two vectors is computed.
    Spearman's rank correlation between the gold scores and those
    similarities is printed and returned.  Words missing from either model
    are reported with a ``Skip:`` line and left out of the correlation.

    Parameters
    ----------
    model1, model2 : objects with a ``wv`` attribute
        ``wv`` must support ``word in wv`` and ``wv[word]`` (for example
        gensim ``Word2Vec`` models).
    lemma : iterable of (str, number)
        ``(word, COMPARE)`` pairs to evaluate.
    name_type : str, optional
        When given, each similarity is also stored in column ``name_type``
        of ``results``, in the row(s) whose ``'lemma'`` equals the word.
    results : pandas.DataFrame, optional
        Table with a ``'lemma'`` column that receives the similarities.
        Defaults to the notebook-level ``to_csv`` frame, so existing calls
        behave as before.

    Returns
    -------
    tuple
        ``(spear_coef, p_value)``; both are ``nan`` when fewer than two
        words could be compared.
    """
    if name_type and results is None:
        # Backward-compatible default: write into the notebook's shared table.
        results = to_csv

    gold_scores = []
    similarities = []
    for word, COMPARE in lemma:
        # Test membership on the keyed vectors directly: this works on both
        # gensim 3.x and 4.x, whereas ``wv.vocab`` was removed in gensim 4.
        if word in model1.wv and word in model2.wv:
            cos_sim = cosine_sim(model1.wv[word], model2.wv[word])
            gold_scores.append(float(COMPARE))
            similarities.append(cos_sim)
            if name_type:
                results.loc[results['lemma'] == word, name_type] = cos_sim
        else:
            print('Skip:', word)

    if len(gold_scores) < 2:
        # A rank correlation is undefined for fewer than two observations.
        spear_coef = p_value = float('nan')
    else:
        spear_coef, p_value = stats.spearmanr(gold_scores, similarities)
    print("Spearman Score: " + str(spear_coef))
    print("P value: ", str(p_value))
    return spear_coef, p_value

In [2]:
# Evaluate every (alignment strategy, architecture) combination on the full
# lemma list and record the per-word similarities under "<strategy>-<arch>".
for model_name in ['compass', 'alignment']:
    for model_type in ['sgns', 'cbow']:
        name_type = f'{model_name}-{model_type}'
        print(f'Testing {model_name} + {model_type}')
        print('============================')

        # Both time slices of one variant live in the same directory.
        slice_dir = f'../../{model_name}/2-slices/model-{model_type}'
        model_before = Word2Vec.load(f'{slice_dir}/1954-1978.model')
        model_after = Word2Vec.load(f'{slice_dir}/1979-2003.model')

        word_compare(model_before, model_after, lemma, name_type)
        print('............................')

Testing compass + sgns
Spearman Score: 0.43621863738290895
P value:  0.004894881430871465
............................
Testing compass + cbow
Spearman Score: 0.4233638399379835
P value:  0.006488710918843536
............................
Testing alignment + sgns
Spearman Score: 0.5197279054557825
P value:  0.0005882357109100949
............................
Testing alignment + cbow
Spearman Score: 0.46352335312359005
P value:  0.0025927700110183107
............................


In [3]:
# Write the lemma / COMPARE table, plus any similarity columns added by
# word_compare, to 'change_eval.csv' without the index column.
to_csv.to_csv('change_eval.csv', index=False)

The models perform especially poorly on one-character "words". This may be due to the nature of Chinese, where the boundary between characters and words can be blurry. We therefore also remove the one-character test examples and evaluate on the remaining ones.

In [4]:
# Keep only the test words that are longer than one character.
lemma_no_ch = list(filter(lambda pair: len(pair[0]) > 1, lemma))

# Same evaluation grid as before, restricted to the multi-character words.
for model_name in ['compass', 'alignment']:
    for model_type in ['sgns', 'cbow']:
        name_type = f'{model_name}-{model_type}'
        print(f'Testing {model_name} + {model_type}')
        print('============================')

        # Both time slices of one variant live in the same directory.
        slice_dir = f'../../{model_name}/2-slices/model-{model_type}'
        model_before = Word2Vec.load(f'{slice_dir}/1954-1978.model')
        model_after = Word2Vec.load(f'{slice_dir}/1979-2003.model')

        word_compare(model_before, model_after, lemma_no_ch, name_type)
        print('............................')

Testing compass + sgns
Spearman Score: 0.6668493963297919
P value:  0.00010654845431080361
............................
Testing compass + cbow
Spearman Score: 0.6463098872025911
P value:  0.00020278225869589658
............................
Testing alignment + sgns
Spearman Score: 0.7125840366530262
P value:  2.09687559579652e-05
............................
Testing alignment + cbow
Spearman Score: 0.6616460540175677
P value:  0.00012598960199119188
............................


We also evaluate on the one-character examples only.

In [5]:
# Keep only the test words that consist of exactly one character.
lemma_only_ch = list(filter(lambda pair: len(pair[0]) == 1, lemma))

# Same evaluation grid as before, restricted to the one-character words.
for model_name in ['compass', 'alignment']:
    for model_type in ['sgns', 'cbow']:
        name_type = f'{model_name}-{model_type}'
        print(f'Testing {model_name} + {model_type}')
        print('============================')

        # Both time slices of one variant live in the same directory.
        slice_dir = f'../../{model_name}/2-slices/model-{model_type}'
        model_before = Word2Vec.load(f'{slice_dir}/1954-1978.model')
        model_after = Word2Vec.load(f'{slice_dir}/1979-2003.model')

        word_compare(model_before, model_after, lemma_only_ch, name_type)
        print('............................')

Testing compass + sgns
Spearman Score: 0.034965034965034975
P value:  0.9140933424107598
............................
Testing compass + cbow
Spearman Score: 0.3216783216783217
P value:  0.30790987618065263
............................
Testing alignment + sgns
Spearman Score: 0.18181818181818185
P value:  0.5717012385276553
............................
Testing alignment + cbow
Spearman Score: 0.23076923076923078
P value:  0.470531995381542
............................
