In [None]:
from sklearn.manifold import TSNE
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from adjustText import adjust_text

# One colour per time slice, ordered from light green through blue to purple
# so that successive periods are visually distinguishable on the plot.
color_list = [
    # greens
    'lightgreen',
    'limegreen',
    'forestgreen',
    'mediumseagreen',
    # cyans / blues
    'darkcyan',
    'turquoise',
    'deepskyblue',
    'dodgerblue',
    'royalblue',
    'darkblue',
    'darkslateblue',
    # purples
    'rebeccapurple',
    'blueviolet',
    'darkmagenta',
    'orchid',
    'plum',
]

def neighbour_path(models, word, *, start_year=1945, year_step=5):
    """
    Search for the nearest neighbours of a target word at different times
    and plot the path of its vector across time within those neighbourhoods.

    For every model the target word's vector and the vectors of its five
    closest neighbours are collected. All collected vectors are projected to
    2-D with t-SNE; the target word's positions are joined by a line and each
    period is drawn in its own colour.

    Args:
        models: iterable of trained models exposing ``.wv`` (indexing by word
            and ``most_similar``), one per period, in chronological order.
        word: the target word; it must be in the vocabulary of every model.
        start_year: year used to label the first model (keyword-only).
        year_step: number of years between consecutive models (keyword-only).

    Side effects:
        Writes ``neighbourhood/<word>.csv`` with one row per period (columns
        ``word``, ``year``, ``neighbour_1`` .. ``neighbour_10``) and saves the
        figure to ``visualize_path/<word>.png``. Both directories are created
        if they do not exist.

    Raises:
        ValueError: if ``models`` yields no model.
        KeyError: propagated from the model if ``word`` is out of vocabulary.
    """
    import os  # local import so the block does not depend on the file header

    n_listed = 10   # neighbours recorded in the CSV
    n_plotted = 5   # neighbours drawn on the figure

    labels = []
    vectors = []
    anchors = []    # position of the target word in labels/vectors, per period
    rows = []       # one CSV row per period

    for period, model in enumerate(models):
        year = start_year + period * year_step
        neighbours = model.wv.most_similar(word, topn=n_listed)

        row = {'word': word, 'year': year}
        for rank, (neighbour, _similarity) in enumerate(neighbours, 1):
            row[f'neighbour_{rank}'] = neighbour
        rows.append(row)

        anchors.append(len(labels))
        labels.append(word + str(year))
        vectors.append(model.wv[word])
        for neighbour, _similarity in neighbours[:n_plotted]:
            labels.append(neighbour)
            vectors.append(model.wv[neighbour])

    if not vectors:
        raise ValueError('models is empty: there is nothing to plot')

    # Written once after the loop: writing inside the loop would overwrite the
    # same file for every period and keep only the last one.
    os.makedirs('neighbourhood', exist_ok=True)
    pd.DataFrame(rows).to_csv('neighbourhood/' + word + '.csv', index=False, encoding='utf-8')

    # t-SNE requires perplexity < number of samples, so clamp it for short
    # model lists. The iteration count is left at the library default (1000):
    # the keyword was renamed from n_iter to max_iter between scikit-learn
    # releases, and omitting it works with both.
    perplexity = min(30, len(vectors) - 1)
    tsne_model = TSNE(perplexity=perplexity, n_components=2, init='pca', random_state=23)
    flattened_2d = tsne_model.fit_transform(vectors)
    x = flattened_2d[:, 0]
    y = flattened_2d[:, 1]

    plt.figure(dpi=200)
    this_font = 'SimHei'
    # Line joining the target word's position in each successive period.
    plt.plot(x[anchors], y[anchors], color='steelblue')

    texts = []
    # Each period owns the slice [start, stop): the target word first, then
    # however many neighbours that model actually returned.
    bounds = anchors[1:] + [len(labels)]
    for period, (start, stop) in enumerate(zip(anchors, bounds)):
        # Cycle through the palette so more periods than colours still works.
        color = color_list[period % len(color_list)]
        plt.scatter(x[start], y[start], s=40, c=color)
        texts.append(plt.annotate(labels[start], fontproperties=this_font, fontsize=25, color=color,
            xy=(x[start], y[start]), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom'))
        for index in range(start + 1, stop):
            plt.scatter(x[index], y[index], s=10, c=color)
            texts.append(plt.annotate(labels[index], fontproperties=this_font, fontsize=15, color=color,
                xy=(x[index], y[index]), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom'))

    # Nudge the labels apart so that overlapping annotations stay readable.
    adjust_text(texts)
    os.makedirs('visualize_path', exist_ok=True)
    plt.savefig('visualize_path/' + word + '.png')

In [None]:
from gensim.models.word2vec import Word2Vec

# Directory holding one trained Word2Vec model per five-year slice.
modeldir = '../../compass/5-year/model/'
models = []

# Slices start every five years from 1945 up to and including 2020; each file
# is named "<first year>-<last year>.model", e.g. "1945-1949.model".
for i in range(1945, 2025, 5):
    fiveyear = f'{modeldir}{i}-{i + 4}.model'
    model = Word2Vec.load(fiveyear)
    models.append(model)