In [None]:
import pickle

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

from gensim.models import Word2Vec

from utils import *

## Prepare data

In [None]:
## INPUTS
# Boundary years of the periods: each pair of consecutive entries delimits one period.
INTERVAL = [1867, 1920, 1950, 1980, 1995, 2010, 2022]

# Derive the periods from the boundaries.
# (start, stop) tuples, one per period; stop is the first year of the next period.
timespans = list(zip(INTERVAL[:-1], INTERVAL[1:]))
# One range of years per period, covering start .. stop-1.
interval_range = [range(start, stop) for start, stop in timespans]


In [None]:
# Merge the per-year sentence pickles into a single pickle per period.
for timespan in interval_range:
    sentences_in_timespan = []
    for year in timespan:
        with open(f"../../data/processed/sentences/year/{year}_sentences.pkl", "rb") as f:
            # Unpickle the year's content before extending. Iterating the raw binary
            # file handle would append newline-split chunks of pickle bytes instead.
            # Assumes each yearly pickle holds an iterable of sentences -- TODO confirm.
            sentences_in_timespan.extend(pickle.load(f))
    # The output file is named after the range bounds: {start}_{stop}, stop exclusive.
    with open(f"../../data/processed/sentences/custom_timespans/{timespan.start}_{timespan.stop}_sentences.pkl", "wb") as fp:
        pickle.dump(sentences_in_timespan, fp)


## Train static embeddings for each period

In [None]:
# Train one skip-gram Word2Vec model per period on that period's merged sentences.
for timespan in tqdm(timespans):
    # Read the file exactly where the data-preparation cell writes it:
    # under custom_timespans/, named {start}_{stop} with the exclusive stop year.
    with open(f"../../data/processed/sentences/custom_timespans/{timespan[0]}_{timespan[1]}_sentences.pkl", "rb") as s:
        sentences = pickle.load(s)
    model = Word2Vec(sentences, vector_size=300, window=8, min_count=10, workers=6, sg=1, ns_exponent=0.75, sample=3000)
    # Models are named {start}_{last year of the period}; the alignment cell loads them by this name.
    model.save(f"trained_models/aw2v/unaligned/{timespan[0]}_{timespan[1]-1}_w2v.pkl")

## Align static embeddings

In [None]:
# Forward pass: walk the periods in chronological order and align each unaligned
# model against the aligned model of the period before it.
# NOTE(review): smart_procrustes_align_gensim comes from utils; presumably it returns
# its second argument mapped into the first argument's space -- confirm there.
for index, timespan in enumerate(timespans[:-1]):
    following = timespans[index+1]
    aligned_current = f"trained_models/aw2v/aligned/{timespan[0]}_{timespan[1]-1}_w2v.pkl"
    if index == 0:
        # The first period has no predecessor: its unaligned model is copied as-is
        # into the aligned directory and serves as the starting reference.
        model_t = Word2Vec.load(f"trained_models/aw2v/unaligned/{timespan[0]}_{timespan[1]-1}_w2v.pkl")
        model_t.save(aligned_current)
    else:
        # Saved by the previous iteration of this loop.
        model_t = Word2Vec.load(aligned_current)
    model_t_1 = Word2Vec.load(f"trained_models/aw2v/unaligned/{following[0]}_{following[1]-1}_w2v.pkl")
    aligned_w2v_model = smart_procrustes_align_gensim(model_t, model_t_1)
    aligned_w2v_model.save(f"trained_models/aw2v/aligned/{following[0]}_{following[1]-1}_w2v.pkl")

# Backward pass: walk from the last period down to the second one and re-align each
# earlier aligned model against its successor, overwriting the earlier model's file.
for index in reversed(range(1, len(timespans))):
    later, earlier = timespans[index], timespans[index-1]
    earlier_path = f"trained_models/aw2v/aligned/{earlier[0]}_{earlier[1]-1}_w2v.pkl"
    model_t = Word2Vec.load(f"trained_models/aw2v/aligned/{later[0]}_{later[1]-1}_w2v.pkl")
    model_t_1 = Word2Vec.load(earlier_path)
    aligned_w2v_model = smart_procrustes_align_gensim(model_t, model_t_1)
    aligned_w2v_model.save(earlier_path)
