### First, import the necessary modules

In [8]:
from tensorflow import keras
import pandas as pd
import numpy as np

### Then we use the BPEmb pretrained word embeddings

In [11]:
from bpemb import BPEmb

In [243]:
# Pretrained English BPEmb model: 50,000-entry subword vocabulary,
# 25-dimensional vectors. The model files are downloaded on first use.
bpemb_en = BPEmb(
    lang="en",
    vs=50000,
    dim=25,
)

  0%|          | 0/4993562 [00:00<?, ?B/s]

downloading https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs50000.d25.w2v.bin.tar.gz


100%|██████████| 4993562/4993562 [00:00<00:00, 5804763.94B/s]


### Create example sentences

In [380]:
# Example sentences to compare; several share words or topic, others are unrelated.
sentences_raw = [
    "I like to eat chicken",
    "I prefer to eat pizza",
    "I really really really really like to eat chicken",
    "Chicken is my absolute favourite dish in the evening",
    "The heaven is dark",
    "I like to build sand castles",
    "Where is the time gone to?",
]

In [314]:
# Embed every sentence with BPEmb. Sentences split into different numbers of
# subwords, so the per-sentence arrays have different lengths and cannot be
# stacked into one rectangular array. Recent NumPy versions raise a ValueError
# when asked to build such a ragged array implicitly, so an object array is
# allocated up front and filled slot by slot (one embedding array per sentence).
sentences = np.empty(len(sentences_raw), dtype=object)
for idx, text in enumerate(sentences_raw):
    sentences[idx] = np.array(bpemb_en.embed(text))

### Create an RNN that condenses the embeddings of a sentence into a single new vector by summing up their dimensions

In [381]:
# Three-unit simple RNN with tanh activation. The input kernel starts as all
# ones and the recurrent kernel as all zeros.
rnn = keras.layers.SimpleRNN(
    3,
    activation="tanh",
    kernel_initializer=keras.initializers.ones,
    recurrent_initializer=keras.initializers.zeros,
)

In [382]:
# Longest sentence, measured in embedded tokens (rows of each embedding array).
max_len = max(len(embedded) for embedded in sentences)

In [383]:
# Create the layer's weights for inputs of shape (batch=1, time=max_len, features=25).
# The shape is passed positionally because the name of build()'s first
# parameter differs between Keras versions; a keyword argument would break
# on versions that do not call it `input_shape`.
rnn.build((1, max_len, 25))

In [384]:
def calculate_distances(sentences, labels=None, model=None):
    """Build a pairwise distance table between RNN encodings of sentences.

    Each sentence is run through the recurrent layer after its recurrent
    kernel and bias have been overwritten with the constant
    ``1 / len(sentence)``, so that the sentence length influences the
    encoding. The input kernel of the layer is left untouched.

    Note: the layer's weights are modified in place; after the call they
    hold the values used for the last sentence.

    Parameters
    ----------
    sentences : sequence of array-like
        One embedding matrix per sentence; ``len(sentences[i])`` is used as
        the sentence length. An empty sentence raises ``ZeroDivisionError``.
    labels : sequence of hashable, optional
        Row/column labels, one per sentence. Defaults to the notebook-level
        ``sentences_raw``. Duplicate labels collapse into a single entry.
    model : layer-like, optional
        Object providing ``get_weights()``, ``set_weights()`` and
        ``__call__``. Defaults to the notebook-level ``rnn``.

    Returns
    -------
    pandas.DataFrame
        Square table indexed by label on both axes whose cells contain the
        absolute difference between the first output units of two sentences.
    """
    if labels is None:
        labels = sentences_raw
    if model is None:
        model = rnn

    # The input kernel is never changed below, so it is read only once.
    # Its second dimension is the number of units of the layer.
    kernel = model.get_weights()[0]
    units = kernel.shape[1]

    values = {}
    for i in range(len(sentences)):
        # Set the weights manually so that the sentence length is taken
        # into account. Plain NumPy arrays are used: set_weights() accepts
        # them and no extra TensorFlow import is required.
        scale = 1 / len(sentences[i])
        model.set_weights([
            kernel,
            np.full((units, units), scale, dtype=np.float32),
            np.full((units,), scale, dtype=np.float32),
        ])
        encoded = np.asarray(model(np.array([sentences[i]])))
        # Only the first unit of the single batch entry is compared.
        # NOTE(review): with an all-ones input kernel every unit should
        # produce the same value, so this loses nothing -- confirm if the
        # layer is configured differently.
        values[labels[i]] = encoded[0][0]

    # Simple absolute distance; another metric may suit other use cases better.
    distances = [
        [np.absolute(left - right) for right in values.values()]
        for left in values.values()
    ]
    names = list(values.keys())
    return pd.DataFrame(index=names, columns=names, data=distances)

In [385]:
# Display the pairwise distance table for the embedded example sentences.
calculate_distances(sentences)

Unnamed: 0,I like to eat chicken,I prefer to eat pizza,I really really really really like to eat chicken,Chicken is my absolute favourite dish in the evening,The heaven is dark,I like to build sand castles,Where is the time gone to?
I like to eat chicken,0.0,0.11436,0.075128,0.204332,0.204607,0.174042,0.18274
I prefer to eat pizza,0.11436,0.0,0.189488,0.089972,0.090247,0.059682,0.06838
I really really really really like to eat chicken,0.075128,0.189488,0.0,0.27946,0.279735,0.24917,0.257868
Chicken is my absolute favourite dish in the evening,0.204332,0.089972,0.27946,0.0,0.000275,0.03029,0.021592
The heaven is dark,0.204607,0.090247,0.279735,0.000275,0.0,0.030565,0.021867
I like to build sand castles,0.174042,0.059682,0.24917,0.03029,0.030565,0.0,0.008698
Where is the time gone to?,0.18274,0.06838,0.257868,0.021592,0.021867,0.008698,0.0


#### Have the result