In [3]:
import numpy as np
from scipy.spatial.distance import cdist
from numpy.random import choice

def get_replacement(w, D, k, sensitivity):
    """Pick a randomized replacement for ``w`` among its nearest neighbours.

    The ``k`` entries of ``D`` closest to ``w`` in cosine distance are taken
    as candidates, and one is sampled with probability proportional to
    ``exp(-sensitivity * distance)``. ``w`` is itself a key of ``D``, so it
    is normally among the candidates (its distance to itself is ~0).

    Args:
        w: Word to replace; must be a key of ``D``.
        D: Mapping from word to embedding vector. All vectors must have the
            same length so they can be stacked for ``cdist``.
        k: Number of nearest neighbours to sample from. Values larger than
            ``len(D)`` simply use the whole vocabulary.
        sensitivity: Scale applied to the distances before exponentiation;
            larger values concentrate the probability on closer words.

    Returns:
        The sampled word, as returned by ``numpy.random.choice``.

    Raises:
        KeyError: If ``w`` is not in ``D``.
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    phi_w = D[w]

    # Materialize the vocabulary once; rebuilding the key list for every
    # neighbour index would be quadratic in the vocabulary size.
    vocab = list(D.keys())
    distances = cdist([phi_w], list(D.values()), 'cosine')[0]

    # Keep only the k nearest entries so the candidate list and the
    # probability vector always have the same length.
    nearest = np.argsort(distances)[:k]
    sim_set = [vocab[index] for index in nearest]

    # Exponential mechanism: weight each candidate by exp(-sensitivity * d)
    # and normalize the weights into a probability distribution.
    probabilities = np.exp(-sensitivity * distances[nearest])
    probabilities /= np.sum(probabilities)
    picked_word = choice(sim_set, p=probabilities)

    return picked_word

# Example usage
if __name__ == "__main__":
    # Load GloVe embeddings: each line is a word followed by its
    # whitespace-separated vector components.
    glove_embeddings = {}
    with open("glove.6B.300d.txt", "r", encoding="utf-8") as file:
        for line in file:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype="float32")
            glove_embeddings[word] = vector

    # Sampling parameters shared by the demo call and the interactive loop.
    neighbour_count = 100000
    sensitivity = 1.0

    # Get a replacement for a word
    replacement_word = get_replacement(
        'canada', glove_embeddings, k=neighbour_count, sensitivity=sensitivity
    )
    print(replacement_word)

    # Interactive part
    while True:
        input_word = input("Enter a word (type 'exit' to quit): ")
        if input_word.lower() == 'exit':
            break
        try:
            replacement_word = get_replacement(
                input_word,
                glove_embeddings,
                k=neighbour_count,
                sensitivity=sensitivity,
            )
        except KeyError:
            # A word missing from the embeddings must not end the session.
            print(f"'{input_word}' is not in the vocabulary; try another word.")
            continue
        print(replacement_word)

ontario
microsoft
microsoft
uk
tasmania
calgary
