In [5]:
import numpy as np
import gensim
import gensim.models.keyedvectors as word2vec
from gensim.scripts.glove2word2vec import glove2word2vec
# KeyedVectors is the loader class used by the fastText and GloVe cells below.
from gensim.models import KeyedVectors
from gensim.models.fasttext import FastText

In [6]:
# Load the fastText vectors from "fasttext.vec", read as a text-format
# (binary=False) word2vec file.
ft_model = KeyedVectors.load_word2vec_format(
    "fasttext.vec",
    binary=False,
)

In [7]:
# Load the word2vec vectors from 'word2vec.bin', read as a binary-format
# (binary=True) word2vec file.
w2v_model = word2vec.KeyedVectors.load_word2vec_format(
    'word2vec.bin',
    binary=True,
)

In [8]:
# Paths: the GloVe-format input and the word2vec-format file written from it.
word2vec_output_file = 'glove.word2vec.txt'
glove_input_file = 'glove.txt'

# Convert the GloVe file to word2vec text format, then load the converted
# file with the same loader used for the other models.
glove2word2vec(glove_input_file, word2vec_output_file)
glove_model = KeyedVectors.load_word2vec_format(
    word2vec_output_file,
    binary=False,
)

In [9]:
def get_word_embedding(word, model):
    """Look up the vector stored for ``word`` in ``model``.

    Args:
        word: Key to look up.
        model: Any object supporting ``model[word]`` that raises ``KeyError``
            for unknown keys.

    Returns:
        The value of ``model[word]``, or ``None`` when the lookup raises
        ``KeyError``.
    """
    try:
        vector = model[word]
    except KeyError:
        # Unknown word: report absence instead of propagating the error.
        return None
    else:
        return vector

In [13]:
def ensemble_word_embedding(word, models):
    """Average the vectors that several models hold for ``word``.

    Each model is queried through ``get_word_embedding``; models that do not
    know the word are skipped. Vectors shorter than the longest one found are
    right-padded with zeros so that all vectors share one length before the
    element-wise mean is taken.

    Args:
        word: Word to embed.
        models: Iterable of objects accepted by ``get_word_embedding``.

    Returns:
        The element-wise mean of the (padded) vectors, or ``None`` when no
        model contains ``word``.
    """
    found = []
    for model in models:
        vector = get_word_embedding(word, model)
        if vector is not None:
            found.append(vector)

    if not found:
        return None

    # Pad every shorter vector with trailing zeros up to the largest length.
    target_dim = max(vector.shape[0] for vector in found)
    padded = []
    for vector in found:
        missing = target_dim - vector.shape[0]
        if missing:
            vector = np.concatenate((vector, np.zeros(missing)))
        padded.append(vector)

    return np.mean(padded, axis=0)

In [14]:
# The objects returned by load_word2vec_format are already KeyedVectors, so
# they are used directly. Accessing `.wv` on a KeyedVectors instance is
# deprecated in gensim 3.x (it just returns the object itself) and raises
# AttributeError in gensim 4.
models = [w2v_model, glove_model, ft_model]

  """Entry point for launching an IPython kernel.


In [23]:
# Reference word for this notebook; `word` and `embedding` are reused by the
# comparison cells further down.
word = "queen"
embedding = ensemble_word_embedding(word, models)

# ensemble_word_embedding returns None when no model contains the word.
if embedding is None:
    print(f"No embedding found for '{word}'.")
else:
    print(f"Embedding for '{word}':")
    print(embedding)

Embedding for 'queen':
[-2.06146987e-01 -1.25648969e-02 -9.96286472e-02  1.34868383e-01
  1.42938649e-01 -2.14799064e-01  1.68100314e-01 -2.79222926e-02
  6.51457657e-04 -3.27286450e-02 -2.84494539e-01 -3.90831244e-02
 -2.03525372e-01  8.09745528e-02  6.54405008e-02  4.14064334e-01
  1.46647971e-01  2.30816677e-02 -4.40540089e-02  2.30262753e-01
  1.64959895e-01 -1.30604173e-02 -1.10536709e-01  1.03052557e-01
  2.13383221e-01 -1.47872182e-02 -7.94125833e-02 -6.50443733e-02
  2.55528857e-01  1.09339267e-01  2.35399995e-01 -1.33679794e-01
  5.67501535e-02 -4.19431273e-02 -1.79206252e-01 -3.44255308e-01
  3.22604689e-02  7.96852907e-02 -7.01098541e-02  3.54079157e-02
 -3.32888017e-02 -3.51718547e-01  2.25180417e-01 -2.30039991e-01
  4.26020833e-02 -9.35498920e-03 -6.77766229e-02  1.53539275e-01
  3.62143336e-02 -4.89906222e-02  7.26085529e-02 -2.18465738e-01
  1.18742841e-01  9.31000213e-03  6.03480184e-02  1.52554840e-01
 -1.54846663e-01  5.55794750e-02 -1.68304053e-01  6.01126527e-02
 -

In [29]:
def compare_vectors_cosine_similarity(vector1, vector2):
    """Return the cosine similarity between two vectors.

    Computed as ``dot(vector1, vector2) / (||vector1|| * ||vector2||)``.

    Args:
        vector1: First vector (array-like accepted by ``np.dot``).
        vector2: Second vector, same length as ``vector1``.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm.
        In that case the cosine is undefined and the plain formula would
        divide by zero, producing ``nan`` and a RuntimeWarning.
    """
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # Zero-length vector: no direction to compare against.
        return 0.0
    return np.dot(vector1, vector2) / norm_product

# Compare the reference embedding with two related words.
# NOTE: `word`, `embedding` and `models` are defined in earlier cells and must
# already exist when this cell runs.
compare_word1 = "king"
compare_word2 = "princess"

compare_embedding1 = ensemble_word_embedding(compare_word1, models)
compare_embedding2 = ensemble_word_embedding(compare_word2, models)

# ensemble_word_embedding returns None for a word that no model contains.
# The reference `embedding` comes from the same function, so it is checked
# too; otherwise a None would be passed into the similarity computation.
required_embeddings = (embedding, compare_embedding1, compare_embedding2)
if all(vector is not None for vector in required_embeddings):
    print(f"Comparison between '{word}' and '{compare_word1}':")
    similarity1 = compare_vectors_cosine_similarity(embedding, compare_embedding1)
    print(f"Cosine similarity: {similarity1}")

    print(f"Comparison between '{word}' and '{compare_word2}':")
    similarity2 = compare_vectors_cosine_similarity(embedding, compare_embedding2)
    print(f"Cosine similarity: {similarity2}")
else:
    print("No comparison can be made due to missing embeddings.")


Comparison between 'queen' and 'king':
Cosine similarity: 0.6888364900233708
Comparison between 'queen' and 'princess':
Cosine similarity: 0.7175951708973198


In [30]:
# NOTE: this name is also defined in an earlier cell; running this cell
# re-binds it, so this definition is the one in effect afterwards.
def compare_vectors_cosine_similarity(vector1, vector2):
    """Return the cosine similarity between two vectors.

    Computed as ``dot(vector1, vector2) / (||vector1|| * ||vector2||)``.

    Args:
        vector1: First vector (array-like accepted by ``np.dot``).
        vector2: Second vector, same length as ``vector1``.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm.
        In that case the cosine is undefined and the plain formula would
        divide by zero, producing ``nan`` and a RuntimeWarning.
    """
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # Zero-length vector: no direction to compare against.
        return 0.0
    return np.dot(vector1, vector2) / norm_product

# Compare the reference embedding with two unrelated words.
# NOTE: `word`, `embedding` and `models` are defined in earlier cells and must
# already exist when this cell runs.
compare_word1 = "car"
compare_word2 = "laptop"

compare_embedding1 = ensemble_word_embedding(compare_word1, models)
compare_embedding2 = ensemble_word_embedding(compare_word2, models)

# ensemble_word_embedding returns None for a word that no model contains.
# The reference `embedding` comes from the same function, so it is checked
# too; otherwise a None would be passed into the similarity computation.
required_embeddings = (embedding, compare_embedding1, compare_embedding2)
if all(vector is not None for vector in required_embeddings):
    print(f"Comparison between '{word}' and '{compare_word1}':")
    similarity1 = compare_vectors_cosine_similarity(embedding, compare_embedding1)
    print(f"Cosine similarity: {similarity1}")

    print(f"Comparison between '{word}' and '{compare_word2}':")
    similarity2 = compare_vectors_cosine_similarity(embedding, compare_embedding2)
    print(f"Cosine similarity: {similarity2}")
else:
    print("No comparison can be made due to missing embeddings.")

Comparison between 'queen' and 'car':
Cosine similarity: 0.282770693820894
Comparison between 'queen' and 'laptop':
Cosine similarity: 0.17996554165373016


#### 

### Pre-trained word vectors were downloaded from https://developer.syn.co.in/tutorial/bot/oscova/pretrained-vectors.html

#### 