In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
!pip install gensim

Collecting gensim
  Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (27.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 27.9/27.9 MB 38.7 MB/s eta 0:00:00
Installing collected packages: gensim
Successfully installed gensim-4.4.0


In [None]:
import gensim.downloader as api
# Load the "glove-wiki-gigaword-50" embeddings through gensim's downloader and
# bind them to `model`; the cells that follow look words up in this object.
# The two prints only bracket the (potentially slow) load with status lines.
print("Downloading and loading GloVe model...")
model = api.load("glove-wiki-gigaword-50")
print("GloVe model loaded successfully!")

Downloading and loading GloVe model...
GloVe model loaded successfully!


In [None]:
# Number of entries in the model's `key_to_index` lookup, reported as the
# vocabulary size of the loaded embeddings.
vocabulary_size = len(model.key_to_index)
print(f"Vocabulary size: {vocabulary_size}")

Vocabulary size: 400000


In [None]:
# Sample words whose embedding vectors are printed for inspection.
example_words = [
    'king', 'queen', 'man', 'woman',
    'apple', 'banana', 'computer',
]

for word in example_words:
    # Words outside the vocabulary are reported and skipped.
    if word not in model:
        print(f"'{word}' not found in vocabulary.\n")
        continue

    # In-vocabulary word: show the raw vector followed by its length.
    print(f"Vector for '{word}':")
    print(model[word])
    print(f"Vector dimension: {len(model[word])}\n")

Vector for 'king':
[ 0.50451   0.68607  -0.59517  -0.022801  0.60046  -0.13498  -0.08813
  0.47377  -0.61798  -0.31012  -0.076666  1.493    -0.034189 -0.98173
  0.68229   0.81722  -0.51874  -0.31503  -0.55809   0.66421   0.1961
 -0.13495  -0.11476  -0.30344   0.41177  -2.223    -1.0756   -1.0783
 -0.34354   0.33505   1.9927   -0.04234  -0.64319   0.71125   0.49159
  0.16754   0.34344  -0.25663  -0.8523    0.1661    0.40102   1.1685
 -1.0137   -0.21585  -0.15155   0.78321  -0.91241  -1.6106   -0.64426
 -0.51042 ]
Vector dimension: 50

Vector for 'queen':
[ 0.37854    1.8233    -1.2648    -0.1043     0.35829    0.60029
 -0.17538    0.83767   -0.056798  -0.75795    0.22681    0.98587
  0.60587   -0.31419    0.28877    0.56013   -0.77456    0.071421
 -0.5741     0.21342    0.57674    0.3868    -0.12574    0.28012
  0.28135   -1.8053    -1.0421    -0.19255   -0.55375   -0.054526
  1.5574     0.39296   -0.2475     0.34251    0.45365    0.16237
  0.52464   -0.070272  -0.83744   -1.0326     0.

In [None]:
import numpy as np
import gensim.downloader as api

def load_glove_from_api(model_name):
    """Load a pretrained model by name through ``gensim.downloader``.

    A status line is printed before and after the call to ``api.load`` so the
    reader can see when the (potentially slow) load starts and finishes.

    Args:
        model_name: Identifier handed unchanged to ``api.load``,
            e.g. ``"glove-wiki-gigaword-300"``.

    Returns:
        Whatever ``api.load(model_name)`` returns.
    """
    print(f"Downloading and loading {model_name} model...")
    loaded = api.load(model_name)
    print(f"{model_name} model loaded successfully!")
    return loaded

# Load the 300-dimensional GloVe model using gensim.downloader and keep it as
# `glove`; the similarity loop further down indexes this object by word.
glove = load_glove_from_api("glove-wiki-gigaword-300")

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Both arguments are converted to ``float32`` NumPy arrays before any
    arithmetic, so plain sequences work as well as arrays.

    Note: this definition rebinds the name ``cosine_similarity`` that the
    notebook's first cell imports from ``sklearn.metrics.pairwise``; after
    this cell runs, the name refers to this pairwise, single-vector version.

    Args:
        vec1: First vector (array-like of numbers).
        vec2: Second vector (array-like of numbers).

    Returns:
        ``dot(vec1, vec2) / (norm(vec1) * norm(vec2))``, or the plain float
        ``0.0`` when either vector has zero norm.
    """
    a = np.asarray(vec1, dtype='float32')
    b = np.asarray(vec2, dtype='float32')

    numerator = np.dot(a, b)
    magnitudes = (np.linalg.norm(a), np.linalg.norm(b))

    # A zero-norm vector has no direction; report 0.0 instead of dividing by zero.
    if any(m == 0 for m in magnitudes):
        return 0.0
    return numerator / (magnitudes[0] * magnitudes[1])

# Word pairs whose embedding similarity is reported, in display order.
pairs = [
    ("doctor", "nurse"), ("cat", "dog"),
    ("car", "bus"), ("king", "queen"),
    ("apple", "orange"), ("table", "chair"),
    ("sun", "moon"), ("teacher", "student"),
    ("computer", "internet"), ("happy", "joyful"),
]

print("\nCalculating cosine similarities:")
for w1, w2 in pairs:
    # Both words must be in the vocabulary; otherwise report and move on.
    if not (w1 in glove and w2 in glove):
        print(f"Skipping pair ({w1}, {w2}): One or both words not found in vocabulary.")
        continue

    sim = cosine_similarity(glove[w1], glove[w2])
    print(f"{w1} – {w2}: {sim:.2f}")


Downloading and loading glove-wiki-gigaword-300 model...
glove-wiki-gigaword-300 model loaded successfully!

Calculating cosine similarities:
doctor – nurse: 0.59
cat – dog: 0.68
car – bus: 0.57
king – queen: 0.63
apple – orange: 0.32
table – chair: 0.44
sun – moon: 0.48
teacher – student: 0.69
computer – internet: 0.59
happy – joyful: 0.48
