In [2]:
# NOTE(review): GLOVE_PATH is assigned again in the next cell (In [4]) before
# anything reads it, so in a top-to-bottom run this location is overridden.
GLOVE_PATH = r"C:\Users\Lenovo\Desktop\glove\glove.6B.50d.txt"

In [4]:
import os

# Location of the GloVe text file. Set the GLOVE_PATH environment variable to
# point at a copy stored elsewhere; when it is unset or empty, fall back to the
# original local path so existing runs keep working unchanged.
GLOVE_PATH = os.environ.get("GLOVE_PATH") or r"C:\Users\Lenovo\Desktop\glove.6B.50d.txt"

In [5]:
import numpy as np

def load_glove(path, limit=40000):
    """Load GloVe word vectors from a space-delimited text file.

    Each non-blank line is expected to hold a token followed by its vector
    components, separated by single spaces. Blank lines are skipped.

    Args:
        path: Path of the GloVe text file (read as UTF-8).
        limit: Maximum number of vectors to load, or ``None`` to load the
            whole file. The default of 40000 keeps memory use light.

    Returns:
        A ``(vectors, dim)`` tuple. ``vectors`` maps each token to a 1-D
        ``float32`` array; ``dim`` is the length of the first loaded vector,
        or ``None`` when nothing was loaded.

    Raises:
        ValueError: If a component cannot be parsed as a number, or a line's
            vector length differs from that of the first loaded vector.
    """
    vectors = {}
    dim = None
    with open(path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            # `limit=None` means "no cap"; otherwise stop once enough vectors are held.
            if limit is not None and len(vectors) >= limit:
                break
            line = line.rstrip()
            if not line:
                # A blank line would otherwise register '' with an empty vector.
                continue
            word, *values = line.split(" ")
            vec = np.array(values, dtype=np.float32)
            if dim is None:
                dim = vec.size
            elif vec.size != dim:
                # Fail here with a line number instead of later inside np.dot/np.vstack.
                raise ValueError(
                    f"{path}: line {line_no} has {vec.size} components, expected {dim}"
                )
            vectors[word] = vec
    return vectors, dim

# Load a capped slice of the GloVe file, then report its size and confirm that
# the two probe words are present.
glove, dim = load_glove(GLOVE_PATH, limit=40000)
print(f"Loaded words: {len(glove)} | dim: {dim}")
print(f"Has king/queen? {'king' in glove} {'queen' in glove}")

Loaded words: 40000 | dim: 50
Has king/queen? True True


In [6]:
import numpy as np

def cosine(u, v):
    """Return the cosine similarity of ``u`` and ``v`` as a Python float.

    The result is ``0.0`` when the product of the two norms is zero, which
    avoids dividing by zero for an all-zero vector.
    """
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    if norm_product == 0:
        return 0.0
    return float(np.dot(u, v) / norm_product)

def most_similar(word, vectors, topn=10):
    """Rank the other entries of ``vectors`` by cosine similarity to ``word``.

    Args:
        word: Query token.
        vectors: Mapping of token to vector.
        topn: Number of leading results to keep.

    Returns:
        A list of ``(token, similarity)`` pairs sorted from most to least
        similar and sliced to ``topn``, excluding ``word`` itself. If ``word``
        is not a key of ``vectors``, a message string is returned instead.
    """
    if word in vectors:
        query = vectors[word]
        ranked = sorted(
            (
                (other, cosine(query, vec))
                for other, vec in vectors.items()
                if other != word
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return ranked[:topn]
    return f"'{word}' not in vocab"

def analogy(a, b, c, vectors, topn=10):
    """Rank tokens by cosine similarity to ``vectors[b] - vectors[a] + vectors[c]``.

    Args:
        a, b, c: Tokens forming the query "a is to b as c is to ?".
        vectors: Mapping of token to vector.
        topn: Number of leading results to keep.

    Returns:
        A list of ``(token, similarity)`` pairs sorted from most to least
        similar and sliced to ``topn``, with ``a``, ``b`` and ``c`` left out.
        If any of the three tokens is not a key of ``vectors``, a message
        string naming the first missing one is returned instead.
    """
    for term in (a, b, c):
        if term not in vectors:
            return f"Missing '{term}'"

    query = vectors[b] - vectors[a] + vectors[c]
    excluded = {a, b, c}  # the inputs themselves are not valid answers
    candidates = (
        (token, cosine(query, vec))
        for token, vec in vectors.items()
        if token not in excluded
    )
    return sorted(candidates, key=lambda pair: pair[1], reverse=True)[:topn]

# Nearest neighbours of "king" by cosine similarity.
print("Similar to 'king':")
print(most_similar(word="king", vectors=glove, topn=10))

# analogy() ranks against vectors[b] - vectors[a] + vectors[c],
# so a="man", b="king", c="woman" gives king - man + woman.
print("\nAnalogy: king - man + woman ≈ ?")
print(analogy(a="man", b="king", c="woman", vectors=glove, topn=10))

Similar to 'king':
[('prince', 0.8236179947853088), ('queen', 0.7839043736457825), ('ii', 0.7746230363845825), ('emperor', 0.7736247777938843), ('son', 0.766719400882721), ('uncle', 0.7627151608467102), ('kingdom', 0.7542161345481873), ('throne', 0.753991425037384), ('brother', 0.7492412328720093), ('ruler', 0.7434253692626953)]

Analogy: king - man + woman ≈ ?
[('queen', 0.8609581589698792), ('daughter', 0.7684511542320251), ('prince', 0.7640699148178101), ('throne', 0.763496994972229), ('princess', 0.7512727975845337), ('elizabeth', 0.7506488561630249), ('father', 0.7314497232437134), ('kingdom', 0.7296158671379089), ('mother', 0.7280009984970093), ('son', 0.7279537320137024)]


In [7]:
from sklearn.decomposition import PCA

# Hand-picked words, keyed by the group label attached to each of them below.
word_groups = dict(
    royalty=["king", "queen", "prince", "princess", "royal"],
    gender=["man", "woman", "boy", "girl"],
    space=["space", "planet", "star", "galaxy", "nasa"],
    vehicles=["car", "truck", "vehicle", "engine", "road"],
)

# Keep only the words that are keys of `glove`, recording each one's group
# label at the same index.
words, labels = [], []
for g, ws in word_groups.items():
    for w in ws:
        if w not in glove:
            continue
        words.append(w)
        labels.append(g)

# One row per kept word, projected onto two principal components.
X = np.vstack([glove[w] for w in words])
pca = PCA(n_components=2, random_state=42)
X2 = pca.fit_transform(X)

# (word, group, first component, second component) for every kept word;
# X2.T unpacks into the two component columns.
coords = list(zip(words, labels, *X2.T))
coords[:10]

[('king', 'royalty', np.float32(-2.6388626), np.float32(1.4220173)),
 ('queen', 'royalty', np.float32(-3.0095558), np.float32(0.8326983)),
 ('prince', 'royalty', np.float32(-2.723673), np.float32(1.0372595)),
 ('princess', 'royalty', np.float32(-3.199498), np.float32(0.17144531)),
 ('royal', 'royalty', np.float32(-1.8607177), np.float32(1.3025954)),
 ('man', 'gender', np.float32(-1.1301731), np.float32(-1.1911842)),
 ('woman', 'gender', np.float32(-1.8431629), np.float32(-1.8229182)),
 ('boy', 'gender', np.float32(-1.3845654), np.float32(-1.7059687)),
 ('girl', 'gender', np.float32(-2.0030417), np.float32(-2.1948884)),
 ('space', 'space', np.float32(2.446632), np.float32(1.9722303))]

In [8]:
# Membership checks against the loaded vocabulary for two spellings.
print(f"GloVe has 'beautifullness'? {'beautifullness' in glove}")
print(f"GloVe has 'beautiful'? {'beautiful' in glove}")

GloVe has 'beautifullness'? False
GloVe has 'beautiful'? True
