## Environment Setup and Data Loading

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
from sklearn.decomposition import PCA
import pprint

# Configuration
# Note: You must have 'glove.6B.100d.txt' in your working directory
GLOVE_INPUT_FILE = 'glove.6B.100d.txt'
WORD2VEC_OUTPUT_FILE = 'glove.6B.100d.word2vec'

# Convert GloVe format to Word2Vec format for Gensim compatibility
glove2word2vec(GLOVE_INPUT_FILE, WORD2VEC_OUTPUT_FILE)
model = KeyedVectors.load_word2vec_format(WORD2VEC_OUTPUT_FILE, binary=False)

print(f"Model loaded with {len(model)} unique word vectors.")

  glove2word2vec(GLOVE_INPUT_FILE, WORD2VEC_OUTPUT_FILE)


Model loaded with 400000 unique word vectors.


## Semantic Proximity and "Word Math"

In [4]:
def solve_analogy(a, b, c):
    """
    Solves analogies of the form: 'a' is to 'b' as 'c' is to '?'
    Equation: b - a + c
    """
    result = model.most_similar(positive=[c, b], negative=[a])
    return result[0][0]

# Examples
print(f"Paris is to France as Berlin is to: {solve_analogy('france', 'paris', 'germany')}")
print(f"Man is to King as Woman is to: {solve_analogy('man', 'king', 'woman')}")

Paris is to France as Berlin is to: berlin
Man is to King as Woman is to: queen
