#Implement GloVe using numpy gradient descent.

In [4]:
import numpy as np

# Sample vocabulary and cooccurrence matrix
vocab_size = 5
vocab = ['apple', 'orange', 'banana', 'grape', 'kiwi']
cooccurrence_matrix = np.array([[0, 3, 1, 2, 0],
                                [3, 0, 2, 1, 0],
                                [1, 2, 0, 0, 3],
                                [2, 1, 0, 0, 2],
                                [0, 0, 3, 2, 0]])

# Initialize parameters
embedding_size = 3
learning_rate = 0.05
num_epochs = 100

center_word_vectors = np.random.randn(vocab_size, embedding_size)
context_word_vectors = np.random.randn(vocab_size, embedding_size)
center_word_biases = np.zeros(vocab_size)
context_word_biases = np.zeros(vocab_size)

# Training GloVe
for epoch in range(num_epochs):
    # Calculate gradients
    inner = np.dot(center_word_vectors, context_word_vectors.T) + center_word_biases[:, np.newaxis] + context_word_biases[np.newaxis, :] - np.log(cooccurrence_matrix)
    weighting = np.power(np.minimum(1.0, (cooccurrence_matrix / 100)), 0.75)
    grad_inner = weighting * (inner - np.log(cooccurrence_matrix))

    grad_center_word_vectors = np.dot(grad_inner, context_word_vectors)
    grad_context_word_vectors = np.dot(grad_inner.T, center_word_vectors)
    grad_center_word_biases = np.sum(grad_inner, axis=1)
    grad_context_word_biases = np.sum(grad_inner, axis=0)

    # Clip gradients to avoid NaN values
    clip_value = 1.0
    grad_center_word_vectors = np.clip(grad_center_word_vectors, -clip_value, clip_value)
    grad_context_word_vectors = np.clip(grad_context_word_vectors, -clip_value, clip_value)
    grad_center_word_biases = np.clip(grad_center_word_biases, -clip_value, clip_value)
    grad_context_word_biases = np.clip(grad_context_word_biases, -clip_value, clip_value)

    # Update parameters
    center_word_vectors -= learning_rate * grad_center_word_vectors
    context_word_vectors -= learning_rate * grad_context_word_vectors
    center_word_biases -= learning_rate * grad_center_word_biases
    context_word_biases -= learning_rate * grad_context_word_biases

    # Calculate and print cost (for monitoring purposes)
    cost = 0.5 * np.sum(weighting * np.square(inner - np.log(cooccurrence_matrix)))
    print(f"Epoch {epoch + 1}/{num_epochs}, Cost: {cost}")

# Print learned word vectors
print("Learned Word Vectors:")
print(center_word_vectors)


Epoch 1/100, Cost: nan
Epoch 2/100, Cost: nan
Epoch 3/100, Cost: nan
Epoch 4/100, Cost: nan
Epoch 5/100, Cost: nan
Epoch 6/100, Cost: nan
Epoch 7/100, Cost: nan
Epoch 8/100, Cost: nan
Epoch 9/100, Cost: nan
Epoch 10/100, Cost: nan
Epoch 11/100, Cost: nan
Epoch 12/100, Cost: nan
Epoch 13/100, Cost: nan
Epoch 14/100, Cost: nan
Epoch 15/100, Cost: nan
Epoch 16/100, Cost: nan
Epoch 17/100, Cost: nan
Epoch 18/100, Cost: nan
Epoch 19/100, Cost: nan
Epoch 20/100, Cost: nan
Epoch 21/100, Cost: nan
Epoch 22/100, Cost: nan
Epoch 23/100, Cost: nan
Epoch 24/100, Cost: nan
Epoch 25/100, Cost: nan
Epoch 26/100, Cost: nan
Epoch 27/100, Cost: nan
Epoch 28/100, Cost: nan
Epoch 29/100, Cost: nan
Epoch 30/100, Cost: nan
Epoch 31/100, Cost: nan
Epoch 32/100, Cost: nan
Epoch 33/100, Cost: nan
Epoch 34/100, Cost: nan
Epoch 35/100, Cost: nan
Epoch 36/100, Cost: nan
Epoch 37/100, Cost: nan
Epoch 38/100, Cost: nan
Epoch 39/100, Cost: nan
Epoch 40/100, Cost: nan
Epoch 41/100, Cost: nan
Epoch 42/100, Cost: nan
E

  inner = np.dot(center_word_vectors, context_word_vectors.T) + center_word_biases[:, np.newaxis] + context_word_biases[np.newaxis, :] - np.log(cooccurrence_matrix)
  grad_inner = weighting * (inner - np.log(cooccurrence_matrix))
  grad_inner = weighting * (inner - np.log(cooccurrence_matrix))
  cost = 0.5 * np.sum(weighting * np.square(inner - np.log(cooccurrence_matrix)))
  cost = 0.5 * np.sum(weighting * np.square(inner - np.log(cooccurrence_matrix)))


In [5]:
!pip install spacy



In [6]:
!python -m spacy download en_core_web_md

2023-11-27 14:09:01.853282: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-27 14:09:01.853364: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-27 14:09:01.853416: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-27 14:09:01.866687: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Collecting en-core-web-md==3.6.0
  Downloading htt

In [7]:
import spacy

nlp = spacy.load("en_core_web_md")
parsed_text = nlp("This is a sentence")

glove_vectors = [w.vector for w in parsed_text]

In [8]:
sentence_vector = nlp("This is a sentence").vector

In [9]:
import numpy as np
import spacy
from sklearn.base import BaseEstimator, TransformerMixin

class GloveVectorizer(BaseEstimator, TransformerMixin):
    def __init__(self, model_name="en_core_web_md"):
        self._nlp = spacy.load(model_name)

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return np.concatenate([self._nlp(doc).vector.reshape(1, -1) for doc in X])

In [10]:
X = [
    "I like cats",
    "I like dogs",
    "Show me a rabbit",
    "I like hot chocolate",
    "I want to live on the moon",
    "What time is dinner?",
]

In [11]:
y = [True, True, True, False, False, False]

In [12]:
X_test = [
    "Where is my hamster",
    "I own a nice car",
    "I can ride a horse",
    "What are the chances of that?",
]

In [13]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

g = GloveVectorizer()
svm = LinearSVC()

pipeline = make_pipeline(g, svm)

pipeline.fit(X, y)

y_pred = pipeline.predict(X_test)
print(y_pred)

[False  True  True False]
