In [1]:
import numpy as np
from collections import defaultdict

# Toy corpus: each string is one sentence, tokenised on whitespace below.
corpus = [
    "I love chocolate",
    "I love ice cream",
    "I enjoy playing tennis",
]

# Context window radius: how many positions to look at on each side of a word.
window_size = 4

# `vocab` collects every distinct token seen in the corpus.
# `co_occurrence` maps an ordered (word, context_word) pair to its accumulated
# distance-weighted count; missing pairs default to 0.0.
vocab = set()
co_occurrence = defaultdict(float)

for sentence in corpus:
    words = sentence.split()
    vocab.update(words)
    for i, word in enumerate(words):
        # Clamp the window to the sentence boundaries.
        start = max(0, i - window_size)
        stop = min(i + window_size + 1, len(words))
        for j in range(start, stop):
            if j == i:
                continue  # a position is not its own context
            # Nearer neighbours count more: each pair adds 1 / distance.
            co_occurrence[(word, words[j])] += 1.0 / abs(i - j)

In [2]:
embedding_dim = 10

# Fixed seed so that Restart & Run All reproduces the same starting vectors
# (and therefore the same training trajectory).
SEED = 42
rng = np.random.default_rng(SEED)

# Give every vocabulary word its own randomly initialised vector of length
# `embedding_dim`, drawn from a standard normal distribution.
# `vocab` is a set of strings, whose iteration order can change between
# interpreter runs (string hash randomisation), so iterate in sorted order to
# keep the word -> vector pairing stable for a given seed.
word_embeddings = {
    word: rng.standard_normal(embedding_dim) for word in sorted(vocab)
}


In [4]:
learning_rate = 0.1
num_epochs = 100

# Per-pair gradient descent: for each ordered (word_i, word_j) pair, move the
# vector of word_i so that its dot product with word_j's vector approaches the
# log of the pair's co-occurrence weight. Only word_i's vector is changed in a
# given step.
for epoch in range(num_epochs):
    total_loss = 0
    for pair, observed_count in co_occurrence.items():
        word_i, word_j = pair
        vec_i = word_embeddings[word_i]
        vec_j = word_embeddings[word_j]

        # Residual between the current prediction and the log target.
        diff = np.dot(vec_i, vec_j) - np.log(observed_count)
        # Accumulated before the update, so it reflects the pre-step vectors.
        total_loss += 0.5 * diff**2

        # Gradient of 0.5 * diff**2 with respect to vec_i, treating vec_j as
        # fixed. The in-place subtraction modifies the array held in
        # `word_embeddings`, so no reassignment is needed.
        vec_i -= learning_rate * (diff * vec_j)

    print(f"Epoch: {epoch+1}, Loss: {total_loss}")

Epoch: 1, Loss: 89.97929727303624
Epoch: 2, Loss: 12.197910197729238
Epoch: 3, Loss: 3.101676394753521
Epoch: 4, Loss: 1.5132907446766268
Epoch: 5, Loss: 0.770482344061508
Epoch: 6, Loss: 0.3910012992196198
Epoch: 7, Loss: 0.19614295677457114
Epoch: 8, Loss: 0.0972136033069461
Epoch: 9, Loss: 0.04769208881595417
Epoch: 10, Loss: 0.023212053909621423
Epoch: 11, Loss: 0.011230902067115248
Epoch: 12, Loss: 0.005410738831380893
Epoch: 13, Loss: 0.002598804010501915
Epoch: 14, Loss: 0.001245534864262812
Epoch: 15, Loss: 0.0005960511541364559
Epoch: 16, Loss: 0.0002849407788397199
Epoch: 17, Loss: 0.0001361157149058133
Epoch: 18, Loss: 6.498927946485012e-05
Epoch: 19, Loss: 3.1018630704251345e-05
Epoch: 20, Loss: 1.4801234587656759e-05
Epoch: 21, Loss: 7.0615532646935965e-06
Epoch: 22, Loss: 3.3686202588223185e-06
Epoch: 23, Loss: 1.6068265044191168e-06
Epoch: 24, Loss: 7.664111482485921e-07
Epoch: 25, Loss: 3.6554259314848235e-07
Epoch: 26, Loss: 1.7434225590372456e-07
Epoch: 27, Loss: 8.31