In [1]:
import numpy as np
from collections import Counter

# 1. Corpus
# Keep the raw sentence and the token list under separate names so that
# re-running this cell never calls .lower() on an already-split list.
corpus_text = "I love machine learning and I love deep learning"
corpus = corpus_text.lower().split()

# 2. Vocabulary
# Sort the unique tokens: iterating a set of strings follows hash order, which
# Python randomises per process, so an unsorted vocabulary would give each
# word a different index after every kernel restart.
vocab = sorted(set(corpus))
vocab_size = len(vocab)

# Two-way lookup between a word and its row/column position in the model.
word_to_index = {word: i for i, word in enumerate(vocab)}
index_to_word = {i: word for word, i in word_to_index.items()}

# 3. One-hot encoding function

def one_hot(word):
    """Return the one-hot encoding of ``word`` over the vocabulary.

    The result is a 1-D NumPy array of length ``vocab_size`` that is zero
    everywhere except for a single 1 at ``word_to_index[word]``.

    Raises:
        KeyError: if ``word`` is not a key of ``word_to_index``.
    """
    encoded = np.zeros(vocab_size)
    position = word_to_index[word]
    encoded[position] = 1
    return encoded


In [2]:
# 4. Generate CBOW Training Data

window_size = 1
X = []
Y = []

# Only positions with a full window on both sides are used as targets, so the
# first and last `window_size` tokens appear as context but never as a target.
for center in range(window_size, len(corpus) - window_size):
    # One-hot vectors of every neighbour inside the window, skipping the
    # centre word itself (offset 0).
    neighbours = [
        one_hot(corpus[center + offset])
        for offset in range(-window_size, window_size + 1)
        if offset != 0
    ]

    # Model input: element-wise average of the context one-hot vectors.
    X.append(np.mean(neighbours, axis=0))
    # Model output: one-hot vector of the centre word.
    Y.append(one_hot(corpus[center]))

X = np.array(X)
Y = np.array(Y)


In [3]:
# 5. Initialize Weights

embedding_size = 10

# Draw the initial weights from a dedicated, seeded generator so they are
# identical every time this cell runs (the unseeded global RNG produced a
# different starting point on each execution).
SEED = 42
rng = np.random.default_rng(SEED)
W1 = rng.standard_normal((vocab_size, embedding_size))  # input  -> hidden
W2 = rng.standard_normal((embedding_size, vocab_size))  # hidden -> output

learning_rate = 0.01
epochs = 1000


# 6. Softmax Function

def softmax(x, axis=None):
    """Numerically stable softmax.

    Args:
        x: array-like of scores.
        axis: axis along which to normalise. The default ``None`` normalises
            over the whole array, so the result sums to 1 overall; this is the
            behaviour needed for a single 1-D score vector. Pass ``axis=-1``
            (or ``axis=1``) to normalise each row of a batch independently.

    Returns:
        Array with the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    # keepdims=True makes the denominator broadcast back against exp_x.
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)


In [4]:
# 7. Training CBOW Model

for epoch in range(epochs):
    loss = 0

    # One stochastic-gradient step per (averaged context, target) pair.
    for context_avg, target_onehot in zip(X, Y):
        # Forward pass: project the context into the embedding space, then
        # score every vocabulary word and normalise the scores.
        embedding = np.dot(context_avg, W1)
        scores = np.dot(embedding, W2)
        probs = softmax(scores)

        # Cross-entropy against the one-hot target, accumulated over the
        # epoch; the 1e-9 keeps log() finite if a probability hits zero.
        loss += -np.sum(target_onehot * np.log(probs + 1e-9))

        # Backward pass. Both gradients are computed before either weight
        # matrix is touched, so grad_W1 uses the pre-update W2.
        delta = probs - target_onehot
        grad_W2 = np.outer(embedding, delta)
        grad_W1 = np.outer(context_avg, np.dot(W2, delta))

        # In-place gradient-descent update of both weight matrices.
        W2 -= learning_rate * grad_W2
        W1 -= learning_rate * grad_W1

    # Report the summed epoch loss every 200 epochs.
    if epoch % 200 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 31.4558
Epoch 200, Loss: 2.1025
Epoch 400, Loss: 1.7211
Epoch 600, Loss: 1.6170
Epoch 800, Loss: 1.5699


In [5]:

# 8. Prediction Function

def predict_target(context_words):
    """Predict the centre word for a collection of context words.

    The context words are one-hot encoded and averaged, passed through ``W1``
    and ``W2``, and the vocabulary word with the highest softmax probability
    is returned.

    Args:
        context_words: iterable of words to use as context.

    Returns:
        The word from ``index_to_word`` with the highest predicted probability.

    Raises:
        ValueError: if ``context_words`` is empty. Averaging zero vectors
            yields NaN, which previously propagated through the network and
            made the function return an arbitrary word without any error.
        KeyError: if a context word is not in the vocabulary (raised by the
            ``word_to_index`` lookup inside ``one_hot``).
    """
    # Materialise first so generators can be both checked and iterated.
    context_words = list(context_words)
    if not context_words:
        raise ValueError("context_words must contain at least one word")

    context_vectors = [one_hot(word) for word in context_words]
    context_mean = np.mean(context_vectors, axis=0)

    # Same forward pass as used during training.
    hidden = np.dot(context_mean, W1)
    output = np.dot(hidden, W2)
    y_pred = softmax(output)

    # Cast to a plain int so the dictionary key is not a NumPy scalar.
    return index_to_word[int(np.argmax(y_pred))]

# 9. Test Prediction
# Ask the trained model which word it expects between "love" and "learning".

context = ["love", "learning"]
predicted_word = predict_target(context)
print("Predicted Target Word:", predicted_word)

Predicted Target Word: deep
