# In [2]:
import numpy as np

# Toy corpus: a four-word vocabulary. The model reads the first three words
# and is trained to predict the fourth.
vocab = ["this", "girl", "named", "salma"]
vocab_size = len(vocab)

# Lookup tables in both directions between words and their integer indices.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {token: position for position, token in idx_to_word.items()}

# The single training example: the word expected after the input sequence.
target = "salma"
inputs = ["this", "girl", "named"]

def one_hot_vector(word_idx, vocab_size):
    """Return a 1-D float array of length *vocab_size* with a 1 at *word_idx*.

    Every other entry is 0. Indexing follows NumPy rules, so an index
    outside the array bounds raises ``IndexError``.
    """
    encoded = np.zeros(shape=(vocab_size,))
    encoded[word_idx] = 1
    return encoded

# Layer sizes: inputs and outputs are both one-hot vectors over the vocabulary.
input_size = output_size = vocab_size
hidden_size = 4
learning_rate = 0.1

# Weights start as small Gaussian noise (standard normal scaled by 0.01).
# w1: input -> hidden, w2: hidden -> hidden (recurrent), w3: hidden -> output.
w1 = 0.01 * np.random.randn(hidden_size, input_size)
w2 = 0.01 * np.random.randn(hidden_size, hidden_size)
w3 = 0.01 * np.random.randn(output_size, hidden_size)

# Biases are column vectors initialised to zero.
b1 = np.zeros([hidden_size, 1])
b2 = np.zeros([output_size, 1])

# Train the recurrent network on the single (inputs -> target) example using
# plain gradient descent with backpropagation through time.

# Loop-invariant values: the target index and the one-hot input columns are
# the same in every epoch, so build them once instead of 1000 times.
target_idx = word_to_idx[target]
xs = {
    t: one_hot_vector(word_to_idx[word], vocab_size).reshape(-1, 1)
    for t, word in enumerate(inputs)
}
last_t = len(inputs) - 1

for epoch in range(1000):
    # ---- Forward pass -----------------------------------------------------
    # hs[-1] holds the all-zero initial hidden state so that hs[t - 1] is
    # defined for the first time step (t == 0).
    hs = {-1: np.zeros((hidden_size, 1))}
    for t, x in xs.items():
        hs[t] = np.tanh(np.dot(w1, x) + np.dot(w2, hs[t - 1]) + b1)

    # Only the final hidden state is used to produce the output logits.
    y = np.dot(w3, hs[last_t]) + b2

    # Softmax with the maximum logit subtracted first: mathematically the
    # same distribution, but np.exp can no longer overflow to inf/nan.
    shifted = y - np.max(y)
    exp_shifted = np.exp(shifted)
    sum_exp = np.sum(exp_shifted)
    y_pred = exp_shifted / sum_exp

    # Cross-entropy loss written as log-sum-exp minus the target logit, which
    # avoids taking log(0) when the target probability underflows.
    loss = (np.log(sum_exp) - shifted[target_idx]).item()

    # ---- Backward pass ----------------------------------------------------
    # Gradient of softmax + cross-entropy w.r.t. the logits: y_pred - one_hot.
    dy = np.copy(y_pred)
    dy[target_idx] -= 1

    dw3 = np.dot(dy, hs[last_t].T)
    # Copy so the in-place clipping of db2 below does not also modify dy.
    db2 = np.copy(dy)

    dh = np.dot(w3.T, dy)
    dw1 = np.zeros_like(w1)
    dw2 = np.zeros_like(w2)
    db1 = np.zeros_like(b1)

    # Walk the sequence backwards, accumulating gradients at each time step.
    for t in reversed(range(len(inputs))):
        # Derivative of tanh is 1 - tanh^2; hs[t] already stores tanh(...).
        dh_raw = (1 - hs[t] * hs[t]) * dh
        dw1 += np.dot(dh_raw, xs[t].T)
        dw2 += np.dot(dh_raw, hs[t - 1].T)
        db1 += dh_raw
        # Propagate the gradient to the previous hidden state.
        dh = np.dot(w2.T, dh_raw)

    # Element-wise gradient clipping to [-5, 5], done in place.
    for dparam in [dw1, dw2, dw3, db1, db2]:
        np.clip(dparam, -5, 5, out=dparam)

    # ---- Parameter update (vanilla gradient descent) ------------------------
    w1 -= learning_rate * dw1
    w2 -= learning_rate * dw2
    w3 -= learning_rate * dw3
    b1 -= learning_rate * db1
    b2 -= learning_rate * db2

    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')

# Inference: feed the input sequence through the trained network, starting
# from an all-zero hidden state, and report the most probable next word.
h_prev = np.zeros((hidden_size, 1))
for word in inputs:
    x = one_hot_vector(word_to_idx[word], vocab_size).reshape(-1, 1)
    h_prev = np.tanh(np.dot(w1, x) + np.dot(w2, h_prev) + b1)

y = np.dot(w3, h_prev) + b2
# Softmax with the maximum logit subtracted first so np.exp cannot overflow;
# the resulting probabilities are mathematically unchanged.
exp_shifted = np.exp(y - np.max(y))
y_pred = exp_shifted / np.sum(exp_shifted)

# np.argmax returns a NumPy integer; convert to a plain int for the dict key.
predicted_idx = int(np.argmax(y_pred))
predicted_word = idx_to_word[predicted_idx]

print("\nPredicted word:", predicted_word)


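# Sample output from one run (exact loss values vary between runs because the
# weights are initialised randomly without a fixed seed):
#
# Epoch 0, Loss: 1.3865
# Epoch 100, Loss: 0.0871
# Epoch 200, Loss: 0.0330
# Epoch 300, Loss: 0.0173
# Epoch 400, Loss: 0.0108
# Epoch 500, Loss: 0.0075
# Epoch 600, Loss: 0.0056
# Epoch 700, Loss: 0.0044
# Epoch 800, Loss: 0.0036
# Epoch 900, Loss: 0.0030
#
# Predicted word: salma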
