In [2]:
import math
import random

# Toy vocabulary plus lookup tables in both directions (word <-> integer id).
words = ['Ali', 'Mohamed', 'Aya', 'Karim']
word_to_idx = dict(zip(words, range(len(words))))
idx_to_word = dict(enumerate(words))

# Single training example: given `sequence`, the model should predict `target`.
sequence = ['Ali', 'Mohamed', 'Aya']
target = 'Karim'
X = list(map(word_to_idx.__getitem__, sequence))  # input token ids
y = word_to_idx[target]                           # target token id

# Model sizes and optimisation hyperparameters.
vocab_size = len(idx_to_word)
embedding_dim = 10
hidden_dim = 16
learning_rate = 0.01
epochs = 100

def init_weights(rows, cols):
    """Return a ``rows`` x ``cols`` nested list of small random floats.

    Every entry is drawn independently with ``random.uniform(-0.1, 0.1)``.
    Values are generated row by row, left to right, so a seeded ``random``
    module yields a reproducible matrix.
    """
    matrix = []
    for _ in range(rows):
        row = []
        for _ in range(cols):
            row.append(random.uniform(-0.1, 0.1))
        matrix.append(row)
    return matrix

# Trainable parameters. Each matrix is stored as [output_unit][input_unit]:
#   W_embed : one embedding row per vocabulary entry
#   W_hh    : hidden -> hidden recurrence
#   W_xh    : embedding -> hidden projection
#   W_hy    : hidden -> vocabulary logits
# Biases start at zero. The creation order is kept stable so that a seeded
# run draws the same random numbers for the same matrices.
W_embed = init_weights(rows=vocab_size, cols=embedding_dim)
W_hh = init_weights(rows=hidden_dim, cols=hidden_dim)
W_xh = init_weights(rows=hidden_dim, cols=embedding_dim)
b_h = [0.0 for _ in range(hidden_dim)]
W_hy = init_weights(rows=vocab_size, cols=hidden_dim)
b_y = [0.0 for _ in range(vocab_size)]

def tanh(x):
    """Hyperbolic tangent activation; thin wrapper around ``math.tanh``."""
    activated = math.tanh(x)
    return activated

def softmax(x):
    """Return the softmax of the scores in ``x`` as a list of probabilities.

    The maximum score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps every exponent <= 0, so
    ``math.exp`` cannot raise ``OverflowError`` on large logits.

    Args:
        x: Iterable of real-valued scores (logits).

    Returns:
        A list with one probability per score, summing to 1. An empty input
        yields an empty list.
    """
    scores = list(x)
    if not scores:
        return []

    peak = max(scores)
    exps = [math.exp(score - peak) for score in scores]
    total = sum(exps)  # >= 1.0 because the peak contributes exp(0) == 1
    return [value / total for value in exps]

def forward(input_seq):
    """Run the recurrent network over a sequence of token ids.

    Starting from an all-zero hidden state, each token's embedding row is
    combined with the previous hidden state through ``W_xh``, ``W_hh`` and
    ``b_h`` and squashed with ``tanh``. The last hidden state is projected
    through ``W_hy`` / ``b_y`` and normalised with ``softmax``.

    Args:
        input_seq: Iterable of integer indices into ``W_embed``.

    Returns:
        A ``(probs, hidden)`` tuple: the softmax distribution over the
        vocabulary and the final hidden state.
    """
    hidden = [0.0] * hidden_dim

    for token_idx in input_seq:
        token_vec = W_embed[token_idx]
        next_hidden = []
        for unit in range(hidden_dim):
            from_input = sum(e * w for e, w in zip(token_vec, W_xh[unit]))
            from_state = sum(s * w for s, w in zip(hidden, W_hh[unit]))
            next_hidden.append(tanh(from_input + from_state + b_h[unit]))
        hidden = next_hidden

    logits = []
    for word in range(vocab_size):
        score = sum(hidden[unit] * W_hy[word][unit] for unit in range(hidden_dim))
        logits.append(score + b_y[word])

    return softmax(logits), hidden

def _forward_with_states(input_seq):
    """Run the RNN over ``input_seq`` and keep every hidden state.

    Performs the same computation as ``forward`` but returns
    ``(probs, states)``, where ``states[0]`` is the all-zero initial state and
    ``states[t]`` is the hidden state after consuming ``input_seq[t - 1]``.
    Backpropagation through time needs this history, which ``forward``
    discards.
    """
    states = [[0.0] * hidden_dim]
    for token in input_seq:
        emb = W_embed[token]
        prev = states[-1]
        states.append([
            tanh(
                sum(a * b for a, b in zip(emb, W_xh[i]))
                + sum(a * b for a, b in zip(prev, W_hh[i]))
                + b_h[i]
            )
            for i in range(hidden_dim)
        ])

    last = states[-1]
    logits = [
        sum(last[i] * W_hy[j][i] for i in range(hidden_dim)) + b_y[j]
        for j in range(vocab_size)
    ]
    return softmax(logits), states


# Train on the single (X, y) example with plain gradient descent and full
# backpropagation through time (BPTT). All gradients are computed from the
# current weights first; the weights are only modified afterwards.
for epoch in range(epochs):
    probs, states = _forward_with_states(X)
    h = states[-1]
    loss = -math.log(probs[y])  # cross-entropy for the target word

    # d(loss)/d(logits) for softmax + cross-entropy: probs - one_hot(y).
    dy = probs.copy()
    dy[y] -= 1

    # Output layer gradients.
    dW_hy = [[dy[j] * h[i] for i in range(hidden_dim)] for j in range(vocab_size)]
    db_y = dy.copy()

    # Accumulators for the recurrent parameters, summed over all time steps.
    dW_hh = [[0.0] * hidden_dim for _ in range(hidden_dim)]
    dW_xh = [[0.0] * embedding_dim for _ in range(hidden_dim)]
    db_h = [0.0] * hidden_dim
    dW_embed = {}  # token id -> gradient of that token's embedding row

    # Gradient flowing into the final hidden state from the output layer.
    dh = [sum(dy[j] * W_hy[j][i] for j in range(vocab_size)) for i in range(hidden_dim)]

    # Walk backwards through time: step t consumed X[t - 1] and mapped
    # states[t - 1] -> states[t].
    for t in range(len(X), 0, -1):
        h_t = states[t]
        h_prev = states[t - 1]
        token = X[t - 1]
        emb = W_embed[token]

        # Back through tanh: d tanh(a)/da = 1 - tanh(a)^2.
        da = [dh[i] * (1 - h_t[i] ** 2) for i in range(hidden_dim)]

        for i in range(hidden_dim):
            for j in range(hidden_dim):
                # Uses the *previous* hidden state, the actual input of W_hh.
                dW_hh[i][j] += da[i] * h_prev[j]
            for j in range(embedding_dim):
                dW_xh[i][j] += da[i] * emb[j]
            db_h[i] += da[i]

        # A token may occur several times in X, so accumulate per token id.
        d_emb = dW_embed.setdefault(token, [0.0] * embedding_dim)
        for j in range(embedding_dim):
            d_emb[j] += sum(da[i] * W_xh[i][j] for i in range(hidden_dim))

        # Gradient handed to the previous time step's hidden state.
        dh = [sum(da[i] * W_hh[i][j] for i in range(hidden_dim)) for j in range(hidden_dim)]

    # Apply the gradient-descent updates.
    for i in range(hidden_dim):
        for j in range(hidden_dim):
            W_hh[i][j] -= learning_rate * dW_hh[i][j]
        for j in range(embedding_dim):
            W_xh[i][j] -= learning_rate * dW_xh[i][j]
        b_h[i] -= learning_rate * db_h[i]

    for token, d_emb in dW_embed.items():
        for j in range(embedding_dim):
            W_embed[token][j] -= learning_rate * d_emb[j]

    for j in range(vocab_size):
        for i in range(hidden_dim):
            W_hy[j][i] -= learning_rate * dW_hy[j][i]
        b_y[j] -= learning_rate * db_y[j]

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Evaluate the trained model on the training sequence and report the word
# with the highest predicted probability (first one wins on ties).
probs, _ = forward(X)
predicted_idx, _ = max(enumerate(probs), key=lambda pair: pair[1])
predicted_word = idx_to_word[predicted_idx]

print("\nPrediction Results:")
print(f"Input sequence: {sequence}")
print(f"Predicted next word: {predicted_word}")
print(f"Actual next word: {target}")

Epoch 0, Loss: 1.3887
Epoch 10, Loss: 1.3123
Epoch 20, Loss: 1.2400
Epoch 30, Loss: 1.1717
Epoch 40, Loss: 1.1071
Epoch 50, Loss: 1.0462
Epoch 60, Loss: 0.9889
Epoch 70, Loss: 0.9349
Epoch 80, Loss: 0.8841
Epoch 90, Loss: 0.8363

Prediction Results:
Input sequence: ['Ali', 'Mohamed', 'Aya']
Predicted next word: Karim
Actual next word: Karim
