In [13]:
import numpy as np

text = ["barca", "is", "the", "best"]
vocab = sorted(set(text))
word_to_idx = {word: idx for idx, word in enumerate(vocab)}
idx_to_word = {idx: word for idx, word in enumerate(vocab)}
sequence = [word_to_idx[word] for word in text]

input_size = len(vocab)  
hidden_size = 10       
output_size = len(vocab)
learning_rate = 0.01
epochs = 1000

np.random.seed(42)
Wxh = np.random.randn(hidden_size, input_size) * 0.01  
Whh = np.random.randn(hidden_size, hidden_size) * 0.01 
Why = np.random.randn(output_size, hidden_size) * 0.01 
bh = np.zeros((hidden_size, 1))                       
by = np.zeros((output_size, 1))                       

def one_hot(idx, size):
    vec = np.zeros((size, 1))
    vec[idx] = 1
    return vec

def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / np.sum(exp_x, axis=0)

def forward_propagation(inputs, h_prev):
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(h_prev)
    
    for t in range(len(inputs)):
        xs[t] = one_hot(inputs[t], input_size)
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        ys[t] = np.dot(Why, hs[t]) + by
        ps[t] = softmax(ys[t])
    
    return xs, hs, ys, ps

def backward_propagation(xs, hs, ps, targets):
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dh_next = np.zeros_like(hs[0])
    
    for t in reversed(range(len(targets))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1  
        
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        
        dh = np.dot(Why.T, dy) + dh_next
        dh_raw = (1 - hs[t] * hs[t]) * dh  
        
        dbh += dh_raw
        dWxh += np.dot(dh_raw, xs[t].T)
        if t > 0:
            dWhh += np.dot(dh_raw, hs[t-1].T)
        
        dh_next = np.dot(Whh.T, dh_raw)
    
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)
    
    return dWxh, dWhh, dWhy, dbh, dby

h_prev = np.zeros((hidden_size, 1))
for epoch in range(epochs):
    inputs = sequence[:3]  
    target = [sequence[3]]
    xs, hs, ys, ps = forward_propagation(inputs, h_prev)
    loss = -np.log(ps[len(inputs)-1][target[0], 0])
    dWxh, dWhh, dWhy, dbh, dby = backward_propagation(xs, hs, ps, target)
    for param, dparam in zip([Wxh, Whh, Why, bh, by], [dWxh, dWhh, dWhy, dbh, dby]):
        param += -learning_rate * dparam
    h_prev = hs[len(inputs)-1]
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

xs, hs, ys, ps = forward_propagation(sequence[:3], np.zeros((hidden_size, 1)))
predicted_idx = np.argmax(ps[2])
predicted_word = idx_to_word[predicted_idx]
print(f"Predicted word: {predicted_word}")

Epoch 0, Loss: 1.3859
Epoch 100, Loss: 0.8122
Epoch 200, Loss: 0.5090
Epoch 300, Loss: 0.3481
Epoch 400, Loss: 0.2553
Epoch 500, Loss: 0.1970
Epoch 600, Loss: 0.1576
Epoch 700, Loss: 0.1296
Epoch 800, Loss: 0.1090
Epoch 900, Loss: 0.0934
Predicted word: best
