In [1]:
import numpy as np

text = "hala madrid for ever"
words = text.split()

# The rest of the script uses words[:3] as the input sequence and words[3]
# as the prediction target, so exactly four words are required.
if len(words) != 4:
    print(f"Error: The text must contain exactly 4 words. This text has {len(words)} words.")
    # Raise SystemExit instead of calling exit(): exit() is injected by the
    # `site` module for interactive use (absent under `python -S`, overridden
    # by IPython) and would report success (status 0) for this failure.
    raise SystemExit(1)

# Sorting the unique words makes the word -> index assignment deterministic.
vocab = sorted(set(words))
vocab_size = len(vocab)

# Forward and reverse lookup tables between words and their vocabulary index.
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = dict(enumerate(vocab))

print(f"Vocabulary ({vocab_size} words): {vocab}")
print(f"Word to Index Mapping: {word_to_ix}")

def word_to_one_hot(word, vocab_size, word_to_ix_map):
    """Encode ``word`` as a one-hot column vector.

    Args:
        word: The token to encode.
        vocab_size: Number of rows in the returned vector.
        word_to_ix_map: Mapping from word to its row index.

    Returns:
        A float array of shape ``(vocab_size, 1)`` with a 1 in the row given
        by ``word_to_ix_map[word]``. If ``word`` is not in the mapping, the
        vector is left as all zeros.
    """
    one_hot = np.zeros((vocab_size, 1))
    # Unknown words are not an error here: they encode to the zero vector.
    if word not in word_to_ix_map:
        return one_hot
    row = word_to_ix_map[word]
    one_hot[row, 0] = 1.0
    return one_hot

hidden_size = 5

# Randomly initialised weight matrices, scaled down by 0.1.
W_xh = np.random.randn(hidden_size, vocab_size) * 0.1   # input  -> hidden
W_hh = np.random.randn(hidden_size, hidden_size) * 0.1  # hidden -> hidden
W_ho = np.random.randn(vocab_size, hidden_size) * 0.1   # hidden -> output

# The first three words are fed to the network; the fourth is the target.
input_words = words[:3]
target_word = words[3]

print(f"\nProcessing sequence: {' '.join(input_words)}")
print(f"Target word: '{target_word}'")

# The hidden state starts at zero and is carried from one word to the next.
h = np.zeros((hidden_size, 1))
print(f"Initial hidden state:\n{h.T}\n")

for word in input_words:
    print(f"Processing word: '{word}'")
    x = word_to_one_hot(word, vocab_size, word_to_ix)
    # Standard recurrent update: combine the previous state with the current
    # input, then squash with tanh. Without the non-linearity the whole
    # network collapses into a single linear map and the hidden state is
    # unbounded.
    h = np.tanh(np.dot(W_hh, h) + np.dot(W_xh, x))
    print(f"Hidden state after '{word}':\n{h.T}\n")

print("Predicting next word using final hidden state...")

# Project the final hidden state onto one raw score per vocabulary entry.
output_scores = np.dot(W_ho, h)

def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Each column (or the whole array, for 1-D input) is normalised
    independently so that its entries are positive and sum to 1.

    Args:
        x: Array-like of scores with at least one dimension.

    Returns:
        An array with the same shape as ``x`` holding the probabilities.
    """
    # Shift by the maximum along the same axis that is normalised. Using the
    # global maximum instead can underflow an entire column to zeros when
    # columns differ a lot in magnitude, which produces 0/0 = NaN.
    shift = np.max(x, axis=0, keepdims=True)
    e_x = np.exp(x - shift)
    return e_x / e_x.sum(axis=0, keepdims=True)

# Convert the raw scores into probabilities and look up the word whose
# probability is highest.
prediction_probs = softmax(output_scores)
predicted_index = prediction_probs.argmax()
predicted_word = ix_to_word[predicted_index]

# Report the scores, the distribution, and the prediction next to the target.
print(
    f"Output scores over vocabulary:\n{output_scores.T}",
    f"\nProbability distribution:\n{prediction_probs.T}",
    f"\nPredicted word (index {predicted_index}): {predicted_word}",
    f"Actual target word: {target_word}",
    sep="\n",
)

Vocabulary (4 words): ['ever', 'for', 'hala', 'madrid']
Word to Index Mapping: {'ever': 0, 'for': 1, 'hala': 2, 'madrid': 3}

Processing sequence: hala madrid for
Target word: 'ever'
Initial hidden state:
[[0. 0. 0. 0. 0.]]

Processing word: 'hala'
Hidden state after 'hala':
[[-0.0272799  -0.09891179  0.14776947 -0.13672479  0.00508372]]

Processing word: 'madrid'
Hidden state after 'madrid':
[[ 0.05847741 -0.05651943  0.15282764 -0.01426258 -0.00981724]]

Processing word: 'for'
Hidden state after 'for':
[[-0.02912967  0.10223692 -0.16771342 -0.03506484 -0.13066113]]

Predicting next word using final hidden state...
Output scores over vocabulary:
[[ 0.01394477 -0.00610167 -0.01369307  0.02476845]]

Probability distribution:
[[0.25228468 0.24727762 0.24540755 0.25503016]]

Predicted word (index 3): madrid
Actual target word: ever
