In [3]:
import numpy as np

In [5]:
# Define training sentence and vocabulary
sentence = ["I", "am", "very", "happy"]
# Sort the unique words so the word -> index assignment is the same on every
# run. Iterating a bare set of strings follows hash order, and string hashes
# are randomized per interpreter process, which would change the mapping (and
# therefore the training trajectory) between kernel restarts even with a fixed
# NumPy seed.
vocab = sorted(set(sentence))
vocab_size = len(vocab)
hidden_size = 4  # You can change this

# Create word-index mappings (each is the inverse of the other)
word_to_index = {word: idx for idx, word in enumerate(vocab)}
index_to_word = {idx: word for word, idx in word_to_index.items()}

def one_hot_encode(word):
    """Return the one-hot column vector for `word`.

    The result has shape (vocab_size, 1), is all zeros except for a 1 in the
    row given by word_to_index[word]. A word missing from word_to_index
    raises KeyError.
    """
    encoded = np.zeros((vocab_size, 1))
    row = word_to_index[word]
    encoded[row] = 1
    return encoded

# Initialize RNN weights with small Gaussian values drawn after fixing the
# NumPy seed. The three draws must stay in this order to get the same values.
np.random.seed(42)
Wx = 0.01 * np.random.randn(hidden_size, vocab_size)   # applied to the one-hot input
Wh = 0.01 * np.random.randn(hidden_size, hidden_size)  # applied to the previous hidden state
Wy = 0.01 * np.random.randn(vocab_size, hidden_size)   # applied to the hidden state to get scores

def rnn_forward(inputs, h_prev):
    """Run the RNN forward over a sequence of words.

    Parameters
    ----------
    inputs : sequence of words, each encoded with one_hot_encode.
    h_prev : initial hidden state; it is copied, not modified.

    Returns
    -------
    (xs, hs, ys) : three dicts keyed by time step t = 0 .. len(inputs) - 1.
        xs[t] is the one-hot input, hs[t] the tanh hidden state and ys[t] the
        softmax output distribution. hs[-1] holds the copy of h_prev.
    """
    xs, hs, ys = {}, {}, {}
    hs[-1] = np.copy(h_prev)

    for t, word in enumerate(inputs):
        xs[t] = one_hot_encode(word)
        hs[t] = np.tanh(np.dot(Wx, xs[t]) + np.dot(Wh, hs[t - 1]))
        logits = np.dot(Wy, hs[t])
        # Softmax is unchanged by subtracting a constant from every logit, so
        # shift by the maximum first: the largest exponent becomes exp(0) = 1
        # and np.exp cannot overflow to inf (which would yield NaN outputs).
        exp_logits = np.exp(logits - np.max(logits))
        ys[t] = exp_logits / np.sum(exp_logits)
    return xs, hs, ys

def compute_loss(ys, target):
    """Negative log-probability assigned to `target` at the final time step.

    `ys` is the dict of per-step output columns; only the entry with key
    len(ys) - 1 is used. `target` is looked up in word_to_index.
    """
    target_row = word_to_index[target]
    final_probs = ys[len(ys) - 1]
    target_prob = final_probs[target_row, 0]
    return -np.log(target_prob)

def rnn_backward(xs, hs, ys, target):
    """Backpropagate the final-step loss through time.

    Takes the dicts produced by the forward pass (xs, hs, ys) and the target
    word, and returns the gradients (dWx, dWh, dWy) for the module-level
    weights Wx, Wh and Wy. The weights themselves are only read here. Each
    gradient is clipped element-wise to [-5, 5] before being returned.
    """
    last_step = len(xs) - 1

    grad_Wx = np.zeros_like(Wx)
    grad_Wh = np.zeros_like(Wh)
    grad_Wy = np.zeros_like(Wy)
    carried_grad = np.zeros((hidden_size, 1))

    # Output error at the final step: predicted distribution minus one-hot target.
    output_err = np.copy(ys[last_step])
    output_err[word_to_index[target]] -= 1

    for step in range(last_step, -1, -1):
        grad_Wy += output_err @ hs[step].T
        hidden_err = Wy.T @ output_err + carried_grad
        # Derivative of tanh expressed through its output: 1 - tanh^2.
        pre_activation_err = (1 - hs[step] ** 2) * hidden_err
        grad_Wx += pre_activation_err @ xs[step].T
        grad_Wh += pre_activation_err @ hs[step - 1].T
        carried_grad = Wh.T @ pre_activation_err
        # The loss is taken at the final step only, so every earlier step
        # sees a zero output error and receives gradient solely through
        # carried_grad.
        output_err = np.zeros_like(output_err)

    # Clip gradients in place to avoid exploding gradients
    for grad in (grad_Wx, grad_Wh, grad_Wy):
        np.clip(grad, -5, 5, out=grad)

    return grad_Wx, grad_Wh, grad_Wy

def train_rnn(inputs, target, epochs=100, learning_rate=0.1):
    """Train the module-level weights Wx, Wh, Wy with plain gradient descent.

    Each epoch runs one forward pass over `inputs` from a zero hidden state,
    computes the loss against `target`, backpropagates, and updates the three
    weight matrices in place. The loss is printed every 10th epoch.

    Parameters
    ----------
    inputs : sequence of words fed to rnn_forward.
    target : word the final time step should predict.
    epochs : number of update steps; values <= 0 perform no training.
    learning_rate : step size applied to each gradient.

    Returns
    -------
    The `ys` dict from the last forward pass, which was computed before that
    epoch's weight update. An empty dict is returned when epochs <= 0.
    """
    global Wx, Wh, Wy

    # No epochs means no forward pass ever runs; return an empty result
    # instead of failing on an unassigned `ys` at the end of the function.
    if epochs <= 0:
        return {}

    h_prev = np.zeros((hidden_size, 1))
    ys = {}

    for epoch in range(epochs):
        xs, hs, ys = rnn_forward(inputs, h_prev)
        loss = compute_loss(ys, target)
        dWx, dWh, dWy = rnn_backward(xs, hs, ys, target)

        Wx -= learning_rate * dWx
        Wh -= learning_rate * dWh
        Wy -= learning_rate * dWy

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
    return ys

def predict_next(inputs):
    """Return the word with the highest output score after the last input.

    Runs a forward pass over `inputs` from a zero hidden state, takes the
    output column at step len(inputs) - 1, and maps its argmax back to a word
    through index_to_word.
    """
    initial_state = np.zeros((hidden_size, 1))
    outputs = rnn_forward(inputs, initial_state)[2]
    last_distribution = outputs[len(inputs) - 1]
    best_index = np.argmax(last_distribution)
    return index_to_word[best_index]

In [7]:
# Training example: the context words and the word that should follow them
input_sequence = ["I", "am", "very"]
target_word = "happy"

print("Training the RNN model...")
train_rnn(input_sequence, target_word, epochs=100, learning_rate=0.1)

# Ask the trained network for its next-word guess and show it beside the truth
predicted_word = predict_next(input_sequence)
print(
    f"\nInput Sequence: {input_sequence}",
    f"Predicted Word: {predicted_word}",
    f"Actual Word: {target_word}",
    sep="\n",
)

Training the RNN model...
Epoch 0, Loss: 1.3863
Epoch 10, Loss: 1.3855
Epoch 20, Loss: 1.3820
Epoch 30, Loss: 1.3641
Epoch 40, Loss: 1.2792
Epoch 50, Loss: 0.9901
Epoch 60, Loss: 0.5421
Epoch 70, Loss: 0.2674
Epoch 80, Loss: 0.1534
Epoch 90, Loss: 0.1017

Input Sequence: ['I', 'am', 'very']
Predicted Word: happy
Actual Word: happy
