In [2]:
import numpy as np

# 1. Prepare the dataset
text = "Machine learning is powerful"
words = text.split()

# Create a dictionary to map words to indices
word_to_index = {word: idx for idx, word in enumerate(set(words))}
index_to_word = {idx: word for word, idx in word_to_index.items()}

# Convert words to indices
sequences = [[word_to_index[word] for word in words[i:i+3]] for i in range(len(words)-3)]
targets = [word_to_index[words[i+3]] for i in range(len(words)-3)]

# Convert sequences and targets to numpy arrays
X = np.array(sequences)
y = np.array(targets)

# 2. Define the RNN class
class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights and biases
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weights for input to hidden layer
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        # Weights for hidden to hidden layer
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        # Weights for hidden to output layer
        self.Why = np.random.randn(output_size, hidden_size) * 0.01

        # Biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, X):
        h = np.zeros((self.hidden_size, 1))
        for t in range(X.shape[0]):
            x_t = np.zeros((self.input_size, 1))
            x_t[X[t]] = 1

            h = np.tanh(np.dot(self.Wxh, x_t) + np.dot(self.Whh, h) + self.bh)

        y = np.dot(self.Why, h) + self.by
        return y, h

    def predict(self, X):
        output, _ = self.forward(X)
        return np.argmax(output, axis=0)

# 3. Train the RNN
def train(model, X, y, epochs=100, learning_rate=0.01):
    for epoch in range(epochs):
        total_loss = 0
        for i in range(X.shape[0]):
            x_seq = X[i]
            target = y[i]

            # Forward pass
            y_pred, h = model.forward(x_seq)

            # Compute the loss (cross-entropy loss)
            loss = -np.log(y_pred[target])
            total_loss += loss

            # Backward pass (gradient descent)
            dL_dy = y_pred
            dL_dy[target] -= 1

            # Gradients for output layer
            dL_dWhy = np.dot(dL_dy, h.T)
            dL_dby = dL_dy

            # Gradients for hidden layer
            dL_dh = np.dot(model.Why.T, dL_dy)
            dL_dhraw = (1 - h ** 2) * dL_dh
            dL_dWxh = np.dot(dL_dhraw, np.zeros((model.input_size, 1)).T)
            dL_dWhh = np.dot(dL_dhraw, h.T)
            dL_dbh = dL_dhraw

            # Update weights using gradient descent
            model.Wxh -= learning_rate * dL_dWxh
            model.Whh -= learning_rate * dL_dWhh
            model.Why -= learning_rate * dL_dWhy
            model.bh -= learning_rate * dL_dbh
            model.by -= learning_rate * dL_dby

        # Print the loss every 10 epochs
        if epoch % 10 == 0:
            print(f"Epoch {epoch} / Loss: {total_loss}")

# Initialize and train the model
input_size = len(word_to_index)
hidden_size = 10
output_size = len(word_to_index)

rnn = SimpleRNN(input_size, hidden_size, output_size)
train(rnn, X, y)

# 4. Test the model
test_sequence = [word_to_index[word] for word in words[:3]]
prediction = rnn.predict(np.array(test_sequence))
predicted_word = index_to_word[int(prediction)]
print(f"Predicted 4th word: {predicted_word}")


Epoch 0 / Loss: [7.80761297]
Epoch 10 / Loss: [2.34120161]
Epoch 20 / Loss: [1.69906693]
Epoch 30 / Loss: [1.34242418]
Epoch 40 / Loss: [1.10238605]
Epoch 50 / Loss: [0.92591578]
Epoch 60 / Loss: [0.78937043]
Epoch 70 / Loss: [0.68013418]
Epoch 80 / Loss: [0.59067662]
Epoch 90 / Loss: [0.51614184]
Predicted 4th word: powerful


  predicted_word = index_to_word[int(prediction)]
