In [3]:
import numpy as np

text = "I Stady Computitional Neuroscience"

words = text.split()
vocab = list(set(words))
word_to_idx = {word: idx for idx, word in enumerate(vocab)}
idx_to_word = {idx: word for word, idx in word_to_idx.items()}
vocab_size = len(vocab)

input_sequence = [word_to_idx[word] for word in words[:3]]  
target_word = word_to_idx[words[3]]  

def one_hot_encode(idx, vocab_size):
    vec = np.zeros(vocab_size)
    vec[idx] = 1
    return vec

X = np.array([one_hot_encode(idx, vocab_size) for idx in input_sequence])  
Y = one_hot_encode(target_word, vocab_size) 

hidden_size = 5  
learning_rate = 0.1
epochs = 1000

Wxh = np.random.randn(hidden_size, vocab_size) * 0.01  
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  
Why = np.random.randn(vocab_size, hidden_size) * 0.01  
bh = np.zeros((hidden_size, 1)) 
by = np.zeros((vocab_size, 1))  


def forward(X, Wxh, Whh, Why, bh, by):
    h = np.zeros((hidden_size, 1)) 
    hs, ys = {}, {}
    
    for t in range(len(X)):
        xt = X[t].reshape(-1, 1)  
        h = np.tanh(np.dot(Wxh, xt) + np.dot(Whh, h) + bh)
        hs[t] = h
        
    y = np.dot(Why, h) + by
    ys[len(X)] = y 
    
    return hs, ys

def softmax(x):
    exp_x = np.exp(x - np.max(x))  
    return exp_x / np.sum(exp_x)

def loss(Y, Y_pred):
    return -np.sum(Y * np.log(Y_pred))

def backward(X, Y, hs, ys, Wxh, Whh, Why, bh, by):
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dh_next = np.zeros_like(hs[0])
    
    dy = ys[len(X)] - Y.reshape(-1, 1)
    dWhy += np.dot(dy, hs[len(X)-1].T)
    dby += dy
    
    for t in reversed(range(len(X))):
        dh = np.dot(Why.T, dy) + dh_next
        dh_raw = (1 - hs[t] ** 2) * dh  
        dbh += dh_raw
        dWxh += np.dot(dh_raw, X[t].reshape(1, -1))
        dWhh += np.dot(dh_raw, hs[t-1].T if t > 0 else np.zeros_like(hs[0]).T)
        dh_next = np.dot(Whh.T, dh_raw)
    
    return dWxh, dWhh, dWhy, dbh, dby

for epoch in range(epochs):
    hs, ys = forward(X, Wxh, Whh, Why, bh, by)
    Y_pred = softmax(ys[len(X)].flatten())
    
    current_loss = loss(Y, Y_pred)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {current_loss}")
    
    dWxh, dWhh, dWhy, dbh, dby = backward(X, Y, hs, ys, Wxh, Whh, Why, bh, by)
    
    Wxh -= learning_rate * dWxh
    Whh -= learning_rate * dWhh
    Why -= learning_rate * dWhy
    bh -= learning_rate * dbh
    by -= learning_rate * dby


    def predict(X, Wxh, Whh, Why, bh, by):
    _, ys = forward(X, Wxh, Whh, Why, bh, by)
    Y_pred = softmax(ys[len(X)].flatten())
    predicted_idx = np.argmax(Y_pred)
    return idx_to_word[predicted_idx]

predicted_word = predict(X, Wxh, Whh, Why, bh, by)
print(f"Predicted 4th word: {predicted_word}")

Epoch 0, Loss: 1.3863777107121773
Epoch 100, Loss: 0.7436774588533017
Epoch 200, Loss: 0.7436683807713242
Epoch 300, Loss: 0.7436683806286812
Epoch 400, Loss: 0.7436683806286793
Epoch 500, Loss: 0.7436683806286793
Epoch 600, Loss: 0.7436683806286793
Epoch 700, Loss: 0.7436683806286793
Epoch 800, Loss: 0.7436683806286793
Epoch 900, Loss: 0.7436683806286793
Predicted 4th word: Neuroscience
