In [1]:
import numpy as np

# Vocabulary: the four characters the model can read and emit.
chars = list("dogs")
# Two-way lookup tables between characters and their integer indices.
ix_to_char = dict(enumerate(chars))
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}

# One-hot inputs and softmax outputs both span the whole vocabulary.
input_size = len(chars)
output_size = input_size
hidden_size = 4

# Fixed seed so the random initial weights are reproducible.
np.random.seed(1)
# Small random weights (standard normal scaled by 0.01). The draw order
# Wx -> Wh -> Wy fixes which values each matrix takes from the seeded stream.
Wx = 0.01 * np.random.randn(hidden_size, input_size)    # input  -> hidden
Wh = 0.01 * np.random.randn(hidden_size, hidden_size)   # hidden -> hidden
Wy = 0.01 * np.random.randn(output_size, hidden_size)   # hidden -> output
# Biases start at zero, stored as column vectors.
bx = np.zeros(shape=(hidden_size, 1))
by = np.zeros(shape=(output_size, 1))


In [2]:
def one_hot(char):
    """Encode ``char`` as a one-hot column vector of shape ``(input_size, 1)``.

    The row given by ``char_to_ix[char]`` is set to 1 and every other row is 0.
    Raises ``KeyError`` if ``char`` is not a key of ``char_to_ix``.
    """
    encoded = np.zeros((input_size, 1))
    hot_row = char_to_ix[char]
    encoded[hot_row] = 1
    return encoded

def softmax(x):
    """Numerically stable softmax taken along axis 0.

    Each column of ``x`` (or the whole vector, if ``x`` is 1-D) is turned into
    a probability distribution that sums to 1.

    Parameters
    ----------
    x : array_like
        Logits. The notebook passes column vectors of shape
        ``(output_size, 1)``; 1-D and multi-column inputs are also accepted.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries along axis 0 sum to 1.
    """
    # Shift by the maximum *along the normalisation axis*. Softmax is invariant
    # to a per-column constant, and using each column's own max guarantees at
    # least one exp(0) == 1 term per column. A single global max could make an
    # entire column underflow to zero and yield 0/0 = NaN.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

def cross_entropy(pred, target_index):
    """Negative log-probability that ``pred`` assigns to ``target_index``.

    ``pred`` is indexed as ``pred[target_index, 0]``, i.e. it is treated as a
    2-D column of probabilities. A small epsilon is added before the log so a
    probability of exactly zero does not produce ``-inf``.
    """
    epsilon = 1e-9
    target_prob = pred[target_index, 0]
    return -np.log(target_prob + epsilon)


In [3]:
# Next-character pairs: at step t the network reads inputs[t] and is trained
# to predict targets[t] (the same sequence shifted by one character).
inputs = list("dog")
targets = list("ogs")


In [4]:
# Train the RNN on the single (inputs, targets) sequence with full
# backpropagation through time and plain gradient descent.
# NOTE: Wx, Wh, Wy, bx, by are updated in place, so re-running this cell
# continues training from the current weights rather than starting over.
epochs = 200
learning_rate = 0.1

for epoch in range(epochs):
    # Every epoch starts from a zero hidden state and a zero accumulated loss.
    h_prev = np.zeros((hidden_size, 1))
    loss = 0
    # Per-time-step caches keyed by t: one-hot inputs, hidden states,
    # logits, and softmax probabilities.
    xs, hs, ys, ps = {}, {}, {}, {}
    # Stored under the dict key -1 so that hs[t - 1] resolves at t == 0.
    hs[-1] = h_prev

    # FORWARD: run the sequence through the network, summing the loss.
    for t in range(len(inputs)):
        xs[t] = one_hot(inputs[t])
        hs[t] = np.tanh(np.dot(Wx, xs[t]) + np.dot(Wh, hs[t - 1]) + bx)
        ys[t] = np.dot(Wy, hs[t]) + by
        ps[t] = softmax(ys[t])
        loss += cross_entropy(ps[t], char_to_ix[targets[t]])

    # BACKWARD: accumulate gradients over all time steps, last step first.
    dWx, dWh, dWy = np.zeros_like(Wx), np.zeros_like(Wh), np.zeros_like(Wy)
    dbx, dby = np.zeros_like(bx), np.zeros_like(by)
    # Gradient flowing into h[t] from step t + 1; zero past the last step.
    dh_next = np.zeros_like(hs[0])

    for t in reversed(range(len(inputs))):
        # Gradient of softmax + cross-entropy w.r.t. the logits ys[t]:
        # probabilities minus the one-hot target (the 1e-9 epsilon inside
        # cross_entropy is ignored here).
        dy = np.copy(ps[t])
        dy[char_to_ix[targets[t]]] -= 1
        dWy += np.dot(dy, hs[t].T)
        dby += dy
        # Hidden-state gradient: from this step's output plus from the future.
        dh = np.dot(Wy.T, dy) + dh_next
        # Back through tanh: d tanh(z)/dz = 1 - tanh(z)**2, with hs[t] = tanh(z).
        dh_raw = (1 - hs[t] ** 2) * dh
        dbx += dh_raw
        dWx += np.dot(dh_raw, xs[t].T)
        dWh += np.dot(dh_raw, hs[t - 1].T)
        # Pass the gradient back to the previous hidden state.
        dh_next = np.dot(Wh.T, dh_raw)

    # Clip every gradient element-wise to [-5, 5], in place
    # (presumably a guard against exploding gradients).
    for dparam in [dWx, dWh, dWy, dbx, dby]:
        np.clip(dparam, -5, 5, out=dparam)

    # Gradient-descent step; `-=` mutates the notebook-level arrays in place.
    Wx -= learning_rate * dWx
    Wh -= learning_rate * dWh
    Wy -= learning_rate * dWy
    bx -= learning_rate * dbx
    by -= learning_rate * dby

    # Report every 10th epoch. `loss` is the summed per-step loss computed in
    # the forward pass above, i.e. before this epoch's weight update.
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 4.1587
Epoch 10, Loss: 3.7293
Epoch 20, Loss: 3.5563
Epoch 30, Loss: 3.3409
Epoch 40, Loss: 2.5232
Epoch 50, Loss: 1.1058
Epoch 60, Loss: 0.4950
Epoch 70, Loss: 0.2831
Epoch 80, Loss: 0.1905
Epoch 90, Loss: 0.1413
Epoch 100, Loss: 0.1114
Epoch 110, Loss: 0.0915
Epoch 120, Loss: 0.0775
Epoch 130, Loss: 0.0671
Epoch 140, Loss: 0.0590
Epoch 150, Loss: 0.0527
Epoch 160, Loss: 0.0475
Epoch 170, Loss: 0.0433
Epoch 180, Loss: 0.0397
Epoch 190, Loss: 0.0367


In [5]:
def predict(start_char, steps=3):
    """Greedily generate ``steps`` characters following ``start_char``.

    Starting from a zero hidden state, each step feeds the current character
    through the RNN (``Wx``, ``Wh``, ``Wy``, ``bx``, ``by``), picks the most
    probable next character with ``argmax``, and feeds that character back in
    as the next input.

    Parameters
    ----------
    start_char : str
        Seed character; must be a key of ``char_to_ix``.
    steps : int, default 3
        Number of characters to generate after the seed.

    Returns
    -------
    str
        ``start_char`` followed by the ``steps`` generated characters.
    """
    hidden = np.zeros((hidden_size, 1))
    x = one_hot(start_char)
    pieces = [start_char]

    for _ in range(steps):
        pre_activation = np.dot(Wx, x) + np.dot(Wh, hidden) + bx
        hidden = np.tanh(pre_activation)
        probs = softmax(np.dot(Wy, hidden) + by)
        # Greedy decoding: always take the single most likely character.
        next_char = ix_to_char[np.argmax(probs)]
        pieces.append(next_char)
        x = one_hot(next_char)

    return "".join(pieces)

# Demo: greedy-decode from the first training character and show the result.
print("\nPrediction starting with 'd':")
print(predict(start_char='d'))



Prediction starting with 'd':
dogs
