In [1]:
import numpy as np

# إعداد البيانات
data = [
    (['I', 'love', 'deep'], 'learning'),
    (['deep', 'learning', 'is'], 'awesome'),
    (['I', 'enjoy', 'machine'], 'learning'),
    (['machine', 'learning', 'is'], 'fun')
]

# بناء قاموس الكلمات
words = list(set([word for sentence, target in data for word in sentence] + [target for _, target in data]))
word_to_idx = {word: idx for idx, word in enumerate(words)}
idx_to_word = {idx: word for word, idx in word_to_idx.items()}
vocab_size = len(words)

# إعداد البراميترز
hidden_size = 16  # حجم الذاكرة المخفية
learning_rate = 0.01

# الوزنات
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01  # input to hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01  # hidden to output
bh = np.zeros((hidden_size, 1))  # bias hidden
by = np.zeros((vocab_size, 1))   # bias output

# تحويل الكلمات لـ One-hot
def word_to_one_hot(word):
    one_hot = np.zeros((vocab_size, 1))
    one_hot[word_to_idx[word]] = 1
    return one_hot

# Softmax
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / np.sum(e_x)

# التدريب
for epoch in range(5000):
    total_loss = 0
    for sentence, target in data:
        # Forward pass
        hs = {}
        hs[-1] = np.zeros((hidden_size, 1))

        for t in range(len(sentence)):
            xt = word_to_one_hot(sentence[t])
            hs[t] = np.tanh(np.dot(Wxh, xt) + np.dot(Whh, hs[t-1]) + bh)

        # Output layer
        y = np.dot(Why, hs[len(sentence)-1]) + by
        p = softmax(y)

        # Loss (cross-entropy)
        target_idx = word_to_idx[target]
        loss = -np.log(p[target_idx])
        total_loss += loss

        # Backward pass
        # Gradients initialization
        dWhy = np.zeros_like(Why)
        dWxh = np.zeros_like(Wxh)
        dWhh = np.zeros_like(Whh)
        dbh = np.zeros_like(bh)
        dby = np.zeros_like(by)
        dhnext = np.zeros_like(hs[0])

        # Output layer gradient
        dy = np.copy(p)
        dy[target_idx] -= 1  # derivative of loss wrt softmax input

        dWhy += np.dot(dy, hs[len(sentence)-1].T)
        dby += dy

        # Backprop through time
        for t in reversed(range(len(sentence))):
            dh = np.dot(Why.T, dy) + dhnext  # backprop into h
            dhraw = (1 - hs[t] * hs[t]) * dh  # tanh derivative
            dbh += dhraw
            dWxh += np.dot(dhraw, word_to_one_hot(sentence[t]).T)
            dWhh += np.dot(dhraw, hs[t-1].T)
            dhnext = np.dot(Whh.T, dhraw)

        # Clip to prevent exploding gradients
        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -5, 5, out=dparam)

        # Update weights
        Wxh -= learning_rate * dWxh
        Whh -= learning_rate * dWhh
        Why -= learning_rate * dWhy
        bh -= learning_rate * dbh
        by -= learning_rate * dby

    if epoch % 500 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss}')

# Prediction function
def predict(sentence):
    hs = {}
    hs[-1] = np.zeros((hidden_size, 1))
    for t in range(len(sentence)):
        xt = word_to_one_hot(sentence[t])
        hs[t] = np.tanh(np.dot(Wxh, xt) + np.dot(Whh, hs[t-1]) + bh)

    y = np.dot(Why, hs[len(sentence)-1]) + by
    p = softmax(y)
    idx = np.argmax(p)
    return idx_to_word[idx]

# اختبار الموديل
print("Prediction for ['I', 'love', 'deep']:", predict(['I', 'love', 'deep']))
print("Prediction for ['machine', 'learning', 'is']:", predict(['machine', 'learning', 'is']))


Epoch 0, Loss: [8.78653819]
Epoch 500, Loss: [1.57657521]
Epoch 1000, Loss: [0.17882876]
Epoch 1500, Loss: [0.04759607]
Epoch 2000, Loss: [0.0266513]
Epoch 2500, Loss: [0.01826221]
Epoch 3000, Loss: [0.01381411]
Epoch 3500, Loss: [0.01108126]
Epoch 4000, Loss: [0.00924035]
Epoch 4500, Loss: [0.00791939]
Prediction for ['I', 'love', 'deep']: learning
Prediction for ['machine', 'learning', 'is']: fun
