<a href="https://colab.research.google.com/github/Alabassy/ASS-11/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import math

# 1. Prepare the data
words = ["الأهلي", "نادي", "القرن", "الحقيقي"]
word_to_idx = {word: idx for idx, word in enumerate(words)}
idx_to_word = {idx: word for word, idx in word_to_idx.items()}
vocab_size = len(words)

def to_one_hot(idx, size):
    vec = [0] * size
    vec[idx] = 1
    return vec

inputs = [to_one_hot(word_to_idx[word], vocab_size) for word in ["الأهلي", "نادي", "القرن"]]

# Target (one-hot vector for "الحقيقي")
target_idx = word_to_idx["الحقيقي"]
target = [0] * vocab_size
target[target_idx] = 1

# 2. Initialize weights manually
hidden_size = 2

Wxh = [
    [0.5, -0.4],
    [-0.3, 0.8],
    [0.7, -0.5],
    [-0.6, 0.2]
]  # (input_size x hidden_size)

Whh = [
    [0.2, -0.1],
    [0.5, 0.3]
]  # (hidden_size x hidden_size)

Why = [
    [0.6, -0.2, 0.3, -0.5],
    [-0.1, 0.7, 0.4, 0.5]
]  # (hidden_size x output_size)

# 3. Helper functions
def mat_vec_mul(mat, vec):
    return [sum(m * v for m, v in zip(row, vec)) for row in zip(*mat)]

def softmax(x):
    exps = [math.exp(i) for i in x]
    sum_exps = sum(exps)
    return [j / sum_exps for j in exps]

def vec_mse(a, b):
    return sum((x - y)**2 for x, y in zip(a, b)) / len(a)

# 4. Forward pass
h = [0.0] * hidden_size  # initial hidden state

for x in inputs:
    xh = mat_vec_mul(Wxh, x)
    hh = mat_vec_mul(Whh, h)
    h = [math.tanh(xh_i + hh_i) for xh_i, hh_i in zip(xh, hh)]

# Output layer
y_raw = mat_vec_mul(Why, h)
y_pred = softmax(y_raw)

# 5. Loss
loss = vec_mse(y_pred, target)

# 6. Gradients
# Derivative of MSE Loss w.r.t output
dL_dy = [(2 * (y_pred[i] - target[i])) / len(target) for i in range(len(target))]

# Gradients for Why
dWhy = []
for i in range(len(h)):
    dWhy_row = []
    for d in dL_dy:
        dWhy_row.append(h[i] * d)
    dWhy.append(dWhy_row)

# Gradients for Whh
dWhh = []
for i in range(hidden_size):
    dWhh_row = []
    for j in range(hidden_size):
        dWhh_row.append(h[i] * (1 - h[i]**2) * h[j])
    dWhh.append(dWhh_row)

# Gradients for Wxh
dWxh = []
for i in range(vocab_size):
    dWxh_row = []
    for j in range(hidden_size):
        x_sum = sum(input_vec[i] for input_vec in inputs)
        dWxh_row.append(x_sum * (1 - h[j]**2))
    dWxh.append(dWxh_row)

# 7. Light and casual output

print("\n--- Forward Result ---")
print(f"Predicted Probabilities:")
for idx, prob in enumerate(y_pred):
    print(f"  {idx_to_word[idx]}: {prob:.4f}")

predicted_idx = y_pred.index(max(y_pred))
predicted_word = idx_to_word[predicted_idx]
print(f"\nThe model thinks the next word is: '{predicted_word}'")

print(f"\nLoss = {loss:.6f}")

print("\n--- Gradients ---")

print("\ndWhy:")
for i, row in enumerate(dWhy):
    print(f"Neuron {i}: {row}")

print("\ndWhh:")
for i, row in enumerate(dWhh):
    print(f"Neuron {i}: {row}")

print("\ndWxh:")
for i, row in enumerate(dWxh):
    print(f"Input {i}: {row}")



--- Forward Result ---
Predicted Probabilities:
  الأهلي: 0.3955
  نادي: 0.1771
  القرن: 0.2764
  الحقيقي: 0.1511

The model thinks the next word is: 'الأهلي'

Loss = 0.246210

--- Gradients ---

dWhy:
Neuron 0: [0.14228713896338213, 0.06371442967254094, 0.09944645511559749, -0.3054480237515206]
Neuron 1: [-0.056293316573224395, -0.02520745434878575, -0.039344179809167296, 0.12084495073117744]

dWhh:
Neuron 0: [0.24968235695936683, -0.09878227973000182]
Neuron 1: [-0.18826219377896705, 0.07448251015783303]

dWxh:
Input 0: [0.4821774569538144, 0.9189480753532197]
Input 1: [0.4821774569538144, 0.9189480753532197]
Input 2: [0.4821774569538144, 0.9189480753532197]
Input 3: [0.0, 0.0]
