In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
# 1. Data preparation (corpus)
# Parallel toy corpus: en_sentences[i] is translated by fa_sentences[i].
# Target sentences are wrapped in explicit <START>/<END> markers.
en_sentences = ["hello", "bye"]
fa_sentences = ["<START> سلام <END>", "<START> خداحافظ <END>"]

# Token -> integer id tables. Id 0 is the padding token in both languages.
en_vocab = {
    "<PAD>": 0,
    "hello": 1,
    "bye": 2,
}
fa_vocab = {
    "<PAD>": 0,
    "<START>": 1,
    "سلام": 2,
    "خداحافظ": 3,
    "<END>": 4,
}
# Reverse lookup (id -> token) for turning predicted ids back into text.
inv_fa_vocab = {token_id: token for token, token_id in fa_vocab.items()}

# Fixed sequence length, for simplicity; every sequence is padded to it.
maxlen = 4
def tokenize(data, vocab, max_len=None):
    """Convert whitespace-separated sentences into a padded 2-D array of ids.

    Args:
        data: Iterable of sentence strings; each is split on whitespace.
        vocab: Mapping from token string to integer id. Every token in
            ``data`` must be a key of this mapping.
        max_len: Row width to pad to. When ``None`` (the default) the
            module-level ``maxlen`` is used, which is the original behaviour.

    Returns:
        ``np.ndarray`` with one row per sentence, right-padded with id 0.

    Raises:
        KeyError: If a token is missing from ``vocab``.
        ValueError: If a sentence has more tokens than the row width. Without
            this check the pad count would be negative, the row would be
            left over-long, and the result would be ragged or would not match
            the model's fixed input length.
    """
    width = maxlen if max_len is None else max_len
    tokenized = []
    for sentence in data:
        ids = [vocab[word] for word in sentence.split()]
        if len(ids) > width:
            raise ValueError(
                f"sentence has {len(ids)} tokens but the maximum is {width}: {sentence!r}"
            )
        # Right-pad with the <PAD> id (0) up to the fixed width.
        tokenized.append(ids + [0] * (width - len(ids)))
    return np.array(tokenized)

# Encoder input: the English sentences as padded id rows.
x_enc = tokenize(en_sentences, en_vocab)

# Decoder input drops the last word of each target sentence; the training
# target drops the first word. Position t of the decoder input is therefore
# paired with the word at position t + 1 as its label.
x_dec = tokenize(
    [" ".join(words[:-1]) for words in map(str.split, fa_sentences)],
    fa_vocab,
)
y_train = tokenize(
    [" ".join(words[1:]) for words in map(str.split, fa_sentences)],
    fa_vocab,
)

In [3]:
# 2. Positional-encoding layer
class PositionalEncoding(layers.Layer):
    """Adds a learned per-position embedding to its input.

    Args:
        maxlen: Number of distinct positions the embedding table holds.
        embed_dim: Size of each position vector.
    """

    def __init__(self, maxlen, embed_dim):
        super().__init__()
        # Trainable lookup table indexed by position (0 .. maxlen - 1).
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Return ``x`` plus the position vectors for indices 0..length-1.

        The length is read from axis 1 of ``x``.
        # NOTE(review): presumably x is (batch, seq, embed_dim) - confirm.
        """
        seq_len = tf.shape(x)[1]
        position_ids = tf.range(seq_len)
        position_vectors = self.pos_emb(position_ids)
        return x + position_vectors

In [4]:
# 3. Encoder layer
class EncoderLayer(layers.Layer):
    """Single encoder block: self-attention, then a feed-forward network.

    Each sub-layer is followed by a residual add and layer normalisation.

    Args:
        embed_dim: Output width of the feed-forward network; also passed to
            the attention layer as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
    """

    def __init__(self, embed_dim, num_heads, ff_dim):
        super().__init__()
        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.ln1 = layers.LayerNormalization()
        self.ln2 = layers.LayerNormalization()

    def call(self, x):
        """Run ``x`` through the attention and feed-forward sub-layers."""
        # Self-attention: the sequence is both query and value.
        attended = self.mha(x, x)
        normed = self.ln1(x + attended)  # Add & Norm
        transformed = self.ffn(normed)
        return self.ln2(normed + transformed)  # Add & Norm

In [5]:
# 4. Decoder layer (with a correct causal mask)
class DecoderLayer(layers.Layer):
    """Single decoder block.

    Applies masked self-attention, cross-attention over the encoder output,
    and a feed-forward network. Each sub-layer is followed by a residual add
    and layer normalisation.

    Args:
        embed_dim: Output width of the feed-forward network; also passed to
            both attention layers as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
    """

    def __init__(self, embed_dim, num_heads, ff_dim):
        super().__init__()
        self.self_attn = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.cross_attn = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)])
        self.ln1, self.ln2, self.ln3 = layers.LayerNormalization(), layers.LayerNormalization(), layers.LayerNormalization()

    def call(self, x, enc_output):
        """Decode ``x`` while attending to ``enc_output``.

        Args:
            x: Decoder-side sequence; its length is read from axis 1.
            enc_output: Encoder output used as the value of cross-attention.
        """
        # Build the mask dynamically at run time from the sequence length.
        s = tf.shape(x)[1]
        # Keras MultiHeadAttention treats 1 as "may attend" and 0 as "blocked".
        # The lower-triangular matrix (diagonal included) lets position i see
        # positions 0..i only. The previous `1 - band_part(...)` was inverted:
        # it exposed future tokens and hid the current and past ones.
        causal_mask = tf.linalg.band_part(tf.ones((s, s)), -1, 0)

        # 1. Masked self-attention
        x = self.ln1(x + self.self_attn(x, x, attention_mask=causal_mask))
        # 2. Cross-attention over the encoder output
        x = self.ln2(x + self.cross_attn(x, enc_output))
        # 3. Feed-forward
        return self.ln3(x + self.ffn(x))

In [6]:
# 5. Build and train the final model
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# training are repeatable under "Restart Kernel -> Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

embed_dim, n_heads, ff_dim = 64, 4, 128
# Both inputs are fixed-length rows of token ids (length maxlen).
enc_in = layers.Input(shape=(maxlen,))
dec_in = layers.Input(shape=(maxlen,))

# Encoder path: token embedding -> positional encoding -> encoder block
x = layers.Embedding(len(en_vocab), embed_dim)(enc_in)
x = PositionalEncoding(maxlen, embed_dim)(x)
enc_out = EncoderLayer(embed_dim, n_heads, ff_dim)(x)

# Decoder path: token embedding -> positional encoding -> decoder block
x = layers.Embedding(len(fa_vocab), embed_dim)(dec_in)
x = PositionalEncoding(maxlen, embed_dim)(x)
dec_out = DecoderLayer(embed_dim, n_heads, ff_dim)(x, enc_out)

# Per-position probability distribution over the Persian vocabulary.
outputs = layers.Dense(len(fa_vocab), activation="softmax")(dec_out)
model = models.Model([enc_in, dec_in], outputs)

# Targets are integer ids, hence the sparse variant of cross-entropy.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model.fit([x_enc, x_dec], y_train, epochs=200)

<keras.src.callbacks.history.History at 0x7edb1a3194f0>

In [7]:
# 6. Inference: translate new input with greedy decoding
def translate(sentence):
    """Translate an English sentence to Persian, one token at a time.

    Args:
        sentence: Whitespace-separated English text. Words missing from
            ``en_vocab`` are mapped to id 0 (``<PAD>``). Only the first
            ``maxlen`` words are used, because the model's encoder input has
            that fixed length; a longer row would be rejected by ``predict``.

    Returns:
        The decoded tokens joined by single spaces. The string begins with
        the ``<START>`` marker; the ``<END>`` marker is never included.
    """
    # Convert the English input to ids, truncate, then pad to maxlen.
    enc_ids = [en_vocab.get(w, 0) for w in sentence.split()][:maxlen]
    enc_input = np.array([enc_ids + [0] * (maxlen - len(enc_ids))])

    # Decoding starts from the <START> token.
    dec_input_ids = [fa_vocab["<START>"]]
    end_id = fa_vocab["<END>"]

    # At most maxlen - 1 more tokens fit after <START>.
    for _ in range(maxlen - 1):
        pad_dec = dec_input_ids + [0] * (maxlen - len(dec_input_ids))
        preds = model.predict([enc_input, np.array([pad_dec])], verbose=0)

        # Take the most probable token at the last filled decoder position.
        # int() keeps plain Python ints in the id list rather than np.int64.
        next_id = int(np.argmax(preds[0, len(dec_input_ids) - 1, :]))
        if next_id == end_id or next_id == 0:
            break
        dec_input_ids.append(next_id)

    return " ".join(inv_fa_vocab[i] for i in dec_input_ids if i in inv_fa_vocab)

# Final smoke test
print("--- تست مدل ---")
for word in ("hello", "bye"):
    # Left-align the word in a 5-character field so the arrows line up.
    print(f"Input: {word:<5} -> Output: {translate(word)}")

--- تست مدل ---
Input: hello -> Output: <START> سلام
Input: bye   -> Output: <START> خداحافظ
