In [1]:
import os
import re
import numpy as np

from pathlib import Path

import pandas as pd
import torch
import evaluate


from torch import nn
from tqdm.notebook import tqdm
from tokenizers import Tokenizer
from tokenizers import normalizers
from tokenizers import pre_tokenizers
from tokenizers.models import WordLevel, WordPiece
from tokenizers.processors import TemplateProcessing

from torch.utils.data import DataLoader, Dataset, Subset

from tokenizers.normalizers import NFC, StripAccents, Strip, Lowercase
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.trainers import WordLevelTrainer, WordPieceTrainer

from sklearn.model_selection import train_test_split

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset

## Setup

Arabic English sentences pairs, originally obtained from [Kaggle](https://www.kaggle.com/datasets/samirmoustafa/arabic-to-english-translation-sentences)

In [3]:
data_dir = os.path.join(os.curdir, "data")
data_path = os.path.join(data_dir, "ara_eng.txt")


data = pd.read_csv(data_path, sep="\t", names=["target", "source"])

In [4]:
data

Unnamed: 0,target,source
0,Hi.,مرحبًا.
1,Run!,اركض!
2,Help!,النجدة!
3,Jump!,اقفز!
4,Stop!,قف!
...,...,...
24633,rising voices promoting a more linguistically ...,شاركنا تحدي ابداع ميم بلغتك الام تعزيزا للتنوع...
24634,following last year s successful campaign we i...,استكمالا لنجاح حملة العام السابق ندعوكم للمشار...
24635,during last year s challenge we also met langu...,تعرفنا خلال تحدي العام الماضي على ابطال لغويين...
24636,to take part just follow the simple steps outl...,للمشاركة في التحدي اتبع الخطوات الموضحة على ال...


In [5]:
print(f"Total samples = {len(data)}")

Total samples = 24638


## Preprocessing

In [6]:
punc_re = re.compile(r"""[!"#$%&\'()*+,-./:;<=>?@[\\\]^_`{|}~،؟…«“\":\"…”]""")
def remove_punctation(text: str) -> str:
    return punc_re.sub(repl="", string=text)


diactircs_re = re.compile("[\u064B-\u0652]")
def remove_diactrics(text: str) -> str:
    # TODO: Implement this function using the diactrics_re
    return diactircs_re.sub(repl="", string=text)

def clean_text(text: str) -> str:
    return remove_punctation(remove_diactrics(text))

In [7]:
data["target"] = data["target"].map(clean_text)
data["source"] = data["source"].map(clean_text)

In [8]:
data["target"].duplicated(keep=False).sum()

3241

In [9]:
data["length"] = data["source"].map(lambda x: len(x.split(" ")))

In [10]:
max_len = np.percentile(data["length"], q=85)
max_len

32.0

In [11]:
data = data[data["length"] <= max_len]
data.shape

(20963, 3)

In [12]:
train, test = train_test_split(data)

In [13]:
len(train), len(test)

(15722, 5241)

## Tokenization

In [14]:
normalizer = normalizers.Sequence([NFC(), StripAccents(), Lowercase(), Strip()])

In [15]:
post_processor = TemplateProcessing(
    single="[SOS] $A [EOS]",
    special_tokens=[("[SOS]", 2), ("[EOS]", 3)]
)

In [16]:
ar_tokenizer = Tokenizer(model=WordLevel(unk_token="[OOV]"))
en_tokenizer = Tokenizer(model=WordLevel(unk_token="[OOV]"))

In [17]:
pre_tokenizer = pre_tokenizers.Sequence([Whitespace()])

In [18]:
ar_tokenizer.normalizer = normalizer
ar_tokenizer.pre_tokenizer = pre_tokenizer

en_tokenizer.normalizer = normalizer
en_tokenizer.pre_tokenizer = pre_tokenizer

ar_tokenizer.enable_padding(direction='right')
en_tokenizer.enable_padding(direction='right')


ar_tokenizer.post_processor = post_processor
en_tokenizer.post_processor = post_processor

In [19]:
trainer = WordLevelTrainer(vocab_size=200000, show_progress=True, min_frequency=2, 
                           special_tokens=["[PAD]", "[OOV]", "[SOS]", "[EOS]"]
                          )

In [20]:
ar_tokenizer.train_from_iterator(iterator=data["source"].tolist(), trainer=trainer)
en_tokenizer.train_from_iterator(iterator=data["target"].tolist(), trainer=trainer)

In [21]:
ar_tokenizer.get_vocab()

{'المعبد': 6216,
 'بكلمة': 11881,
 'شيكين': 13027,
 'الي': 842,
 'تلمس': 12275,
 'ركاب': 6632,
 'الفلسطيني': 956,
 'اضطر': 7441,
 'اكتيف': 10332,
 'ننس': 14559,
 'رئيسي': 12656,
 'نال': 9486,
 'المفاوضات': 8012,
 'والفلسطينيين': 14761,
 'اوقفت': 11491,
 'الأيام': 2530,
 'والشركات': 14750,
 'جلبت': 6532,
 'باكو': 6306,
 'اساليب': 7402,
 'يتحسن': 4803,
 'مولف': 4723,
 'المناقشات': 8030,
 'عنه': 539,
 'ليلة': 726,
 'الحداد': 7673,
 'التقنيات': 10591,
 'المدن': 683,
 'أهتم': 9994,
 'البطيخ': 6025,
 'الفضول': 6158,
 'للتظاهر': 9198,
 'الالات': 10449,
 'لطفلة': 13716,
 'بي': 342,
 'اكرهه': 10334,
 'تونج': 8514,
 'انقلب': 11463,
 'يتفقون': 15117,
 'يركب': 15182,
 'القضايية': 3233,
 'كيف': 46,
 'وتحمل': 5723,
 'ذو': 2665,
 'وقتها': 9714,
 'والمراقبة': 14782,
 'الرجوع': 10767,
 'ههنا': 5695,
 'البطولة': 7598,
 'منحة': 5654,
 'بتوم': 2930,
 'معرض': 1324,
 'اتاحة': 10053,
 'شكك': 13005,
 'موراكامي': 1239,
 'متأخرا': 3968,
 'اللهب': 11081,
 'الحذاء': 6071,
 'السماء': 2319,
 'ضحايا': 1582,
 'اربعة'

In [22]:
en_tokenizer.get_vocab()

{'tercera': 9238,
 'inaugurated': 4936,
 'guagua': 7869,
 'social': 170,
 'protested': 4338,
 'achieved': 2551,
 'seller': 6542,
 'housewives': 7962,
 'warmer': 6761,
 'estimates': 7651,
 'mbomio': 8347,
 'drag': 7548,
 'instead': 985,
 'fas': 4832,
 'tens': 5348,
 'apology': 6946,
 'citizens': 759,
 'towns': 3865,
 'regularly': 4366,
 'frenzy': 7784,
 'council': 1555,
 'nominated': 2968,
 'igbo': 7989,
 'uzbekistan': 1909,
 'brutal': 4652,
 'israeli': 511,
 'shounda': 9027,
 'declined': 4052,
 'hurt': 1249,
 'clothes': 1552,
 'albert': 4562,
 'nationality': 3712,
 'porto': 3316,
 'key': 1048,
 'unpaid': 9344,
 'hussam': 6013,
 'intended': 4199,
 'laughing': 1989,
 'policemen': 6379,
 'kingdom': 1574,
 'greetings': 7863,
 'fill': 2867,
 'muhammad': 2680,
 'strongest': 9151,
 'resource': 3793,
 'bothered': 7140,
 'terms': 1905,
 'ecb': 7579,
 'deliver': 5737,
 'your': 38,
 'kiss': 3672,
 'through': 193,
 'area': 939,
 'turkmenistan': 5369,
 'fromsyriawithlove': 5913,
 'ecuadorian': 7582

In [23]:
ar_tokenizer.get_vocab_size()

15343

In [24]:
en_tokenizer.get_vocab_size()

9495

## Dataset and DataLoader

In [25]:
class TranslationDataset(Dataset):
    def __init__(self, df: pd.DataFrame):
        self.df = df.sort_values(by="length")
        
        self.src = df["source"].tolist()
        self.trg = df["target"].tolist()
                       
        
    def __getitem__(self, idx):
        source = self.src[idx]
        target = self.trg[idx]
        return (source, target)
        
    def __len__(self):
        return len(self.df)

In [26]:
train_dataset = TranslationDataset(df=train)
test_dataset = TranslationDataset(df=test)

In [27]:
def prepare_batch(batch, ar_tokenizer: Tokenizer, en_tokenizer: Tokenizer):
    ar = list(map(lambda x: x[0], batch))
    en = list(map(lambda x: x[1], batch))
    
    
    ar = ar_tokenizer.encode_batch(ar)
    en = en_tokenizer.encode_batch(en)
    
    
    
    ar_tokens = list(map(lambda x: x.ids, ar))
    ar_mask = list(map(lambda x: x.attention_mask, ar))
    en = list(map(lambda x: x.ids, en))
    
    source = torch.tensor(ar_tokens)
    source_mask = torch.tensor(ar_mask)
    target = torch.tensor(en)
    return (source, source_mask, target)

In [28]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, 
                              collate_fn=lambda x: prepare_batch(x, ar_tokenizer, en_tokenizer)
                             ) 

test_dataloader = DataLoader(dataset=test_dataset, batch_size=64, 
                             collate_fn=lambda x: prepare_batch(x, ar_tokenizer, en_tokenizer)
                            ) 

# Model

## Seq2Seq w/ Attention

In [29]:
class Encoder(nn.Module):
    def __init__(
        self, 
        vocab_size: int, 
        embedding_dim: int, 
        latent_dim: int, 
        decoder_latent: int,
        dropout: float = 0.3
    ):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.latent_dim = latent_dim
        self.decoder_latent = decoder_latent
        self.dropout_ratio = dropout
        
            
        self.embbeding = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim=self.embedding_dim)
        self.rnn = nn.GRU(input_size=self.embedding_dim, hidden_size=self.latent_dim, 
                           batch_first=True, bidirectional=True)
        
        
        self.fc = nn.Linear(in_features=self.latent_dim * 2, out_features=self.decoder_latent)
        self.dropout = nn.Dropout(p=self.dropout_ratio)
        
    def forward(self, x, src_len):
        # x = [batch_size, seq_length]
        
        embeddings = self.embbeding(x)
        # embeddings = [batch_size, seq_length, embedding_dim]
        embeddings = self.dropout(embeddings)
        
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embeddings, 
            src_len.to('cpu'), 
            batch_first=True, 
            enforce_sorted=False
        )
        
        packed_outputs, hidden = self.rnn(packed_embedded)
        
        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs, batch_first=True)
        # outputs = [batch_size, seq_length, latent_dim * 2 for each direction]
        # hidden = [num_directions * num_layers, batch_size, latent_dim]
        
        hidden = torch.cat([hidden[-2, :, :], hidden[-1, :, :]], dim=-1)
        # hidden = [batch_size, latent_dim * 2]
        hidden = torch.tanh(self.fc(hidden))
        
        return outputs, hidden
    

class BahdanauAttention(nn.Module):
    def __init__(self, encoder_latent: int, decoder_latent: int):
        super().__init__()
       
        self.attn = nn.Linear(in_features=(encoder_latent * 2) + decoder_latent, out_features=decoder_latent)
        self.v = nn.Linear(in_features=decoder_latent, out_features=1)
        
        
    def forward(self, hidden, encoder_hidden, src_mask):
        # hidden = [batch_size, decoder_dim]
        # encoder_hidden = [batch_size, src_seq_len, encoder_dim * 2]
        # src_mask = [batch_size, src_seq_len]
        
        src_len = encoder_hidden.shape[1]
        
        # Repeat latent dim for each sequence step in the encoder output
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        # hidden = [batch_size, src_seq_len, decoder_dim]
        
        combined_states = torch.cat([hidden, encoder_hidden], dim=-1)
        # combined_states = [batch_size, src_seq_len, encoder_dim * 2 + decoder_dim]
        
        energy = torch.tanh(self.attn(combined_states))
        # energy = [batch_size, src_seq_length, decoder_dim]
        
        attention = self.v(energy).squeeze(2)
        # attention = [batch_size, src_seq_length, 1] => [batch_size, src_seq_length]
        
        # mask pad tokens
        attention = attention.masked_fill(src_mask == 0, -1e10)

        attention = nn.functional.softmax(attention, dim=-1)
        
        return attention
        

class Decoder(nn.Module):
    def __init__(
        self, 
        vocab_size: int,
        embedding_dim:int, 
        latent_dim: int, 
        encoder_latent: int,
        attention: nn.Module,
        dropout: float = 0.3
                ):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.latent_dim = latent_dim
        self.encoder_latent = encoder_latent
        self.dropout_ratio = dropout
        
        self.attention = attention
        self.embbeding = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim=self.embedding_dim)
        self.rnn = nn.GRU(input_size=self.embedding_dim + (self.encoder_latent * 2),
                          hidden_size=self.latent_dim, 
                          batch_first=True
                         )
        
        self.fc1 = nn.Linear(
            in_features=self.latent_dim + (self.encoder_latent * 2) + self.embedding_dim, 
            out_features=self.latent_dim * 2
        )
        
        self.fc2 = nn.Linear(in_features=self.latent_dim * 2, out_features=self.vocab_size)
        self.dropout = nn.Dropout(p=self.dropout_ratio)
        
    def forward(self, x, hidden, encoder_hidden, src_mask):
        # x = [batch_size]
        # hidden = [batch_size, latent_dim]
        # encoder_hidden = [batch_size, src_seq_len, encoder_dim * 2]
        # src_mask = [batch_size, src_seq_len]
        
        
        embeddings = self.embbeding(x)
        # embeddings = [batch_size, embedding_dim]
        embeddings = self.dropout(embeddings)
        
        # Add dimension as sequence length for RNN
        embeddings = embeddings.unsqueeze(1)
        # embeddings = [batch_size, 1, embedding_dim]
        
        attention_scores = self.attention(hidden, encoder_hidden, src_mask).unsqueeze(1)
        # attention_scores = [batch_size, src_seq_length] => [batch_size, 1, src_seq_length]
        
        attention_values = torch.bmm(attention_scores, encoder_hidden)
        # attention_values = [batch_size, 1, encoder dim * 2]
        
        rnn_input = torch.cat([embeddings, attention_values], dim=-1)
        # rnn_input = [batch_size, 1, encoder_dim * 2 + embedding_dim]
        
        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
        # output = [batch_size, 1, latent_dim]
        # hidden = [1, batch_size, latent_dim]
        
        output = torch.cat([output, embeddings, attention_values], dim=-1)
        output = output.squeeze(1)
        # output = [batch_size, 1, embedding_dim + encoder_dim * 2 + latent_dim] => [batch_size, embedding_dim + encoder_dim * 2 + latent_dim]
        
        output = torch.tanh(self.fc1(output))
        output = self.dropout(output)
        output = self.fc2(output)
        # output = [batch_size, vocab_size]
        
        return output, hidden.squeeze(0), attention_scores.squeeze(1)

In [30]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder: nn.Module, decoder: nn.Module, device: torch.device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
        
    def forward(self, src: torch.Tensor, src_mask: torch.Tensor, max_len: int, start_token: int):
        # src = [batch_size, src_len]
        # src_mask = [batch_size, src_len]
        
        batch_size, src_seq_len = src.shape
        out_vocab_size = self.decoder.vocab_size

        
        outputs = torch.zeros(batch_size, max_len, out_vocab_size).to(self.device)
        attentions = torch.zeros(batch_size, max_len, src_seq_len).to(device)
        
        src_len = torch.count_nonzero(src_mask, dim=-1)
        
        encoder_hidden, hidden = self.encoder(src, src_len)
        inputs = torch.tensor(start_token).repeat(batch_size).to(device)
        
        for step in range(1, max_len):
            output, hidden, step_attention = self.decoder(inputs, hidden, encoder_hidden, src_mask)
            # output = [batch_size, vocab_size]   
            # hidden = [batch_size, latent_dim]
            # step attention = [batch_size, src_seq_length]
            
            outputs[:, step] = output
            attentions[:, step] = step_attention
            
            top_prediction = output.argmax(1)
            # top_prediction = [batch_size]
            inputs = top_prediction
            
        return outputs, attentions
            


In [39]:
encoder = Encoder(
    vocab_size=ar_tokenizer.get_vocab_size(), 
    embedding_dim=512, 
    latent_dim=512,
    decoder_latent=1024,
    dropout=0.5
)
attention = BahdanauAttention(
    encoder_latent=512,
    decoder_latent=1024
)
decoder = Decoder(
    vocab_size=en_tokenizer.get_vocab_size(),
    embedding_dim=512,
    latent_dim=1024,
    encoder_latent=512,
    attention=attention,
    dropout=0.5
)

In [40]:
model = Seq2Seq(encoder=encoder, decoder=decoder, device=device).to(device)

# Training

In [41]:
optim = torch.optim.Adam(params=model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=0)

In [42]:
for epoch in range(10):
    epoch_train_loss = 0
    epoch_val_loss = 0
    
    model.train()
    for batch in tqdm(train_dataloader):
        optim.zero_grad()
        
        src, src_mask, trg = batch
        src, src_mask, trg = src.to(device), src_mask.to(device), trg.to(device)
        
        trg_max_len = trg.shape[1]
        
        output, _ = model(src, src_mask, trg_max_len, en_tokenizer.token_to_id("[SOS]"))

        # trg = [batch size, trg_max_len]
        # output = [batch size, trg_max_len, output dim]
        
        output_dim = output.shape[-1]
        
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        
       
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        
        
        optim.step()
        
        epoch_train_loss += loss.item()
        
    model.eval()
    for batch in tqdm(test_dataloader):
        src, src_mask, trg = batch
        src, src_mask, trg = src.to(device), src_mask.to(device), trg.to(device)
        
        trg_max_len = trg.shape[1]
        
        output, _ = model(src, src_mask, trg_max_len, en_tokenizer.token_to_id("[SOS]"))

        # trg = [batch size, trg_max_len]
        # output = [batch size, trg_max_len, output dim]
        
        output_dim = output.shape[-1]
        
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        
        epoch_val_loss += loss.item()
        
        
    epoch_train_loss = epoch_train_loss / len(train_dataloader)
    epoch_val_loss = epoch_val_loss / len(test_dataloader)
    print(f"Epoch: {epoch}\tTrain Loss = {epoch_train_loss}\tValidation Loss = {epoch_val_loss}")


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 0	Train Loss = 6.819220426605969	Validation Loss = 6.393634325120507


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 1	Train Loss = 6.370630285604213	Validation Loss = 6.24668494666495


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 2	Train Loss = 6.041246014881909	Validation Loss = 6.178138349114395


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 3	Train Loss = 5.745515449260308	Validation Loss = 6.110433270291584


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 4	Train Loss = 5.4765182336171465	Validation Loss = 6.102447143415126


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 5	Train Loss = 5.254345151467052	Validation Loss = 6.171457918678842


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 6	Train Loss = 5.077527815733498	Validation Loss = 6.201262979972653


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 7	Train Loss = 4.941342173553094	Validation Loss = 6.237926721572876


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 8	Train Loss = 4.849315432029042	Validation Loss = 6.339739752978813


  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch: 9	Train Loss = 4.797925792089322	Validation Loss = 6.372467733011013


# Save

In [46]:
output_dir = os.path.join(os.curdir, "data", "seq2seq-attention")
Path(output_dir).mkdir(exist_ok=True)

model_path = os.path.join(output_dir, "model.pt")


In [47]:
torch.save(obj=model, f=model_path)

In [48]:
ar_tokenizer.save(path=os.path.join(output_dir, "ar_tokenizer.json"))
en_tokenizer.save(path=os.path.join(output_dir, "en_tokenizer.json"))    

# Evaluate

In [49]:
def sentences_to_batch(tokenizer: Tokenizer, sentences: list[str]) -> tuple[torch.Tensor, torch.Tensor]:
    tokenized = tokenizer.encode_batch(sentences)
    
    ids = torch.tensor([sample.ids for sample in tokenized])
    mask = torch.tensor([sample.attention_mask for sample in tokenized])
    
    return ids, mask
    
    
def decode_predictions(tokenizer: Tokenizer, predictions: torch.Tensor) -> list[str]:
    batch_size, max_len, vocab_size = predictions.shape
    
    results = []
    for sample_idx in range(batch_size):
        output = predictions[sample_idx]
        
        output_tokens = [token.item() for token in torch.argmax(output, dim=-1)] 
        result = tokenizer.decode(output_tokens)
        results.append(result)
        
    return results
        

metric = evaluate.load("bleu")

In [50]:
samples = test_dataset[0:10]

sources = samples[0]
actual = samples[1]

inputs, mask = sentences_to_batch(tokenizer=ar_tokenizer, sentences=sources)
inputs, mask = inputs.to(device), mask.to(device)

In [51]:
predictions, attention_scores = model(inputs, mask, 10, 2)
predictions, attention_scores = predictions.to(device), attention_scores.to(device)

predictions = decode_predictions(en_tokenizer, predictions)

In [52]:
for i in range(len(sources)):
    print(f"Source: {sources[i]}\nTranslation: {predictions[i]}\nActual: {actual[i]}\n----------\n")

Source: مدريد عاصمة أسبانيا
Translation: madrid madrid me me
Actual: Madrid is the capital of Spain
----------

Source: تجدون هنا رصدا وتجميعا لروابط مدونات ووسايل اعلام تناولت لقاينا ولا تترددوا في الاشارة الى اية روابط اخرى تجدونها من خلال ادراجها في قسم التعليقات اسفله
Translation: here have a a the a the
Actual: links to media and blog coverage are being collected here please post in the comments section to let us know what we may have missed
----------

Source: يا اخوانا احنا بنجمع اسامي اللي اتقبض عليهم دلوقتي يا ريت اللي يعرف حد يكتب اسمه هنا او علي الحرية للجدعان
Translation: to to to
Actual: we re now collecting the names of those arrested if anybody knows someone captured please leave a comment with his name freedomforthegoodpeople
----------

Source: هذا الوسم هو تعبير عن حب صاف للفنزويليين مرسلا من مشاعر الشعب المكسيكي الجامحة
Translation: this is is is is is the of
Actual: ge fucksuicideroom august
----------

Source: يجب أن يكونوا قد جنوا للاعتقاد في مثل هذا الهراء
Transl