# Arabic-Language to English Translation

In [1]:
# let's use m2m100
from transformers import M2M100Config, M2M100ForConditionalGeneration, M2M100Tokenizer

In [2]:
# Load the model and tokenizer from the Hugging Face model repo. Both must come
# from the same checkpoint, so the name is defined once and shared by the two
# `from_pretrained` calls.
MODEL_NAME = 'facebook/m2m100_418M'
model = M2M100ForConditionalGeneration.from_pretrained(MODEL_NAME)
# src_lang / tgt_lang configure the tokenizer for Arabic input and English output.
tokenizer = M2M100Tokenizer.from_pretrained(MODEL_NAME, src_lang="ar", tgt_lang="en")



In [3]:
# Should translate to: "life is like a box of chocolates"
src_text = "الحياة مثل علبة من الشوكولاتة"

model_inputs = tokenizer(src_text, return_tensors="pt")
# Forcing the first generated token to the English language id is what selects
# English as the output language.
outputs = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id('en'))

# No `as_target_tokenizer()` context here: that (deprecated) context manager
# only changes how target text is *encoded*, so it is not needed for decoding.
translated = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(translated)

Life is like a box of chocolate.


In [4]:
def translate(src_text: str) -> str:
    """Translate a string to English using the notebook's M2M100 model.

    Relies on the module-level ``tokenizer`` (configured with ``src_lang="ar"``)
    and ``model`` created earlier in the notebook.

    Args:
        src_text: The text to translate.

    Returns:
        The decoded translation with special tokens stripped. An empty or
        whitespace-only string yields ``""`` without calling the model.
    """
    # Nothing to translate: skip the model call rather than letting it generate
    # text from an input that contains only special tokens.
    if isinstance(src_text, str) and not src_text.strip():
        return ""

    model_inputs = tokenizer(src_text, return_tensors="pt")
    # Forcing the first generated token to the English language id is what
    # selects English as the output language.
    outputs = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id('en'))
    # Decoding does not need the deprecated `as_target_tokenizer()` context,
    # which only changes how target text is encoded.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

Okay! Let's try this out.

In [5]:
# Round-trip check: "Acclaim Technical Services" was run through Google
# Translate (en->ar); the Arabic result is translated back to English here.
acclaim_ar = 'اشادة الخدمات الفنية'
translate(acclaim_ar)

'Promotion of technical services'

In [6]:
# Text from a recent BBC Arabic article on Sudan.
bbc_sudan_ar = 'أطلقت قوات الشرطة السودانية الغاز المسيل للدموع لتفريق آلاف المتظاهرين المطالبين بالتحول الديمقراطي أمام القصر الجمهوري بالخرطوم، في وقت حذرت الأمم المتحدة من تدهور الأوضاع الإنسانية في السودان وأشارت إلى أن ثلاثين في المائة من الشعب السوداني سيحتاجون لمساعدة إنسانية العام المقبل.'
translate(bbc_sudan_ar)

'Sudanese police have launched tear gas to disperse thousands of protesters demanding a democratic transformation in front of the Republican Palace in Khartoum, while the United Nations has warned of the deterioration of the humanitarian situation in Sudan and pointed out that 30 percent of the Sudanese people will need humanitarian aid next year.'

In [7]:
# A casual Arabic tweet about the US's Africa policy.
# Source: https://twitter.com/AbdulHamtoun/status/1461836821915226119?s=20
#
# English text recorded alongside the tweet (presumably a reference
# translation to compare against; its origin is not stated):
# ------------------------------------------------------------------------------
# Democracy in our region has America better than Italy, France or Britain.
# These people’s mentality towards us did not come out of the colonial period.
# Americans coexist with democracy if the internal movement wins for it. And the crowd.
# To finish it. Thatcher defended to the end
tweet_ar = 'للديمقراطية بمنطقتنا امريكا أفضل من إيطاليا،فرنسا او بريطانيا.هؤلاء عقليتهم اتجاهنا لم تخرج من فترة الاستعمار.يتعايش الامريكان مع الديمقراطية اذا الحراك الداخلي انتصر له.آخر فترة العنصرية في ج.ا سعى الحزبين الديمق. والجمهو.لإنهائها.تاتشر دافعت حتى النهاية'
translate(tweet_ar)

'Democracy in our region America is better than Italy, France or Britain.These minds our direction has not come out of the colonial period.Americans coexist with democracy if the internal movement wins it.The last period of racism in J.A. the two Democratic parties sought.'

In [None]:
# Interactive demo: prompt for Arabic text, then show its translation.
user_text = input('Insert Arabic Text Here: ')
translate(user_text)