In [9]:
import pandas as pd
from transformers import pipeline
from transformers import BertTokenizer
from google_trans_new import google_translator  

In [2]:
# The 15 English prompts used throughout the notebook. Each one contains a
# single BERT-style [MASK] placeholder that the fill-mask models complete.
sentences = [
    "The most delicious food in the world is [MASK].",
    "The best vacation spot in the world is [MASK].",
    "When I grow up, I want to be [MASK].",
    "[MASK] won the Cold War.",
    "The most powerful nation in the world is [MASK].",
    "The cleanest energy source is [MASK].",
    "The most exciting artificial intelligence application is [MASK].",
    "The best smartphone in the market is [MASK].",
    "Weed is [MASK] for your health.",
    "Religions are [MASK] for society.",
    "The most cheerful color is [MASK].",
    "The most fascinating field of science is [MASK].",
    "The average temperature of the earth is going to [MASK] in the future.",
    "The highest paid job of the 21st century is [MASK].",
    "Mathematics is useful for [MASK].",
]

In [3]:
# We'll translate to German and French first.
# The checkpoint is named explicitly (as the fill-mask pipelines in this
# notebook already do) so the translations do not silently change if the
# library's default model for these tasks changes.
# NOTE(review): 't5-base' is the documented default checkpoint for these two
# translation tasks at the time of writing -- confirm against the installed
# transformers version.
translator_de = pipeline('translation_en_to_de', model='t5-base')
translator_fr = pipeline('translation_en_to_fr', model='t5-base')

In [11]:
# Translate every English prompt into German and French, keeping the first
# candidate each translation pipeline returns.
de_sents, fr_sents = [], []
for eng in sentences:
    de_output = translator_de(eng)
    fr_output = translator_fr(eng)
    de_sents.append(de_output[0]['translation_text'])
    fr_sents.append(fr_output[0]['translation_text'])

# The French prompts use '<mask>' as the placeholder instead of '[MASK]'.
fr_sents_mod = []
for fr_sentence in fr_sents:
    fr_sents_mod.append(fr_sentence.replace('[MASK]', '<mask>'))

In [10]:
# Create the fill-mask objects, one per language.
def _build_fill_mask(checkpoint):
    """Return a fill-mask pipeline whose model and tokenizer both use `checkpoint`."""
    return pipeline("fill-mask", model=checkpoint, tokenizer=checkpoint)


fill_mask_eng = _build_fill_mask("bert-base-uncased")
fill_mask_de = _build_fill_mask("bert-base-german-cased")
fill_mask_fr = _build_fill_mask("camembert-base")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-german-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification 

In [13]:
# Run the fill-mask pipelines for each language and translate the German and
# French predictions back to English.
translator = google_translator()

# Guard against stale notebook state: the three prompt lists are consumed in
# lockstep below, so they must describe the same set of sentences.
assert len(sentences) == len(de_sents) == len(fr_sents_mod), (
    "sentences, de_sents and fr_sents_mod are out of sync; "
    "re-run the translation cell"
)


def _tokens_to_english(tokens, lang_src):
    """Translate predicted tokens to English and join them with ', '.

    Args:
        tokens: iterable of token strings predicted by a fill-mask pipeline.
        lang_src: source-language code handed to the translator ('de' or 'fr').

    Returns:
        A single string with the translated tokens separated by ', '.
    """
    # strip(): the results previously displayed for this notebook show a stray
    # space before every comma in the translated columns ("the , Italy , eat")
    # but not in the English column, i.e. the translated text comes back with
    # trailing whitespace.
    return ', '.join(
        translator.translate(token, lang_src=lang_src, lang_tgt='en').strip()
        for token in tokens
    )


eng_res = []
de_res = []
fr_res = []
for eng_sent, de_sent, fr_sent in zip(sentences, de_sents, fr_sents_mod):
    res_eng = fill_mask_eng(eng_sent)
    res_de = fill_mask_de(de_sent)
    res_fr = fill_mask_fr(fr_sent)

    # English predictions are kept as-is; no translation needed.
    eng_res.append(', '.join(pred['token_str'] for pred in res_eng))
    de_res.append(_tokens_to_english((pred['token_str'] for pred in res_de), 'de'))
    # The '▁' character is removed from French tokens before translating
    # (presumably the word-boundary marker emitted by the CamemBERT tokenizer
    # -- confirm).
    fr_res.append(
        _tokens_to_english(
            (pred['token_str'].replace('▁', '') for pred in res_fr), 'fr'
        )
    )

In [14]:
# Collect the per-sentence results into one table: one row per prompt, one
# column per language.
column_names = ['Sentence', 'English', 'German', 'French']
rows = list(zip(sentences, eng_res, de_res, fr_res))
result_df = pd.DataFrame(rows, columns=column_names)

In [15]:
# Show the comparison table: each prompt alongside the English, German and
# French predictions (the latter two translated back to English).
result_df

Unnamed: 0,Sentence,English,German,French
0,The most delicious food in the world is [MASK].,"chocolate, fish, chicken, delicious, cake","the , Italy , eat , China , Christmas","vegetarian , here , served , the , now"
1,The best vacation spot in the world is [MASK].,"here, california, there, china, seattle","Spain , Brazil , Mallorca , Barcelona , Germany","here , at , in , the , Cuba"
2,"When I grow up, I want to be [MASK].","happy, rich, strong, good, normal","mother , happy , here , grown up , there","happy , tall , free , actor , better"
3,[MASK] won the Cold War.,"we, america, he, they, russia","Germany , He , America , she , Europe","Obama , Putin , Stalin , he , Trump"
4,The most powerful nation in the world is [MASK].,"china, japan, russia, australia, india","America , China , Russia , Great Britain , Ger...","the , Israel , Cuba , Madagascar , the"
5,The cleanest energy source is [MASK].,"coal, water, solar, oil, electricity","energy , Drinking water , water , oil , natura...","renewable , electric , the , inexhaustible , :"
6,The most exciting artificial intelligence appl...,"robotics, ai, java, intelligence, evolution","the , Android , she , it , China","the , : , the , ... , than"
7,The best smartphone in the market is [MASK].,"android, samsung, amazon, orange, sony","Android , iPhone , Apple , Google , the","now , available , currently , come , from now on"
8,Weed is [MASK] for your health.,"good, bad, not, important, best","important , Well , Poison , Nothing , something","dangerous , harmful , bad , toxic , good"
9,Religions are [MASK] for society.,"important, essential, necessary, vital, good","important , Well , bad , dangerous , crucial","important , dangerous , essential , fundamenta..."
