# About
Evaluating the following transformers for the task of Fill in the blanks.
1. BERT 
2. GPT2
3. Transformer-XL 
4. XLNet
5. XLM 
6. RoBERTa 
7. DistilBERT 

In [41]:
!pip install transformers



In [132]:
from transformers import BertTokenizer, BertForMaskedLM, GPT2Tokenizer, GPT2LMHeadModel,TransfoXLTokenizer, TransfoXLLMHeadModel,XLNetTokenizer, XLNetLMHeadModel,XLMTokenizer, XLMWithLMHeadModel, RobertaTokenizer, TFRobertaForMaskedLM,DistilBertTokenizer, DistilBertForMaskedLM
import torch

#Input text
Taking a famous english proverb to evaluate
* A journey of thousand miles begins with a single step.

In [92]:
text = "A journey of -- miles begins with a -- step."

# Evaluating Bert

In [93]:
BERT_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
BERT_model = BertForMaskedLM.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [94]:
#replacing empty lines with mask token
BERT_text = text.replace('--',BERT_tokenizer.mask_token)
print(BERT_text)

A journey of [MASK] miles begins with a [MASK] step.


In [95]:
token_ids = BERT_tokenizer.encode(BERT_text,return_tensors='pt')
masked_position = (token_ids.squeeze() == BERT_tokenizer.mask_token_id).nonzero()
masked_pos = [mask.item() for mask in masked_position ]
with torch.no_grad():
    output = BERT_model(token_ids)

last_hidden_state = output[0].squeeze()

list_of_list =[]
for index,mask_index in enumerate(masked_pos):
    mask_hidden_state = last_hidden_state[mask_index]
    idx = torch.topk(mask_hidden_state, k=5, dim=0)[1]
    words = [BERT_tokenizer.decode(i.item()).strip() for i in idx]
    list_of_list.append(words)
    print ("Mask ",index+1,"Guesses : ",words)

best_guess = ""
for j in list_of_list:
    best_guess = best_guess+" "+j[0]
    print(best_guess)

Mask  1 Guesses :  ['s e v e r a l', 't w o', 't h r e e', '5 0', 'f i v e']
Mask  2 Guesses :  ['s i n g l e', 'r i g h t', 's i m p l e', 'w a l k i n g', 'c e r t a i n']
 s e v e r a l
 s e v e r a l s i n g l e


# Evaluating GPT2

In [96]:
GPT2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
GPT2_model = GPT2LMHeadModel.from_pretrained('gpt2')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355256.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=548118077.0, style=ProgressStyle(descri…




In [102]:
#replacing empty lines with mask token
GPT2_tokenizer.mask_token = '[MASK]'
GPT2_text = text.replace('--',GPT2_tokenizer.mask_token)
print(GPT2_text)

A journey of [MASK] miles begins with a [MASK] step.


In [104]:
token_ids = GPT2_tokenizer.encode(GPT2_text,return_tensors='pt')
masked_position = (token_ids.squeeze() == GPT2_tokenizer.mask_token_id).nonzero()
masked_pos = [mask.item() for mask in masked_position ]
with torch.no_grad():
    output = GPT2_model(token_ids)

last_hidden_state = output[0].squeeze()

list_of_list =[]
for index,mask_index in enumerate(masked_pos):
    mask_hidden_state = last_hidden_state[mask_index]
    idx = torch.topk(mask_hidden_state, k=5, dim=0)[1]
    words = [BERT_tokenizer.decode(i.item()).strip() for i in idx]
    list_of_list.append(words)
    print ("Mask ",index+1,"Guesses : ",words)

best_guess = ""
for j in list_of_list:
    best_guess = best_guess+" "+j[0]
    print(best_guess)

# Evaluating Transformer XL

In [105]:
Tran_XL_tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
Tran_XL_model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=9143470.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=9143613.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=856.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1140884800.0, style=ProgressStyle(descr…




In [107]:
#replacing empty lines with mask token
Tran_XL_tokenizer.mask_token = '[MASK]'
Tran_XL_text = text.replace('--',Tran_XL_tokenizer.mask_token)
print(Tran_XL_text)

A journey of [MASK] miles begins with a [MASK] step.


In [108]:
token_ids = Tran_XL_tokenizer.encode(Tran_XL_text,return_tensors='pt')
masked_position = (token_ids.squeeze() == Tran_XL_tokenizer.mask_token_id).nonzero()
masked_pos = [mask.item() for mask in masked_position ]
with torch.no_grad():
    output = Tran_XL_model(token_ids)

last_hidden_state = output[0].squeeze()

list_of_list =[]
for index,mask_index in enumerate(masked_pos):
    mask_hidden_state = last_hidden_state[mask_index]
    idx = torch.topk(mask_hidden_state, k=5, dim=0)[1]
    words = [BERT_tokenizer.decode(i.item()).strip() for i in idx]
    list_of_list.append(words)
    print ("Mask ",index+1,"Guesses : ",words)

best_guess = ""
for j in list_of_list:
    best_guess = best_guess+" "+j[0]
    print(best_guess)

Mask  1 Guesses :  ['[ u n u s e d 1 8 6 ]', '[ u n u s e d 2 3 ]', '[ u n u s e d 1 ]', '[ u n u s e d 4 ]', '[ u n u s e d 0 ]']
Mask  2 Guesses :  ['[ u n u s e d 1 8 6 ]', '[ u n u s e d 2 3 ]', '[ u n u s e d 1 ]', '[ u n u s e d 4 ]', '[ u n u s e d 3 ]']
 [ u n u s e d 1 8 6 ]
 [ u n u s e d 1 8 6 ] [ u n u s e d 1 8 6 ]


# Evaluating XLNet


In [122]:
!pip install sentencepiece
XLNET_tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
XLNET_model =  XLNetLMHeadModel.from_pretrained('xlnet-base-cased')



HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=467042463.0, style=ProgressStyle(descri…




In [None]:
#replacing empty lines with mask token
XLNET_tokenizer.mask_token = '[MASK]'
XLNET_text = text.replace('--',XLNET_tokenizer.mask_token)
print(XLNET_text)

In [None]:
token_ids = XLNET_tokenizer.encode(XLNET_text,return_tensors='pt')
masked_position = (token_ids.squeeze() == XLNET_tokenizer.mask_token_id).nonzero()
masked_pos = [mask.item() for mask in masked_position ]
with torch.no_grad():
    output = XLNET_model(token_ids)

last_hidden_state = output[0].squeeze()

list_of_list =[]
for index,mask_index in enumerate(masked_pos):
    mask_hidden_state = last_hidden_state[mask_index]
    idx = torch.topk(mask_hidden_state, k=5, dim=0)[1]
    words = [BERT_tokenizer.decode(i.item()).strip() for i in idx]
    list_of_list.append(words)
    print ("Mask ",index+1,"Guesses : ",words)

best_guess = ""
for j in list_of_list:
    best_guess = best_guess+" "+j[0]
    print(best_guess)

#Evaluating XLM

In [124]:
XLM_tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
XLM_model = TFXLMForTokenClassification.from_pretrained('xlm-mlm-en-2048')


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=646181.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=486639.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=840.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2915637868.0, style=ProgressStyle(descr…




Some layers from the model checkpoint at xlm-mlm-en-2048 were not used when initializing TFXLMForTokenClassification: ['pred_layer_._proj']
- This IS expected if you are initializing TFXLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLMForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFXLMForTokenClassification were not initialized from the model checkpoint at xlm-mlm-en-2048 and are newly initialized: ['dropout_26', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [128]:
#replacing empty lines with mask token
XLM_tokenizer.mask_token = '[MASK]'
XLM_text = text.replace('--',XLM_tokenizer.mask_token)
print(XLM_text)

A journey of [MASK] miles begins with a [MASK] step.


In [None]:
token_ids = XLM_tokenizer.encode(XLM_text,return_tensors='pt')
masked_position = (token_ids.squeeze() == XLM_tokenizer.mask_token_id).nonzero()
masked_pos = [mask.item() for mask in masked_position ]
with torch.no_grad():
    output = XLM_model(token_ids)

last_hidden_state = output[0].squeeze()

list_of_list =[]
for index,mask_index in enumerate(masked_pos):
    mask_hidden_state = last_hidden_state[mask_index]
    idx = torch.topk(mask_hidden_state, k=5, dim=0)[1]
    words = [BERT_tokenizer.decode(i.item()).strip() for i in idx]
    list_of_list.append(words)
    print ("Mask ",index+1,"Guesses : ",words)

best_guess = ""
for j in list_of_list:
    best_guess = best_guess+" "+j[0]
    print(best_guess)

#Similarly RoBERTA and DistilBERT can be evaluated from https://huggingface.co/transformers/model_doc/roberta.html https://huggingface.co/transformers/model_doc/distilbert.html