In [5]:
import os, sys
import torch
from transformers import BartModel, BartForConditionalGeneration, GPT2LMHeadModel
from indobenchmark import IndoNLGTokenizer

In [6]:
def count_param(module, trainable=False):
    if trainable:
        return sum(p.numel() for p in module.parameters() if p.requires_grad)
    else:
        return sum(p.numel() for p in module.parameters())

# Init Model

In [7]:
bart_model = BartModel.from_pretrained('indobenchmark/indobart')
gpt_model = GPT2LMHeadModel.from_pretrained('indobenchmark/indogpt')
tokenizer = IndoNLGTokenizer.from_pretrained('indobenchmark/indobart')

Some weights of BartModel were not initialized from the model checkpoint at indobenchmark/indobart and are newly initialized: ['shared.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Test GPT Model

In [8]:
gpt_input = torch.LongTensor([tokenizer.encode('<s> aku adalah anak', add_special_tokens=False)])
gpt_out = gpt_model.generate(gpt_input)
tokenizer.decode(gpt_out[0])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'aku adalah anak pertama dari tiga bersaudara. aku lahir di kota kecil yang sama dengan ayahku.'

In [9]:
gpt_input = torch.LongTensor([tokenizer.encode('<s> aku suka sekali makan', add_special_tokens=False)])
gpt_out = gpt_model.generate(gpt_input)
tokenizer.decode(gpt_out[0])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'aku suka sekali makan di sini. aku suka sekali dengan menu yang ada di sini. aku'

In [10]:
gpt_input = torch.LongTensor([tokenizer.encode('<s> hai, bagaimana kabar', add_special_tokens=False)])
gpt_out = gpt_model.generate(gpt_input)
tokenizer.decode(gpt_out[0])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'hai, bagaimana kabar kalian? semoga sehat selalu ya. kali ini saya akan membahas tentang cara membuat'

# Test BART Model

In [14]:
bart_input = tokenizer.prepare_input_for_generation(['aku adalah <mask>'], model_type='indobart', return_tensors='pt')
bart_out = bart_model(**bart_input)
tokenizer.decode(bart_out.last_hidden_state.topk(1).indices[:,:,:].squeeze())

'm� paraek\x1f'

In [18]:
bart_input = tokenizer.prepare_input_for_generation(['dia adalah anak <mask>','<mask> adalah anak gembala'], 
    decoder_inputs=['dia adalah anak <mask>','aku adalah anak <mask>'], model_type='indobart', return_tensors='pt')
bart_out = bart_model(**bart_input)
tokenizer.decode(bart_out.last_hidden_state.topk(1).indices[0,:,:].squeeze()), tokenizer.decode(bart_out.last_hidden_state.topk(1).indices[1,:,:].squeeze())

('[ para para para\x1f[', 'para para para para\x1fn')