In [1]:
import os, sys
sys.path.append('../')
import torch
from transformers import BartModel, BartForConditionalGeneration, GPT2LMHeadModel
from src.indobenchmark import IndoNLGTokenizer

In [2]:
def count_param(module, trainable=False):
    """Count the scalar elements held by a module's parameters.

    Args:
        module: Any object exposing ``parameters()`` that yields items with a
            ``numel()`` method (and a ``requires_grad`` attribute when
            ``trainable`` is truthy).
        trainable: When truthy, only parameters whose ``requires_grad`` is
            truthy are counted; otherwise every parameter is counted.

    Returns:
        The sum of ``numel()`` over the selected parameters (0 if there are
        none).
    """
    total = 0
    for param in module.parameters():
        # Only consult requires_grad when the caller asked for trainable-only.
        if trainable and not param.requires_grad:
            continue
        total += param.numel()
    return total

# Init Model

In [22]:
# Load the two pretrained checkpoints exercised below plus the tokenizer.
# The same tokenizer instance (loaded from the 'indobenchmark/indobart'
# checkpoint) is used for both the BART and the GPT cells in this notebook.
# NOTE(review): the recorded load log reports 'shared.weight' of the BART
# model as newly initialized rather than loaded from the checkpoint -- confirm
# whether this affects the BART predictions shown further down.
bart_model = BartForConditionalGeneration.from_pretrained('indobenchmark/indobart')
gpt_model = GPT2LMHeadModel.from_pretrained('indobenchmark/indogpt')
tokenizer = IndoNLGTokenizer.from_pretrained('indobenchmark/indobart')

Some weights of BartForConditionalGeneration were not initialized from the model checkpoint at indobenchmark/indobart and are newly initialized: ['shared.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Test GPT Model

In [4]:
# Prompt-continuation check for IndoGPT: build model inputs from the prompt,
# call generate() with no extra generation arguments, and decode the first
# returned sequence as the cell's displayed value.
# The recorded run logs that generate() fell back to eos_token_id (50256) as
# pad_token_id because none was supplied.
# NOTE(review): the recorded text continues after a '</s>' token -- confirm
# the model's eos_token_id matches the tokenizer's end-of-sequence token.
gpt_input = tokenizer.prepare_input_for_generation('aku adalah anak', model_type='indogpt', return_tensors='pt')
gpt_out = gpt_model.generate(**gpt_input)
tokenizer.decode(gpt_out[0])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'<s> aku adalah anak pertama dari tiga bersaudara. </s> aku lahir di kota kecil yang sama dengan ayahku.'

In [5]:
# Second IndoGPT prompt-continuation check with a different prompt; same
# steps as before: prepare inputs, generate with no extra arguments, decode
# the first returned sequence. Overwrites gpt_input / gpt_out.
# The recorded run again logs pad_token_id falling back to eos_token_id (50256).
gpt_input = tokenizer.prepare_input_for_generation('aku suka sekali makan', model_type='indogpt', return_tensors='pt')
gpt_out = gpt_model.generate(**gpt_input)
tokenizer.decode(gpt_out[0])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'<s> aku suka sekali makan di sini. </s> aku suka sekali dengan menu yang ada di sini. aku'

In [6]:
# Third IndoGPT prompt-continuation check (greeting-style prompt): prepare
# inputs, generate with no extra arguments, decode the first returned
# sequence. Overwrites gpt_input / gpt_out.
# The recorded run again logs pad_token_id falling back to eos_token_id (50256).
gpt_input = tokenizer.prepare_input_for_generation('hai, bagaimana kabar', model_type='indogpt', return_tensors='pt')
gpt_out = gpt_model.generate(**gpt_input)
tokenizer.decode(gpt_out[0])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'<s> hai, bagaimana kabar kalian? semoga sehat selalu ya. kali ini saya akan membahas tentang cara membuat'

# Test BART Model

In [25]:
# Mask-filling probe for IndoBART: run a single forward pass on a sentence
# containing '<mask>' and decode the highest-scoring token at every output
# position.
bart_input = tokenizer.prepare_input_for_generation(['aku adalah <mask>'], model_type='indobart', return_tensors='pt')
# Inference only: skip autograd bookkeeping so no graph is retained on bart_out.
with torch.no_grad():
    bart_out = bart_model(**bart_input)
# topk(1).indices has a trailing singleton dimension; squeeze() flattens the
# single-example batch down to a 1-D sequence of token ids for decode().
tokenizer.decode(bart_out.logits.topk(1).indices.squeeze())

'nyaéta selengkapnya hij <mask> </s>'

In [53]:
# Batched IndoBART forward pass with explicit decoder inputs: the same
# sentence is fed twice as encoder input and twice as decoder input, and the
# top-1 token at each output position is decoded per example.
# NOTE(review): the sample text looks Sundanese while both language tokens are
# '[indonesian]' -- confirm this pairing is intentional.
bart_input = tokenizer.prepare_input_for_generation(['abdi teh ayeuna','abdi teh ayeuna'], lang_token='[indonesian]',
    decoder_inputs=['abdi teh ayeuna','abdi teh ayeuna'], decoder_lang_token='[indonesian]', model_type='indobart', return_tensors='pt')
# Inference only: skip autograd bookkeeping so no graph is retained on bart_out.
with torch.no_grad():
    bart_out = bart_model(**bart_input)
# Compute the top-1 ids once and reuse them for both batch entries.
bart_top1_ids = bart_out.logits.topk(1).indices
tokenizer.decode(bart_top1_ids[0,:,:].squeeze()), tokenizer.decode(bart_top1_ids[1,:,:].squeeze())

('</s> </s> teh ayeuna </s>', '</s> </s> teh ayeuna </s>')