In [1]:
import json

import torch

import evaluate
import datasets

from tqdm.auto import tqdm

from transformers import MarianTokenizer, MarianMTModel

from generate_sequences import GreedyGenerator, BeamSearchGenerator

# Load the Model and Dataset

In [2]:
# Prefer the GPU when one is available.
# NOTE(review): `device` is only computed here; in the visible cells the model
# is never moved to it and stays wherever `from_pretrained` places it.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# English -> Arabic Marian checkpoint together with its matching tokenizer.
# Alternative checkpoint: "Helsinki-NLP/opus-mt-ar-en" (opposite direction, ar -> en).
model_name = "marefa-nlp/marefa-mt-en-ar"
model = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)

# Metric used to score every decoding strategy in this notebook.
bleu_scorer = evaluate.load("sacrebleu")

# Test split of the IWSLT 2017 Arabic-English configuration.
test_dataset = datasets.load_dataset(
    'iwslt2017',
    'iwslt2017-ar-en',
    split='test',
)

In [3]:
# Keys of the `translation` dictionaries: translate from English into Arabic.
source_language, target_language = 'en', 'ar'

We evaluate on only the last 10 samples of the test split.

In [4]:
# Every item of the `translation` column is a dict keyed by language code.
translation_pairs = test_dataset['translation']

# Keep only the last 10 pairs: sources go to the model, targets are the references.
input_texts = [pair[source_language] for pair in translation_pairs][-10:]
targets = [pair[target_language] for pair in translation_pairs][-10:]

# Show the sizes and a preview of the first five sentences on each side.
len(input_texts), len(targets), input_texts[:5], targets[:5]

(10,
 10,
 ["One major consequence of this work  is that maybe all of these decades,  we've had the whole concept of cybernetic revolt  in reverse.",
  "It's not that machines first become intelligent  and then megalomaniacal  and try to take over the world.",
  "It's quite the opposite,  that the urge to take control  of all possible futures  is a more fundamental principle  than that of intelligence,  that general intelligence may in fact emerge  directly from this sort of control-grabbing,  rather than vice versa.",
  'Another important consequence is goal seeking.',
  "I'm often asked, how does the ability to seek goals  follow from this sort of framework?"],
 ['أحد العواقب الكبرى لهذا العمل هو أنه لربما طوال كل هذه العقود، كان لدينا المفهوم العكسي للثورة الآلية.',
  'الأمر ليس في أن الآلات تصبح ذكية في البداية ثم ينتابها جنون العظمة و تحاول السيطرة على العالم.',
  'إنه تماماً العكس، أن النزعة للسيطرة على كل الأزمنة المستقبلية الواردة هي مبدأ أساسي أكثر من مبدأ الذكاء، أن نواحي الذ

# Preparation and utility functions

We set `use_cache=False` to disable the caching optimization that transformers otherwise applies during generation (see [`GenerationConfig.use_cache`](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig.use_cache)).

In [5]:
# Defaults stored on the model's generation config and read back by later cells.
generation_config = model.generation_config
generation_config.batch_size = 2  # consumed by the batching helper and the generators below
generation_config.use_cache = False
generation_config.num_beams = 1

In [6]:
def get_batches(inputs, batch_size):
    """Yield consecutive slices of `inputs` with at most `batch_size` items each.

    Progress is reported through a tqdm bar labelled "Generating Sequences".

    Args:
        inputs: A sliceable sequence that supports `len()`.
        batch_size: Maximum number of items per yielded slice; must be >= 1.

    Yields:
        Slices `inputs[i : i + batch_size]` in order; the last one may be shorter.

    Raises:
        ValueError: If `batch_size` is smaller than 1 (raised on first iteration,
            because this is a generator).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    # Ceiling division: a trailing partial batch is still one step of the bar.
    total_batches = (len(inputs) + batch_size - 1) // batch_size
    for start in tqdm(
            range(0, len(inputs), batch_size),
            desc="Generating Sequences",
            total=total_batches,
        ):
        yield inputs[start : start + batch_size]

# Translate with Huggingface `generate` method

## Using Greedy method

We explicitly set `do_sample=False`.

In [7]:
def translate(texts):
    """Translate `texts` with Hugging Face `generate` and sampling disabled.

    The texts are processed in batches of `model.generation_config.batch_size`.

    Args:
        texts: A list of source sentences.

    Returns:
        A list with one decoded translation (special tokens removed) per input,
        in the same order as `texts`.
    """
    translated_texts = []
    for batch in get_batches(texts, batch_size=model.generation_config.batch_size):
        # Keep the encoded batch on the model's device so the call also works
        # when the model has been moved off the CPU.
        encoded = tokenizer(batch, return_tensors="pt", padding=True).to(model.device)
        translated_tokens = model.generate(
            do_sample=False,
            **encoded,
        )
        translated_texts.extend(
            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
        )
    return translated_texts

In [8]:
# Translate the evaluation sentences with the `translate` defined in the previous cell.
hf_predictions = translate(input_texts)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, hf_predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [9]:
# Score the Hugging Face greedy translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=hf_predictions,
)

{'score': 15.796125110909543,
 'counts': [128, 58, 28, 13],
 'totals': [264, 254, 244, 235],
 'precisions': [48.484848484848484,
  22.834645669291337,
  11.475409836065573,
  5.531914893617022],
 'bp': 0.9701515036966302,
 'sys_len': 264,
 'ref_len': 272}

### With multinomial sampling, top_k, top_p, and temperature

In [10]:
def translate(texts):
    """Translate `texts` with Hugging Face `generate` using sampling.

    Sampling is enabled with `top_k=10`, `top_p=0.75` and `temperature=0.9`.
    The texts are processed in batches of `model.generation_config.batch_size`.

    Args:
        texts: A list of source sentences.

    Returns:
        A list with one decoded translation (special tokens removed) per input,
        in the same order as `texts`.
    """
    translated_texts = []
    for batch in get_batches(texts, batch_size=model.generation_config.batch_size):
        # Keep the encoded batch on the model's device so the call also works
        # when the model has been moved off the CPU.
        encoded = tokenizer(batch, return_tensors="pt", padding=True).to(model.device)
        translated_tokens = model.generate(
            top_k=10,
            top_p=0.75,
            do_sample=True,
            temperature=0.9,
            **encoded,
        )
        translated_texts.extend(
            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
        )
    return translated_texts

In [11]:
# Translate the evaluation sentences with the sampling `translate` from the previous cell.
# NOTE: no random seed is set in the visible cells, so this run can differ between executions.
hf_predictions = translate(input_texts)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, hf_predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [12]:
# Score the sampled Hugging Face translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=hf_predictions,
)

{'score': 18.35116938665859,
 'counts': [129, 61, 33, 19],
 'totals': [268, 258, 248, 239],
 'precisions': [48.134328358208954,
  23.643410852713178,
  13.306451612903226,
  7.949790794979079],
 'bp': 0.9851854581626466,
 'sys_len': 268,
 'ref_len': 272}

## Using beam search of width 4

We explicitly set `do_sample=False`.

In [13]:
def translate(texts):
    """Translate `texts` with Hugging Face `generate` using beam search.

    Uses `num_beams=4`, `length_penalty=0.6` and sampling disabled. The texts
    are processed in batches of `model.generation_config.batch_size`.

    Args:
        texts: A list of source sentences.

    Returns:
        A list with one decoded translation (special tokens removed) per input,
        in the same order as `texts`.
    """
    translated_texts = []
    for batch in get_batches(texts, batch_size=model.generation_config.batch_size):
        # Keep the encoded batch on the model's device so the call also works
        # when the model has been moved off the CPU.
        encoded = tokenizer(batch, return_tensors="pt", padding=True).to(model.device)
        translated_tokens = model.generate(
            num_beams=4,
            do_sample=False,
            length_penalty=0.6,
            **encoded,
        )
        translated_texts.extend(
            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
        )
    return translated_texts

In [14]:
# Translate the evaluation sentences with the beam-search `translate` from the previous cell.
beam_search_hf_predictions = translate(input_texts)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, beam_search_hf_predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [15]:
# Score the Hugging Face beam-search translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=beam_search_hf_predictions,
)

{'score': 20.084845774979332,
 'counts': [134, 66, 38, 21],
 'totals': [262, 252, 242, 233],
 'precisions': [51.14503816793893,
  26.19047619047619,
  15.702479338842975,
  9.012875536480687],
 'bp': 0.9625512774839297,
 'sys_len': 262,
 'ref_len': 272}

### With multinomial, top-p, top-k sampling and temperature

In [16]:
def translate(texts):
    """Translate `texts` with Hugging Face `generate` using beam search plus sampling.

    Uses `num_beams=4`, `length_penalty=0.6` and sampling enabled with
    `top_k=10`, `top_p=0.75` and `temperature=0.9`. The texts are processed in
    batches of `model.generation_config.batch_size`.

    Args:
        texts: A list of source sentences.

    Returns:
        A list with one decoded translation (special tokens removed) per input,
        in the same order as `texts`.
    """
    translated_texts = []
    for batch in get_batches(texts, batch_size=model.generation_config.batch_size):
        # Keep the encoded batch on the model's device so the call also works
        # when the model has been moved off the CPU.
        encoded = tokenizer(batch, return_tensors="pt", padding=True).to(model.device)
        translated_tokens = model.generate(
            top_k=10,
            top_p=0.75,
            num_beams=4,
            do_sample=True,
            temperature=0.9,
            length_penalty=0.6,
            **encoded,
        )
        translated_texts.extend(
            tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
        )
    return translated_texts

In [17]:
# Translate the evaluation sentences with the sampling beam-search `translate` from the previous cell.
# NOTE: no random seed is set in the visible cells, so this run can differ between executions.
beam_search_hf_predictions = translate(input_texts)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, beam_search_hf_predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [18]:
# Score the sampled beam-search Hugging Face translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=beam_search_hf_predictions,
)

{'score': 21.323465966136123,
 'counts': [134, 68, 41, 24],
 'totals': [262, 252, 242, 233],
 'precisions': [51.14503816793893,
  26.984126984126984,
  16.94214876033058,
  10.300429184549357],
 'bp': 0.9625512774839297,
 'sys_len': 262,
 'ref_len': 272}

# Translate using generate-sequences

Checking the model's generation config:

In [19]:
# Display the generation config, including the values assigned earlier
# (`batch_size`, `use_cache`) and the special-token ids the generators below read.
model.generation_config

GenerationConfig {
  "bad_words_ids": [
    [
      62801
    ]
  ],
  "batch_size": 2,
  "bos_token_id": 0,
  "decoder_start_token_id": 62801,
  "eos_token_id": 0,
  "forced_eos_token_id": 0,
  "max_length": 512,
  "pad_token_id": 62801,
  "use_cache": false
}

This is the generation function used for both greedy and beam-search generation.

In [20]:
# Cache of encoder outputs, keyed by the JSON-serialized batch of input texts.
# NOTE: entries are never evicted, so the cache grows with every distinct batch.
encoder_outputs = {}


def generate(inputs, decoder_input_ids):
    """Run one decoder forward pass for `inputs` and return the logits.

    The encoder is run only the first time a given batch of texts is seen; its
    output is cached in the module-level `encoder_outputs` dict and reused on
    later calls with the same `inputs`.

    Args:
        inputs: A JSON-serializable batch of source texts (passed to the tokenizer).
        decoder_input_ids: The decoder token ids generated so far.
            Assumed to already live on `model.device` — TODO confirm against
            the generator that calls this function.

    Returns:
        The `logits` attribute of the model output.
    """
    # Serialize once; the same key is used for the lookup, the store and the read.
    cache_key = json.dumps(inputs)
    # Keep the encoded batch on the model's device so this also works when the
    # model has been moved off the CPU.
    tokenizer_results = tokenizer(
        inputs,
        return_tensors="pt",
        padding=True,
    ).to(model.device)
    # Inference only: avoid building autograd graphs (which would otherwise be
    # kept alive by the cached encoder outputs).
    with torch.no_grad():
        if cache_key not in encoder_outputs:
            # NOTE(review): only `input_ids` is repeated per beam; the attention
            # mask is passed as-is. This is consistent only while
            # `model.generation_config.num_beams` is 1 — confirm before raising it.
            encoder_outputs[cache_key] = model.get_encoder()(
                tokenizer_results["input_ids"].repeat_interleave(
                    model.generation_config.num_beams,
                    dim=0,
                ),
                return_dict=True,
                attention_mask=tokenizer_results["attention_mask"],
            )
        model_outputs = model(
            **tokenizer_results,
            decoder_input_ids=decoder_input_ids,
            encoder_outputs=encoder_outputs[cache_key],
        )
    return model_outputs.logits

## Greedy Generation

In [21]:
# Greedy decoder from `generate_sequences`, driven by the custom `generate` forward function.
greedy_sequences_generator = GreedyGenerator(
    # Model wiring.
    generation_forward=generate,
    device=model.device,
    # Limits and special tokens taken from the model's generation config.
    decoder_start_token_id=model.generation_config.decoder_start_token_id,
    eos_token_id=model.generation_config.eos_token_id,
    max_length=model.generation_config.max_length,
    batch_size=model.generation_config.batch_size,
    # Runtime options.
    sort_samples=True,
    use_tqdm=True,
)

In [22]:
# Generate token ids with the greedy generator, then decode them to text.
prediction_ids = greedy_sequences_generator.generate(input_texts)
predictions = tokenizer.batch_decode(
    prediction_ids,
    skip_special_tokens=True,
)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [23]:
# Score the generate-sequences greedy translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=predictions,
)

{'score': 15.796125110909543,
 'counts': [128, 58, 28, 13],
 'totals': [264, 254, 244, 235],
 'precisions': [48.484848484848484,
  22.834645669291337,
  11.475409836065573,
  5.531914893617022],
 'bp': 0.9701515036966302,
 'sys_len': 264,
 'ref_len': 272}

### With multinomial and top-k, top-p sampling, and temperature

In [24]:
# Same generator class with sampling switched on; the sampling values mirror
# the ones passed to Hugging Face `generate` earlier (top_k=10, top_p=0.75, temperature=0.9).
greedy_sequences_generator = GreedyGenerator(
    # Model wiring.
    generation_forward=generate,
    device=model.device,
    # Sampling settings.
    multinomial_sampling=True,
    top_k_sampling=10,
    top_p_sampling=0.75,
    temperature=0.9,
    # Limits and special tokens taken from the model's generation config.
    decoder_start_token_id=model.generation_config.decoder_start_token_id,
    eos_token_id=model.generation_config.eos_token_id,
    max_length=model.generation_config.max_length,
    batch_size=model.generation_config.batch_size,
    # Runtime options.
    sort_samples=True,
    use_tqdm=True,
)

In [25]:
# Generate token ids with the sampling generator, then decode them to text.
# NOTE: no random seed is set in the visible cells, so this run can differ between executions.
prediction_ids = greedy_sequences_generator.generate(input_texts)
predictions = tokenizer.batch_decode(
    prediction_ids,
    skip_special_tokens=True,
)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [26]:
# Score the generate-sequences sampled translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=predictions,
)

{'score': 9.729423488181931,
 'counts': [109, 47, 19, 4],
 'totals': [266, 256, 246, 237],
 'precisions': [40.97744360902256,
  18.359375,
  7.723577235772358,
  1.6877637130801688],
 'bp': 0.9776961023999414,
 'sys_len': 266,
 'ref_len': 272}

## Beam Search Generation

In [27]:
# Beam-search decoder from `generate_sequences`; width and length penalty match
# the Hugging Face beam-search run earlier (4 beams, length_penalty=0.6).
beam_search_sequences_generator = BeamSearchGenerator(
    # Model wiring.
    generation_forward=generate,
    device=model.device,
    # Beam-search settings.
    beam_width=4,
    length_penalty=0.6,
    # Limits and special tokens taken from the model's generation config.
    decoder_start_token_id=model.generation_config.decoder_start_token_id,
    eos_token_id=model.generation_config.eos_token_id,
    max_length=model.generation_config.max_length,
    batch_size=model.generation_config.batch_size,
    # Runtime options.
    sort_samples=True,
    use_tqdm=True,
)

In [28]:
# Generate token ids with the beam-search generator, then decode them to text.
prediction_ids = beam_search_sequences_generator.generate(input_texts)
predictions = tokenizer.batch_decode(
    prediction_ids,
    skip_special_tokens=True,
)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [29]:
# Score the generate-sequences beam-search translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=predictions,
)

{'score': 20.16216711910865,
 'counts': [134, 67, 38, 21],
 'totals': [261, 251, 241, 232],
 'precisions': [51.34099616858238,
  26.693227091633467,
  15.767634854771785,
  9.051724137931034],
 'bp': 0.958730185172926,
 'sys_len': 261,
 'ref_len': 272}

### With multinomial, top-p, top-k sampling, and temperature

In [30]:
# Beam search with sampling switched on; the sampling values mirror the ones
# passed to Hugging Face `generate` earlier (top_k=10, top_p=0.75, temperature=0.9).
beam_search_sequences_generator = BeamSearchGenerator(
    # Model wiring.
    generation_forward=generate,
    device=model.device,
    # Beam-search settings.
    beam_width=4,
    length_penalty=0.6,
    # Sampling settings.
    multinomial_sampling=True,
    top_k_sampling=10,
    top_p_sampling=0.75,
    temperature=0.9,
    # Limits and special tokens taken from the model's generation config.
    decoder_start_token_id=model.generation_config.decoder_start_token_id,
    eos_token_id=model.generation_config.eos_token_id,
    max_length=model.generation_config.max_length,
    batch_size=model.generation_config.batch_size,
    # Runtime options.
    sort_samples=True,
    use_tqdm=True,
)

In [31]:
# Generate token ids with the sampling beam-search generator, then decode them to text.
# NOTE: no random seed is set in the visible cells, so this run can differ between executions.
prediction_ids = beam_search_sequences_generator.generate(input_texts)
predictions = tokenizer.batch_decode(
    prediction_ids,
    skip_special_tokens=True,
)

# Sanity check: inputs, predictions and references must all have the same length.
tuple(map(len, (input_texts, predictions, targets)))

Generating Sequences:   0%|          | 0/5 [00:00<?, ?it/s]

(10, 10, 10)

In [32]:
# Score the generate-sequences sampled beam-search translations with the sacreBLEU metric loaded earlier.
bleu_scorer.compute(
    references=targets,
    predictions=predictions,
)

{'score': 20.27713611022614,
 'counts': [135, 68, 38, 21],
 'totals': [259, 249, 239, 230],
 'precisions': [52.12355212355212,
  27.309236947791163,
  15.899581589958158,
  9.130434782608695],
 'bp': 0.951045807200927,
 'sys_len': 259,
 'ref_len': 272}