In [None]:
!pip install transformers



In [6]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Carregar o modelo e o tokenizer
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

# Texto de entrada
src_text = "UN Chief Says There Is No Military Solution in Syria"

# Configurar o idioma de origem e destino
tokenizer.src_lang = "en_XX"  # Inglês como idioma de origem
inputs = tokenizer(src_text, return_tensors="pt")


In [8]:
# Gerar a tradução, forçando o idioma de destino para romeno
generated_tokens = model.generate(
    inputs["input_ids"],
    forced_bos_token_id=tokenizer.lang_code_to_id["tr_TR"]
)

# Decodificar os tokens gerados
translated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

print("Tradução:", translated_text)

Tradução: BM Başkanı: Suriye'de Askeri Çözüm Yok


In [9]:
from transformers import BartTokenizer, BartForConditionalGeneration
import torch

# Carregar o modelo e o tokenizer do BART pré-treinado
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

# Texto de entrada
input_text = "Artificial intelligence is transforming industries across the world."

# Tokenizar o texto de entrada
inputs = tokenizer([input_text], max_length=1024, return_tensors="pt", truncation=True)

# Parte 1: Geração de texto com diferentes seeds
print("### Geração de texto com diferentes seeds ###\n")

seeds = [42, 123, 789]  # Diferentes seeds para experimentar

for seed in seeds:
    torch.manual_seed(seed)
    generated_ids = model.generate(inputs['input_ids'], max_length=50, num_beams=5, early_stopping=True)
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(f"Seed: {seed}\nGenerated Text: {generated_text}\n")


# Parte 2: Ajustar parâmetros de geração (temperatura e top-k sampling)
print("### Geração de texto com ajustes de temperatura e top-k sampling ###\n")

temperature_values = [0.7, 1.0, 1.5]  # Valores de temperatura
top_k_values = [30, 50, 100]          # Valores de top-k

for temp in temperature_values:
    for top_k in top_k_values:
        generated_ids = model.generate(
            inputs['input_ids'],
            max_length=50,
            do_sample=True,    # Ativar a amostragem para top-k sampling
            temperature=temp,   # Controlar a aleatoriedade
            top_k=top_k,        # Limitar as palavras candidatas ao top-k
        )
        generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        print(f"Temperature: {temp}, Top-k: {top_k}\nGenerated Text: {generated_text}\n")


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

### Geração de texto com diferentes seeds ###





Seed: 42
Generated Text: Artificial intelligence is transforming industries across the world. Here are some of the ways in which it is being used in the U.S. and around the world, and how it could be used in other parts of the world in the

Seed: 123
Generated Text: Artificial intelligence is transforming industries across the world. Here are some of the ways in which it is being used in the U.S. and around the world, and how it could be used in other parts of the world in the

Seed: 789
Generated Text: Artificial intelligence is transforming industries across the world. Here are some of the ways in which it is being used in the U.S. and around the world, and how it could be used in other parts of the world in the

### Geração de texto com ajustes de temperatura e top-k sampling ###

Temperature: 0.7, Top-k: 30
Generated Text: Artificial intelligence is transforming industries across the world. It is also revolutionising the way we interact with each other and the world around us. Here

In [2]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50", src_lang="en_XX", tgt_lang="ro_RO")

src_text = " UN Chief Says There Is No Military Solution in Syria"
tgt_text =  "Şeful ONU declară că nu există o soluţie militară în Siria"

model_inputs = tokenizer(src_text, return_tensors="pt")
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_text, return_tensors="pt").input_ids

model(**model_inputs, labels=labels) # forward pass




Seq2SeqLMOutput(loss=tensor(9.9719, grad_fn=<NllLossBackward0>), logits=tensor([[[ 5.9384e+01, -1.4623e+00,  3.7075e+01,  ...,  5.7301e+00,
          -9.7913e-01,  1.5201e+01],
         [ 5.9363e+01, -1.4596e+00,  3.6913e+01,  ...,  5.7274e+00,
          -9.5642e-01,  1.5132e+01],
         [ 1.0301e+01, -2.2473e-01,  1.5943e+01,  ..., -2.4285e+00,
           1.6040e+00,  7.8643e+00],
         ...,
         [ 5.3934e-02, -2.3913e-01,  1.2515e+01,  ...,  3.2412e-01,
           1.8379e+00,  5.7910e+00],
         [ 2.5746e+00, -1.8596e-01,  9.4286e+00,  ...,  4.8606e-01,
           1.5175e+00,  4.5009e+00],
         [ 1.1468e+01, -4.1693e-01,  2.4934e+01,  ...,  3.0823e-01,
           1.2864e+00,  1.1943e+01]]], grad_fn=<AddBackward0>), past_key_values=None, decoder_hidden_states=None, decoder_attentions=None, cross_attentions=None, encoder_last_hidden_state=tensor([[[ 0.0204,  0.0117, -0.0101,  ..., -0.0427, -0.0093,  0.0264],
         [-0.8181,  0.4772, -0.7508,  ..., -1.2650,  0.7026, -

In [3]:
## Dalton
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# article_hi = "संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है"
# article_ar = "الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا."
texto_pt = "A volta dos que não foram"

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt")

# translate Hindi to English
# tokenizer.src_lang = "hi_IN"
# encoded_hi = tokenizer(article_hi, return_tensors="pt")
# generated_tokens = model.generate(**encoded_hi)
# tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

# # translate Arabic to English
# tokenizer.src_lang = "ar_AR"
# encoded_ar = tokenizer(article_ar, return_tensors="pt")
# generated_tokens = model.generate(**encoded_ar)
# tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

tokenizer.src_lang = "pt_XX"
encoded_ar = tokenizer(texto_pt, return_tensors="pt")
generated_tokens = model.generate(**encoded_ar)
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/268 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

["Around those that weren't."]