# Ejemplos de Large Language Model (LLM) disponibles en HuggingFace para procesamiento de Texto

https://huggingface.co/models

In [1]:
#@title Instalar paquete Transformers de HuggingFace
!pip install transformers




In [2]:
#@title Cargar Librerías

from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoModelForSeq2SeqLM, pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch  # nota: usa Torch porque con TF directo tira error
import re
import random

import textwrap

print("Librerías cargadas.")

# Choose the compute device: the first CUDA GPU when Torch reports one, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print("Device: ", device)

Librerías cargadas.
Device:  cpu


# Modelo para Responder Preguntas ( Question Answering / Q&A )

In [3]:
#@title Cargar Modelo Q&A

nombreModeloQA = "timpal0l/mdeberta-v3-base-squad2" #@param [ "timpal0l/mdeberta-v3-base-squad2", "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es", "BSC-LT/roberta-large-bne-sqac"]
#@markdown otros modelos disponibles en: https://huggingface.co/models?pipeline_tag=question-answering&sort=trending


# Load the tokenizer and the question-answering model for the checkpoint named in nombreModeloQA
tokenizerQA = AutoTokenizer.from_pretrained(nombreModeloQA)
modelQA = AutoModelForQuestionAnswering.from_pretrained(nombreModeloQA)

# Helper that runs the Q&A model on one question/context pair
def responder_pregunta(question, context, max_length=512):
  """Return the piece of `context` that the Q&A model selects as the answer to `question`.

  Args:
    question: question text.
    context: passage the answer is taken from.
    max_length: maximum number of tokens passed to the model. When the
      question/context pair is longer, only the context is truncated.

  Returns:
    The decoded answer text with special tokens removed. The string is empty
    when the predicted end position precedes the predicted start position.
  """
  # tokenize the pair; truncate only the second segment (the context) so the
  # question is never cut and the input stays within max_length tokens
  inputs = tokenizerQA(
      question,
      context,
      return_tensors="pt",
      truncation="only_second",
      max_length=max_length,
  )
  # inference only: no gradients needed
  with torch.no_grad():
    outputs = modelQA(**inputs)
  # highest-scoring start and end token positions
  answer_start_index = torch.argmax(outputs.start_logits)
  answer_end_index = torch.argmax(outputs.end_logits)
  # slice the answer tokens (end inclusive) and turn them back into text,
  # dropping special tokens so markers such as [CLS]/[SEP] never leak into the answer
  predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
  return tokenizerQA.decode(predict_answer_tokens, skip_special_tokens=True)

print("\nModelo ", nombreModeloQA, " cargado.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/453 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/879 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]


Modelo  timpal0l/mdeberta-v3-base-squad2  cargado.


In [4]:
#@title Prueba 1 modelo Question Answering

context = "San Martín cabalgaba en su caballo blanco cuando ataco al enemigo." #@param {type:"string"}
question = "de que color es caballo San Martín?" #@param {type:"string"}

# Ask the Q&A model to answer the question from the given context
res = responder_pregunta(question, context)

# Show the wrapped context, then the question and the answer found
print("\n")
print(f">  {textwrap.fill(context)}")
print(f"\t {question}")
print(f"\t  --> {res}")




>  San Martín cabalgaba en su caballo blanco cuando ataco al enemigo.
	 de que color es caballo San Martín?
	  --> blanco


In [5]:
#@title Prueba 2 modelo Q&A

context = "Luego de saltar sobre la pelota roja, el perro gris cay\xF3 dentro de la pileta" #@param {type:"string"}

question_1 = "que color es perro?" #@param {type:"string"}

question_2 = "que color es pelota?" #@param {type:"string"}

question_3 = "que hizo el perro?" #@param {type:"string"}

question_4 = "donde salto?" #@param {type:"string"}

question_5 = "donde termino?" #@param {type:"string"}


# Collect the five form questions so they are answered in a single pass
questionList = [question_1, question_2, question_3, question_4, question_5]


print("\n")
print(f">  {textwrap.fill(context, width=100)}")

for q in questionList:
  # answer every question against the same context
  res = responder_pregunta(q, context)

  # question, answer and a blank separator line
  print(f"\t {q}\n\t  --> {res}\n")




>  Luego de saltar sobre la pelota roja, el perro gris cayó dentro de la pileta
	 que color es perro?
	  --> gris

	 que color es pelota?
	  --> roja

	 que hizo el perro?
	  --> saltar sobre la pelota roja, el perro gris cayó dentro de la pileta

	 donde salto?
	  --> sobre la pelota roja

	 donde termino?
	  --> dentro de la pileta



# Modelo Traductor ( Translation )

In [6]:
#@title Cargar Modelo Traductor

nombreModeloTrad = "facebook/nllb-200-distilled-600M" #@param[ "facebook/nllb-200-distilled-600M" ]
#@markdown otros modelos disponibles en: https://huggingface.co/models?pipeline_tag=translation&sort=trending

# Load the tokenizer and the sequence-to-sequence translation model named in nombreModeloTrad
tokenizerTrad = AutoTokenizer.from_pretrained(nombreModeloTrad)
modelTrad = AutoModelForSeq2SeqLM.from_pretrained(nombreModeloTrad)

def translate(text, src_lang, tgt_lang, max_length=500):

    TASK = "translation"
    translation_pipeline = pipeline(TASK,
                                    model=modelTrad,
                                    tokenizer=tokenizerTrad,
                                    src_lang=src_lang,
                                    tgt_lang=tgt_lang,
                                    max_length=max_length,
                                    device=device)

    result = translation_pipeline(text)
    return result[0]['translation_text']

print("\nModelo ", nombreModeloTrad, " cargado.")

tokenizer_config.json:   0%|          | 0.00/564 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.85M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.3M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/3.55k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]


Modelo  facebook/nllb-200-distilled-600M  cargado.


In [7]:
#@title Probar Traductor 1

lenguaje_texto = "eng_Latn" #@param ["ace_Arab",  "ace_Latn",  "acm_Arab",  "acq_Arab",  "aeb_Arab",  "afr_Latn",  "ajp_Arab",  "aka_Latn",  "amh_Ethi",  "apc_Arab",  "arb_Arab",  "ars_Arab",  "ary_Arab",  "arz_Arab",  "asm_Beng",  "ast_Latn",  "awa_Deva",  "ayr_Latn",  "azb_Arab",  "azj_Latn",  "bak_Cyrl",  "bam_Latn",  "ban_Latn",  "bel_Cyrl",  "bem_Latn",  "ben_Beng",  "bho_Deva",  "bjn_Arab",  "bjn_Latn",  "bod_Tibt",  "bos_Latn",  "bug_Latn",  "bul_Cyrl",  "cat_Latn",  "ceb_Latn",  "ces_Latn",  "cjk_Latn",  "ckb_Arab",  "crh_Latn",  "cym_Latn",  "dan_Latn",  "deu_Latn",  "dik_Latn",  "dyu_Latn",  "dzo_Tibt",  "ell_Grek",  "eng_Latn",  "epo_Latn",  "est_Latn",  "eus_Latn",  "ewe_Latn",  "fao_Latn",  "pes_Arab",  "fij_Latn",  "fin_Latn",  "fon_Latn",  "fra_Latn",  "fur_Latn",  "fuv_Latn",  "gla_Latn",  "gle_Latn",  "glg_Latn",  "grn_Latn",  "guj_Gujr",  "hat_Latn",  "hau_Latn",  "heb_Hebr",  "hin_Deva",  "hne_Deva",  "hrv_Latn",  "hun_Latn",  "hye_Armn",  "ibo_Latn",  "ilo_Latn",  "ind_Latn",  "isl_Latn",  "ita_Latn",  "jav_Latn",  "jpn_Jpan",  "kab_Latn",  "kac_Latn",  "kam_Latn",  "kan_Knda",  "kas_Arab",  "kas_Deva",  "kat_Geor",  "knc_Arab",  "knc_Latn",  "kaz_Cyrl",  "kbp_Latn",  "kea_Latn",  "khm_Khmr",  "kik_Latn",  "kin_Latn",  "kir_Cyrl",  "kmb_Latn",  "kon_Latn",  "kor_Hang",  "kmr_Latn",  "lao_Laoo",  "lvs_Latn",  "lij_Latn",  "lim_Latn",  "lin_Latn",  "lit_Latn",  "lmo_Latn",  "ltg_Latn",  "ltz_Latn",  "lua_Latn",  "lug_Latn",  "luo_Latn",  "lus_Latn",  "mag_Deva",  "mai_Deva",  "mal_Mlym",  "mar_Deva",  "min_Latn",  "mkd_Cyrl",  "plt_Latn",  "mlt_Latn",  "mni_Beng",  "khk_Cyrl",  "mos_Latn",  "mri_Latn",  "zsm_Latn",  "mya_Mymr",  "nld_Latn",  "nno_Latn",  "nob_Latn",  "npi_Deva",  "nso_Latn",  "nus_Latn",  "nya_Latn",  "oci_Latn",  "gaz_Latn",  "ory_Orya",  "pag_Latn",  "pan_Guru",  "pap_Latn",  "pol_Latn",  "por_Latn",  "prs_Arab",  "pbt_Arab",  "quy_Latn",  "ron_Latn",  "run_Latn",  "rus_Cyrl",  "sag_Latn",  "san_Deva",  "sat_Beng",  "scn_Latn",  
"shn_Mymr",  "sin_Sinh",  "slk_Latn",  "slv_Latn",  "smo_Latn",  "sna_Latn",  "snd_Arab",  "som_Latn",  "sot_Latn",  "spa_Latn",  "als_Latn",  "srd_Latn",  "srp_Cyrl",  "ssw_Latn",  "sun_Latn",  "swe_Latn",  "swh_Latn",  "szl_Latn",  "tam_Taml",  "tat_Cyrl",  "tel_Telu",  "tgk_Cyrl",  "tgl_Latn",  "tha_Thai",  "tir_Ethi",  "taq_Latn",  "taq_Tfng",  "tpi_Latn",  "tsn_Latn",  "tso_Latn",  "tuk_Latn",  "tum_Latn",  "tur_Latn",  "twi_Latn",  "tzm_Tfng",  "uig_Arab",  "ukr_Cyrl",  "umb_Latn",  "urd_Arab",  "uzn_Latn",  "vec_Latn",  "vie_Latn",  "war_Latn",  "wol_Latn",  "xho_Latn",  "ydd_Hebr",  "yor_Latn",  "yue_Hant",  "zho_Hans",  "zho_Hant",  "zul_Latn" ]

text_to_translate = "Life is like a box of chocolates" #@param {type:"string"}

lenguaje_traduccion = "spa_Latn" #@param ["ace_Arab",  "ace_Latn",  "acm_Arab",  "acq_Arab",  "aeb_Arab",  "afr_Latn",  "ajp_Arab",  "aka_Latn",  "amh_Ethi",  "apc_Arab",  "arb_Arab",  "ars_Arab",  "ary_Arab",  "arz_Arab",  "asm_Beng",  "ast_Latn",  "awa_Deva",  "ayr_Latn",  "azb_Arab",  "azj_Latn",  "bak_Cyrl",  "bam_Latn",  "ban_Latn",  "bel_Cyrl",  "bem_Latn",  "ben_Beng",  "bho_Deva",  "bjn_Arab",  "bjn_Latn",  "bod_Tibt",  "bos_Latn",  "bug_Latn",  "bul_Cyrl",  "cat_Latn",  "ceb_Latn",  "ces_Latn",  "cjk_Latn",  "ckb_Arab",  "crh_Latn",  "cym_Latn",  "dan_Latn",  "deu_Latn",  "dik_Latn",  "dyu_Latn",  "dzo_Tibt",  "ell_Grek",  "eng_Latn",  "epo_Latn",  "est_Latn",  "eus_Latn",  "ewe_Latn",  "fao_Latn",  "pes_Arab",  "fij_Latn",  "fin_Latn",  "fon_Latn",  "fra_Latn",  "fur_Latn",  "fuv_Latn",  "gla_Latn",  "gle_Latn",  "glg_Latn",  "grn_Latn",  "guj_Gujr",  "hat_Latn",  "hau_Latn",  "heb_Hebr",  "hin_Deva",  "hne_Deva",  "hrv_Latn",  "hun_Latn",  "hye_Armn",  "ibo_Latn",  "ilo_Latn",  "ind_Latn",  "isl_Latn",  "ita_Latn",  "jav_Latn",  "jpn_Jpan",  "kab_Latn",  "kac_Latn",  "kam_Latn",  "kan_Knda",  "kas_Arab",  "kas_Deva",  "kat_Geor",  "knc_Arab",  "knc_Latn",  "kaz_Cyrl",  "kbp_Latn",  "kea_Latn",  "khm_Khmr",  "kik_Latn",  "kin_Latn",  "kir_Cyrl",  "kmb_Latn",  "kon_Latn",  "kor_Hang",  "kmr_Latn",  "lao_Laoo",  "lvs_Latn",  "lij_Latn",  "lim_Latn",  "lin_Latn",  "lit_Latn",  "lmo_Latn",  "ltg_Latn",  "ltz_Latn",  "lua_Latn",  "lug_Latn",  "luo_Latn",  "lus_Latn",  "mag_Deva",  "mai_Deva",  "mal_Mlym",  "mar_Deva",  "min_Latn",  "mkd_Cyrl",  "plt_Latn",  "mlt_Latn",  "mni_Beng",  "khk_Cyrl",  "mos_Latn",  "mri_Latn",  "zsm_Latn",  "mya_Mymr",  "nld_Latn",  "nno_Latn",  "nob_Latn",  "npi_Deva",  "nso_Latn",  "nus_Latn",  "nya_Latn",  "oci_Latn",  "gaz_Latn",  "ory_Orya",  "pag_Latn",  "pan_Guru",  "pap_Latn",  "pol_Latn",  "por_Latn",  "prs_Arab",  "pbt_Arab",  "quy_Latn",  "ron_Latn",  "run_Latn",  "rus_Cyrl",  "sag_Latn",  "san_Deva",  "sat_Beng",  
"scn_Latn",  "shn_Mymr",  "sin_Sinh",  "slk_Latn",  "slv_Latn",  "smo_Latn",  "sna_Latn",  "snd_Arab",  "som_Latn",  "sot_Latn",  "spa_Latn",  "als_Latn",  "srd_Latn",  "srp_Cyrl",  "ssw_Latn",  "sun_Latn",  "swe_Latn",  "swh_Latn",  "szl_Latn",  "tam_Taml",  "tat_Cyrl",  "tel_Telu",  "tgk_Cyrl",  "tgl_Latn",  "tha_Thai",  "tir_Ethi",  "taq_Latn",  "taq_Tfng",  "tpi_Latn",  "tsn_Latn",  "tso_Latn",  "tuk_Latn",  "tum_Latn",  "tur_Latn",  "twi_Latn",  "tzm_Tfng",  "uig_Arab",  "ukr_Cyrl",  "umb_Latn",  "urd_Arab",  "uzn_Latn",  "vec_Latn",  "vie_Latn",  "war_Latn",  "wol_Latn",  "xho_Latn",  "ydd_Hebr",  "yor_Latn",  "yue_Hant",  "zho_Hans",  "zho_Hant",  "zul_Latn" ]

# Translate the text into the chosen target language
resTrad = translate(text_to_translate, lenguaje_texto, lenguaje_traduccion)

print(f"\n {text_to_translate}")
print(f"\t-->  {resTrad}")

# Translate the result back to the source language as a round-trip check
resTrad2 = translate(resTrad, lenguaje_traduccion, lenguaje_texto)

print(f"\n {resTrad}")
print(f"\t-->  {resTrad2}")



 Life is like a box of chocolates
	-->  La vida es como una caja de chocolate

 La vida es como una caja de chocolate
	-->  Life is like a box of chocolate


In [8]:
#@title Probar Traductor 2

lenguaje_texto = "spa_Latn" #@param ["ace_Arab",  "ace_Latn",  "acm_Arab",  "acq_Arab",  "aeb_Arab",  "afr_Latn",  "ajp_Arab",  "aka_Latn",  "amh_Ethi",  "apc_Arab",  "arb_Arab",  "ars_Arab",  "ary_Arab",  "arz_Arab",  "asm_Beng",  "ast_Latn",  "awa_Deva",  "ayr_Latn",  "azb_Arab",  "azj_Latn",  "bak_Cyrl",  "bam_Latn",  "ban_Latn",  "bel_Cyrl",  "bem_Latn",  "ben_Beng",  "bho_Deva",  "bjn_Arab",  "bjn_Latn",  "bod_Tibt",  "bos_Latn",  "bug_Latn",  "bul_Cyrl",  "cat_Latn",  "ceb_Latn",  "ces_Latn",  "cjk_Latn",  "ckb_Arab",  "crh_Latn",  "cym_Latn",  "dan_Latn",  "deu_Latn",  "dik_Latn",  "dyu_Latn",  "dzo_Tibt",  "ell_Grek",  "eng_Latn",  "epo_Latn",  "est_Latn",  "eus_Latn",  "ewe_Latn",  "fao_Latn",  "pes_Arab",  "fij_Latn",  "fin_Latn",  "fon_Latn",  "fra_Latn",  "fur_Latn",  "fuv_Latn",  "gla_Latn",  "gle_Latn",  "glg_Latn",  "grn_Latn",  "guj_Gujr",  "hat_Latn",  "hau_Latn",  "heb_Hebr",  "hin_Deva",  "hne_Deva",  "hrv_Latn",  "hun_Latn",  "hye_Armn",  "ibo_Latn",  "ilo_Latn",  "ind_Latn",  "isl_Latn",  "ita_Latn",  "jav_Latn",  "jpn_Jpan",  "kab_Latn",  "kac_Latn",  "kam_Latn",  "kan_Knda",  "kas_Arab",  "kas_Deva",  "kat_Geor",  "knc_Arab",  "knc_Latn",  "kaz_Cyrl",  "kbp_Latn",  "kea_Latn",  "khm_Khmr",  "kik_Latn",  "kin_Latn",  "kir_Cyrl",  "kmb_Latn",  "kon_Latn",  "kor_Hang",  "kmr_Latn",  "lao_Laoo",  "lvs_Latn",  "lij_Latn",  "lim_Latn",  "lin_Latn",  "lit_Latn",  "lmo_Latn",  "ltg_Latn",  "ltz_Latn",  "lua_Latn",  "lug_Latn",  "luo_Latn",  "lus_Latn",  "mag_Deva",  "mai_Deva",  "mal_Mlym",  "mar_Deva",  "min_Latn",  "mkd_Cyrl",  "plt_Latn",  "mlt_Latn",  "mni_Beng",  "khk_Cyrl",  "mos_Latn",  "mri_Latn",  "zsm_Latn",  "mya_Mymr",  "nld_Latn",  "nno_Latn",  "nob_Latn",  "npi_Deva",  "nso_Latn",  "nus_Latn",  "nya_Latn",  "oci_Latn",  "gaz_Latn",  "ory_Orya",  "pag_Latn",  "pan_Guru",  "pap_Latn",  "pol_Latn",  "por_Latn",  "prs_Arab",  "pbt_Arab",  "quy_Latn",  "ron_Latn",  "run_Latn",  "rus_Cyrl",  "sag_Latn",  "san_Deva",  "sat_Beng",  "scn_Latn",  
"shn_Mymr",  "sin_Sinh",  "slk_Latn",  "slv_Latn",  "smo_Latn",  "sna_Latn",  "snd_Arab",  "som_Latn",  "sot_Latn",  "spa_Latn",  "als_Latn",  "srd_Latn",  "srp_Cyrl",  "ssw_Latn",  "sun_Latn",  "swe_Latn",  "swh_Latn",  "szl_Latn",  "tam_Taml",  "tat_Cyrl",  "tel_Telu",  "tgk_Cyrl",  "tgl_Latn",  "tha_Thai",  "tir_Ethi",  "taq_Latn",  "taq_Tfng",  "tpi_Latn",  "tsn_Latn",  "tso_Latn",  "tuk_Latn",  "tum_Latn",  "tur_Latn",  "twi_Latn",  "tzm_Tfng",  "uig_Arab",  "ukr_Cyrl",  "umb_Latn",  "urd_Arab",  "uzn_Latn",  "vec_Latn",  "vie_Latn",  "war_Latn",  "wol_Latn",  "xho_Latn",  "ydd_Hebr",  "yor_Latn",  "yue_Hant",  "zho_Hans",  "zho_Hant",  "zul_Latn" ]

text_to_translate = "Que lindo día para un asado!" #@param {type:"string"}

cant_idiomas_usar = 25 #@param {type:"integer"}

# Use at least two target languages
cant_idiomas_usar = max(cant_idiomas_usar, 2)

print(f"\n {text_to_translate}")

lista_idiomas = ["ace_Arab",  "ace_Latn",  "acm_Arab",  "acq_Arab",  "aeb_Arab",  "afr_Latn",  "ajp_Arab",  "aka_Latn",  "amh_Ethi",  "apc_Arab",  "arb_Arab",  "ars_Arab",  "ary_Arab",  "arz_Arab",  "asm_Beng",  "ast_Latn",  "awa_Deva",  "ayr_Latn",  "azb_Arab",  "azj_Latn",  "bak_Cyrl",  "bam_Latn",  "ban_Latn",  "bel_Cyrl",  "bem_Latn",  "ben_Beng",  "bho_Deva",  "bjn_Arab",  "bjn_Latn",  "bod_Tibt",  "bos_Latn",  "bug_Latn",  "bul_Cyrl",  "cat_Latn",  "ceb_Latn",  "ces_Latn",  "cjk_Latn",  "ckb_Arab",  "crh_Latn",  "cym_Latn",  "dan_Latn",  "deu_Latn",  "dik_Latn",  "dyu_Latn",  "dzo_Tibt",  "ell_Grek",  "eng_Latn",  "epo_Latn",  "est_Latn",  "eus_Latn",  "ewe_Latn",  "fao_Latn",  "pes_Arab",  "fij_Latn",  "fin_Latn",  "fon_Latn",  "fra_Latn",  "fur_Latn",  "fuv_Latn",  "gla_Latn",  "gle_Latn",  "glg_Latn",  "grn_Latn",  "guj_Gujr",  "hat_Latn",  "hau_Latn",  "heb_Hebr",  "hin_Deva",  "hne_Deva",  "hrv_Latn",  "hun_Latn",  "hye_Armn",  "ibo_Latn",  "ilo_Latn",  "ind_Latn",  "isl_Latn",  "ita_Latn",  "jav_Latn",  "jpn_Jpan",  "kab_Latn",  "kac_Latn",  "kam_Latn",  "kan_Knda",  "kas_Arab",  "kas_Deva",  "kat_Geor",  "knc_Arab",  "knc_Latn",  "kaz_Cyrl",  "kbp_Latn",  "kea_Latn",  "khm_Khmr",  "kik_Latn",  "kin_Latn",  "kir_Cyrl",  "kmb_Latn",  "kon_Latn",  "kor_Hang",  "kmr_Latn",  "lao_Laoo",  "lvs_Latn",  "lij_Latn",  "lim_Latn",  "lin_Latn",  "lit_Latn",  "lmo_Latn",  "ltg_Latn",  "ltz_Latn",  "lua_Latn",  "lug_Latn",  "luo_Latn",  "lus_Latn",  "mag_Deva",  "mai_Deva",  "mal_Mlym",  "mar_Deva",  "min_Latn",  "mkd_Cyrl",  "plt_Latn",  "mlt_Latn",  "mni_Beng",  "khk_Cyrl",  "mos_Latn",  "mri_Latn",  "zsm_Latn",  "mya_Mymr",  "nld_Latn",  "nno_Latn",  "nob_Latn",  "npi_Deva",  "nso_Latn",  "nus_Latn",  "nya_Latn",  "oci_Latn",  "gaz_Latn",  "ory_Orya",  "pag_Latn",  "pan_Guru",  "pap_Latn",  "pol_Latn",  "por_Latn",  "prs_Arab",  "pbt_Arab",  "quy_Latn",  "ron_Latn",  "run_Latn",  "rus_Cyrl",  "sag_Latn",  "san_Deva",  "sat_Beng",  "scn_Latn",  "shn_Mymr",  
"sin_Sinh",  "slk_Latn",  "slv_Latn",  "smo_Latn",  "sna_Latn",  "snd_Arab",  "som_Latn",  "sot_Latn",  "spa_Latn",  "als_Latn",  "srd_Latn",  "srp_Cyrl",  "ssw_Latn",  "sun_Latn",  "swe_Latn",  "swh_Latn",  "szl_Latn",  "tam_Taml",  "tat_Cyrl",  "tel_Telu",  "tgk_Cyrl",  "tgl_Latn",  "tha_Thai",  "tir_Ethi",  "taq_Latn",  "taq_Tfng",  "tpi_Latn",  "tsn_Latn",  "tso_Latn",  "tuk_Latn",  "tum_Latn",  "tur_Latn",  "twi_Latn",  "tzm_Tfng",  "uig_Arab",  "ukr_Cyrl",  "umb_Latn",  "urd_Arab",  "uzn_Latn",  "vec_Latn",  "vie_Latn",  "war_Latn",  "wol_Latn",  "xho_Latn",  "ydd_Hebr",  "yor_Latn",  "yue_Hant",  "zho_Hans",  "zho_Hant",  "zul_Latn" ]

# Candidate targets: every listed language except the source one, since translating
# the text into its own language shows nothing
idiomas_destino = [lng for lng in lista_idiomas if lng != lenguaje_texto]

# random.sample raises ValueError when asked for more items than the population holds,
# so the requested count is clamped to the number of candidates
lng_random = random.sample(idiomas_destino, min(cant_idiomas_usar, len(idiomas_destino)))

for lng_trg in lng_random:

  # translate the text into the sampled language
  resTrad = translate(text_to_translate, lenguaje_texto, lng_trg)
  print("\t[", lng_trg,"] --> ", resTrad)



 Que lindo día para un asado!
	[ kon_Latn ] -->  Kilumbu mosi ya kitoko sambu na kudia mampa ya masa ya masa!
	[ uig_Arab ] -->  ھەقىقەتەن ياخشى بىر كۈن.
	[ pag_Latn ] -->  Agaylan maabig ya agew parad sakey ya asado!
	[ zho_Hant ] -->  這是一天很美好,
	[ epo_Latn ] -->  Kia bela tago por rostado!
	[ ibo_Latn ] -->  Lee ụbọchị mara mma maka a roast!
	[ dzo_Tibt ] -->  ཉིནམ་འདི་ བཞེས་སྒོ་གི་དོན་ལུ་གནམ་མེད་ས་མེད་ལེགས་ཤོམ་ཨིན་
	[ ukr_Cyrl ] -->  Як прекрасний день для оброзу!
	[ slv_Latn ] -->  Kako lep dan za prah!
	[ ell_Grek ] -->  Τι ωραία μέρα για ένα ψητό!
	[ gla_Latn ] -->  Dè latha brèagha airson brògan!
	[ heb_Hebr ] -->  איזה יום יפה בשביל טוסט!
	[ aeb_Arab ] -->  شْنُوَّة نْهَارْ مُوجُودْ لِلصَّنْدَة!
	[ kac_Latn ] -->  Dai gaw shat sha na matu grai kaja ai nhtoi re.
	[ ars_Arab ] -->  يوم جميل لخبز!
	[ uzn_Latn ] -->  Qani, qanday go'zal kun ro'za!
	[ khk_Cyrl ] -->  Энэ бол хаварны өдөр юм.
	[ swh_Latn ] -->  Ni siku nzuri sana kwa ajili ya barbeque!
	[ ssw_Latn ] -->  Lelusuku lol

# Modelo para Hacer Resúmenes ( Summarization )

In [9]:
#@title Cargar Modelo Summarization

model_nameSum = "csebuetnlp/mT5_multilingual_XLSum" #@param [ "csebuetnlp/mT5_multilingual_XLSum" ]
#@markdown otros modelos disponibles en: https://huggingface.co/models?pipeline_tag=summarization&sort=trending


def WHITESPACE_HANDLER(k):
    """Trim `k` and collapse every run of whitespace (spaces, tabs, newlines) into one space."""
    # raw string: '\s' inside a plain literal is an invalid escape sequence and triggers a warning
    return re.sub(r'\s+', ' ', k.strip())


# Load the tokenizer and the sequence-to-sequence summarization model named in model_nameSum
tokenizerSum = AutoTokenizer.from_pretrained(model_nameSum)
modelSum = AutoModelForSeq2SeqLM.from_pretrained(model_nameSum)

def generate_summary(article_text, max_input_length=512, max_summary_length=84, num_beams=4):
    """Summarize `article_text` with the loaded summarization model.

    Args:
        article_text: text to summarize; its whitespace is normalized first.
        max_input_length: number of tokens the input is padded/truncated to.
        max_summary_length: `max_length` passed to `generate` for the summary.
        num_beams: number of beams used by beam search.

    Returns:
        The decoded summary text, without special tokens.
    """
    # tokenize a single-item batch, padded/truncated to exactly max_input_length tokens
    encoded = tokenizerSum(
        [WHITESPACE_HANDLER(article_text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=max_input_length
    )

    # pass the tokenizer's attention mask along so the padded positions are masked explicitly
    output_ids = modelSum.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_summary_length,
        no_repeat_ngram_size=2,
        num_beams=num_beams
    )[0]

    # first (only) sequence of the batch -> text
    summary = tokenizerSum.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )

    return summary

print("\nModelo ", model_nameSum, " cargado.")

tokenizer_config.json:   0%|          | 0.00/375 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/730 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


pytorch_model.bin:   0%|          | 0.00/2.33G [00:00<?, ?B/s]


Modelo  csebuetnlp/mT5_multilingual_XLSum  cargado.


In [10]:
#@title Probar Modelo Summarization

text_to_summ = "Un modelo transformer es una red neuronal que aprende contexto y, por lo tanto, significado mediante el seguimiento de relaciones en datos secuenciales como las palabras de esta oraci\xF3n. Los modelos transformer aplican un conjunto en evoluci\xF3n de t\xE9cnicas matem\xE1ticas, llamadas atenci\xF3n o atenci\xF3n propia, para detectar formas sutiles en que los elementos de datos en una serie se influencian y dependen entre s\xED. Los transformers se describieron por primera vez en un documento de 2017 de Google. Estos transformers son una de las clases m\xE1s nuevas y potentes de modelos inventados hasta la fecha. Est\xE1n impulsando una ola de avances en machine learning que algunos han apodado como la \xABIA de transformer\xBB. Los transformers est\xE1 traduciendo texto y habla casi en tiempo real, lo que permite el acceso a reuniones y aulas para diversos asistentes con discapacidades auditivas. En agosto de 2021, los investigadores de Stanford llamaron a los transformers los \xABmodelos de base\xBB porque ven que impulsan un cambio de paradigma en la IA. \xABLa gran escala y el alcance de los modelos de base en los \xFAltimos a\xF1os han extendido nuestra imaginaci\xF3n de lo que es posible\xBB, escribieron." #@param {type:"string"}

# Show the wrapped source text, then the summary the model generates for it
print(f">  {textwrap.fill(text_to_summ, width=100)}")

print(f" --> {generate_summary(text_to_summ)}")

>  Un modelo transformer es una red neuronal que aprende contexto y, por lo tanto, significado mediante
el seguimiento de relaciones en datos secuenciales como las palabras de esta oración. Los modelos
transformer aplican un conjunto en evolución de técnicas matemáticas, llamadas atención o atención
propia, para detectar formas sutiles en que los elementos de datos en una serie se influencian y
dependen entre sí. Los transformers se describieron por primera vez en un documento de 2017 de
Google. Estos transformers son una de las clases más nuevas y potentes de modelos inventados hasta
la fecha. Están impulsando una ola de avances en machine learning que algunos han apodado como la
«IA de transformer». Los transformers está traduciendo texto y habla casi en tiempo real, lo que
permite el acceso a reuniones y aulas para diversos asistentes con discapacidades auditivas. En
agosto de 2021, los investigadores de Stanford llamaron a los transformers los «modelos de base»
porque ven que impul

# Probar Modelos combinados

In [11]:
#@title Probar Todos los Modelos

texto_completo = "Artificial intelligence (AI) is the intelligence of machines or software, as opposed to the intelligence of humans or other animals. It is a field of study in computer science that develops and studies intelligent machines. Such machines may be called AIs. AI technology is widely used throughout industry, government, and science. Some high-profile applications are: advanced web search engines (e.g., Google Search), recommendation systems (used by YouTube, Amazon, and Netflix), understanding human speech (such as Google Assistant, Siri, and Alexa), self-driving cars (e.g., Waymo), generative and creative tools (ChatGPT and AI art), and superhuman play and analysis in strategy games (such as chess and Go). Alan Turing was the first person to conduct substantial research in the field that he called Machine Intelligence. Artificial intelligence was founded as an academic discipline in 1956. The field went through multiple cycles of optimism followed by disappointment and loss of funding. Funding and interest vastly increased after 2012 when deep learning surpassed all previous AI techniques, and after 2017 with the transformer architecture.[9] This led to the AI spring of the early 2020s, with companies, universities, and laboratories overwhelmingly based in the United States pioneering significant advances in artificial intelligence. The various sub-fields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include reasoning, knowledge representation, planning, learning, natural language processing, perception, and support for robotics.[a] General intelligence (the ability to complete any task performable by a human) is among the field's long-term goals. 
To solve these problems, AI researchers have adapted and integrated a wide range of problem-solving techniques, including search and mathematical optimization, formal logic, artificial neural networks, and methods based on statistics, operations research, and economics.[b] AI also draws upon psychology, linguistics, philosophy, neuroscience and other fields." #@param {type:"string"}
lenguaje_texto = "eng_Latn" #@param ["ace_Arab",  "ace_Latn",  "acm_Arab",  "acq_Arab",  "aeb_Arab",  "afr_Latn",  "ajp_Arab",  "aka_Latn",  "amh_Ethi",  "apc_Arab",  "arb_Arab",  "ars_Arab",  "ary_Arab",  "arz_Arab",  "asm_Beng",  "ast_Latn",  "awa_Deva",  "ayr_Latn",  "azb_Arab",  "azj_Latn",  "bak_Cyrl",  "bam_Latn",  "ban_Latn",  "bel_Cyrl",  "bem_Latn",  "ben_Beng",  "bho_Deva",  "bjn_Arab",  "bjn_Latn",  "bod_Tibt",  "bos_Latn",  "bug_Latn",  "bul_Cyrl",  "cat_Latn",  "ceb_Latn",  "ces_Latn",  "cjk_Latn",  "ckb_Arab",  "crh_Latn",  "cym_Latn",  "dan_Latn",  "deu_Latn",  "dik_Latn",  "dyu_Latn",  "dzo_Tibt",  "ell_Grek",  "eng_Latn",  "epo_Latn",  "est_Latn",  "eus_Latn",  "ewe_Latn",  "fao_Latn",  "pes_Arab",  "fij_Latn",  "fin_Latn",  "fon_Latn",  "fra_Latn",  "fur_Latn",  "fuv_Latn",  "gla_Latn",  "gle_Latn",  "glg_Latn",  "grn_Latn",  "guj_Gujr",  "hat_Latn",  "hau_Latn",  "heb_Hebr",  "hin_Deva",  "hne_Deva",  "hrv_Latn",  "hun_Latn",  "hye_Armn",  "ibo_Latn",  "ilo_Latn",  "ind_Latn",  "isl_Latn",  "ita_Latn",  "jav_Latn",  "jpn_Jpan",  "kab_Latn",  "kac_Latn",  "kam_Latn",  "kan_Knda",  "kas_Arab",  "kas_Deva",  "kat_Geor",  "knc_Arab",  "knc_Latn",  "kaz_Cyrl",  "kbp_Latn",  "kea_Latn",  "khm_Khmr",  "kik_Latn",  "kin_Latn",  "kir_Cyrl",  "kmb_Latn",  "kon_Latn",  "kor_Hang",  "kmr_Latn",  "lao_Laoo",  "lvs_Latn",  "lij_Latn",  "lim_Latn",  "lin_Latn",  "lit_Latn",  "lmo_Latn",  "ltg_Latn",  "ltz_Latn",  "lua_Latn",  "lug_Latn",  "luo_Latn",  "lus_Latn",  "mag_Deva",  "mai_Deva",  "mal_Mlym",  "mar_Deva",  "min_Latn",  "mkd_Cyrl",  "plt_Latn",  "mlt_Latn",  "mni_Beng",  "khk_Cyrl",  "mos_Latn",  "mri_Latn",  "zsm_Latn",  "mya_Mymr",  "nld_Latn",  "nno_Latn",  "nob_Latn",  "npi_Deva",  "nso_Latn",  "nus_Latn",  "nya_Latn",  "oci_Latn",  "gaz_Latn",  "ory_Orya",  "pag_Latn",  "pan_Guru",  "pap_Latn",  "pol_Latn",  "por_Latn",  "prs_Arab",  "pbt_Arab",  "quy_Latn",  "ron_Latn",  "run_Latn",  "rus_Cyrl",  "sag_Latn",  "san_Deva",  "sat_Beng",  "scn_Latn",  
"shn_Mymr",  "sin_Sinh",  "slk_Latn",  "slv_Latn",  "smo_Latn",  "sna_Latn",  "snd_Arab",  "som_Latn",  "sot_Latn",  "spa_Latn",  "als_Latn",  "srd_Latn",  "srp_Cyrl",  "ssw_Latn",  "sun_Latn",  "swe_Latn",  "swh_Latn",  "szl_Latn",  "tam_Taml",  "tat_Cyrl",  "tel_Telu",  "tgk_Cyrl",  "tgl_Latn",  "tha_Thai",  "tir_Ethi",  "taq_Latn",  "taq_Tfng",  "tpi_Latn",  "tsn_Latn",  "tso_Latn",  "tuk_Latn",  "tum_Latn",  "tur_Latn",  "twi_Latn",  "tzm_Tfng",  "uig_Arab",  "ukr_Cyrl",  "umb_Latn",  "urd_Arab",  "uzn_Latn",  "vec_Latn",  "vie_Latn",  "war_Latn",  "wol_Latn",  "xho_Latn",  "ydd_Hebr",  "yor_Latn",  "yue_Hant",  "zho_Hans",  "zho_Hant",  "zul_Latn" ]


pregunta = "what is Artificial intelligence?" #@param {type:"string"}

lenguaje_traducir = "spa_Latn" #@param ["ace_Arab",  "ace_Latn",  "acm_Arab",  "acq_Arab",  "aeb_Arab",  "afr_Latn",  "ajp_Arab",  "aka_Latn",  "amh_Ethi",  "apc_Arab",  "arb_Arab",  "ars_Arab",  "ary_Arab",  "arz_Arab",  "asm_Beng",  "ast_Latn",  "awa_Deva",  "ayr_Latn",  "azb_Arab",  "azj_Latn",  "bak_Cyrl",  "bam_Latn",  "ban_Latn",  "bel_Cyrl",  "bem_Latn",  "ben_Beng",  "bho_Deva",  "bjn_Arab",  "bjn_Latn",  "bod_Tibt",  "bos_Latn",  "bug_Latn",  "bul_Cyrl",  "cat_Latn",  "ceb_Latn",  "ces_Latn",  "cjk_Latn",  "ckb_Arab",  "crh_Latn",  "cym_Latn",  "dan_Latn",  "deu_Latn",  "dik_Latn",  "dyu_Latn",  "dzo_Tibt",  "ell_Grek",  "eng_Latn",  "epo_Latn",  "est_Latn",  "eus_Latn",  "ewe_Latn",  "fao_Latn",  "pes_Arab",  "fij_Latn",  "fin_Latn",  "fon_Latn",  "fra_Latn",  "fur_Latn",  "fuv_Latn",  "gla_Latn",  "gle_Latn",  "glg_Latn",  "grn_Latn",  "guj_Gujr",  "hat_Latn",  "hau_Latn",  "heb_Hebr",  "hin_Deva",  "hne_Deva",  "hrv_Latn",  "hun_Latn",  "hye_Armn",  "ibo_Latn",  "ilo_Latn",  "ind_Latn",  "isl_Latn",  "ita_Latn",  "jav_Latn",  "jpn_Jpan",  "kab_Latn",  "kac_Latn",  "kam_Latn",  "kan_Knda",  "kas_Arab",  "kas_Deva",  "kat_Geor",  "knc_Arab",  "knc_Latn",  "kaz_Cyrl",  "kbp_Latn",  "kea_Latn",  "khm_Khmr",  "kik_Latn",  "kin_Latn",  "kir_Cyrl",  "kmb_Latn",  "kon_Latn",  "kor_Hang",  "kmr_Latn",  "lao_Laoo",  "lvs_Latn",  "lij_Latn",  "lim_Latn",  "lin_Latn",  "lit_Latn",  "lmo_Latn",  "ltg_Latn",  "ltz_Latn",  "lua_Latn",  "lug_Latn",  "luo_Latn",  "lus_Latn",  "mag_Deva",  "mai_Deva",  "mal_Mlym",  "mar_Deva",  "min_Latn",  "mkd_Cyrl",  "plt_Latn",  "mlt_Latn",  "mni_Beng",  "khk_Cyrl",  "mos_Latn",  "mri_Latn",  "zsm_Latn",  "mya_Mymr",  "nld_Latn",  "nno_Latn",  "nob_Latn",  "npi_Deva",  "nso_Latn",  "nus_Latn",  "nya_Latn",  "oci_Latn",  "gaz_Latn",  "ory_Orya",  "pag_Latn",  "pan_Guru",  "pap_Latn",  "pol_Latn",  "por_Latn",  "prs_Arab",  "pbt_Arab",  "quy_Latn",  "ron_Latn",  "run_Latn",  "rus_Cyrl",  "sag_Latn",  "san_Deva",  "sat_Beng",  
"scn_Latn",  "shn_Mymr",  "sin_Sinh",  "slk_Latn",  "slv_Latn",  "smo_Latn",  "sna_Latn",  "snd_Arab",  "som_Latn",  "sot_Latn",  "spa_Latn",  "als_Latn",  "srd_Latn",  "srp_Cyrl",  "ssw_Latn",  "sun_Latn",  "swe_Latn",  "swh_Latn",  "szl_Latn",  "tam_Taml",  "tat_Cyrl",  "tel_Telu",  "tgk_Cyrl",  "tgl_Latn",  "tha_Thai",  "tir_Ethi",  "taq_Latn",  "taq_Tfng",  "tpi_Latn",  "tsn_Latn",  "tso_Latn",  "tuk_Latn",  "tum_Latn",  "tur_Latn",  "twi_Latn",  "tzm_Tfng",  "uig_Arab",  "ukr_Cyrl",  "umb_Latn",  "urd_Arab",  "uzn_Latn",  "vec_Latn",  "vie_Latn",  "war_Latn",  "wol_Latn",  "xho_Latn",  "ydd_Hebr",  "yor_Latn",  "yue_Hant",  "zho_Hans",  "zho_Hant",  "zul_Latn" ]

# Show the full text followed by its translation
print("\n")
print(f">  {textwrap.fill(texto_completo, width=100)}")
resTrad = translate(texto_completo, lenguaje_texto, lenguaje_traducir)
print(f"\t\t ( {textwrap.fill(resTrad, width=100)} )")

# Summarize the full text
txt_resumido = generate_summary(texto_completo)
print(f"\t  RESUMEN:  {textwrap.fill(txt_resumido, width=100)}")

# Translate the summary
resTrad = translate(txt_resumido, lenguaje_texto, lenguaje_traducir)
print(f"\t\t ( {textwrap.fill(resTrad, width=100)} )")

print()
print(f"\t {pregunta}  (con texto completo)")

# Answer the question with the Q&A model, using the full text as context
res = responder_pregunta(pregunta, texto_completo)
print(f"\t  --> {res}")

# Translate both the question and the answer
q_resTrad = translate(pregunta, lenguaje_texto, lenguaje_traducir)
r_resTrad = translate(res, lenguaje_texto, lenguaje_traducir)

print(f"\t\t\t( {q_resTrad} --> {r_resTrad} )")

print()
print(f"\t {pregunta}  (con resumen)")

# Answer the same question, this time using only the summary as context
res = responder_pregunta(pregunta, txt_resumido)

print(f"\t  --> {res}")

# Translate the new answer (the translated question is reused)
r_resTrad = translate(res, lenguaje_texto, lenguaje_traducir)

print(f"\t\t ( {q_resTrad} --> {r_resTrad} )")





>  Artificial intelligence (AI) is the intelligence of machines or software, as opposed to the
intelligence of humans or other animals. It is a field of study in computer science that develops
and studies intelligent machines. Such machines may be called AIs. AI technology is widely used
throughout industry, government, and science. Some high-profile applications are: advanced web
search engines (e.g., Google Search), recommendation systems (used by YouTube, Amazon, and Netflix),
understanding human speech (such as Google Assistant, Siri, and Alexa), self-driving cars (e.g.,
Waymo), generative and creative tools (ChatGPT and AI art), and superhuman play and analysis in
strategy games (such as chess and Go). Alan Turing was the first person to conduct substantial
research in the field that he called Machine Intelligence. Artificial intelligence was founded as an
academic discipline in 1956. The field went through multiple cycles of optimism followed by
disappointment and loss of fundi