#### 1. Tokenizer

In [2]:
from transformers import BertTokenizer

# Load the pretrained tokenizer that ships with the uncased BERT base checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Sample sentence to run through the tokenizer.
text = "I love learning natural language processing"

# Split the sentence into tokens, then look up the vocabulary ID of each token.
tokens = tokenizer.tokenize(text)
input_ids = tokenizer.convert_tokens_to_ids(tokens)

# Report the raw sentence, its tokens and the matching IDs.
print(f"Original text: {text}")
print(f"Tokenized text: {tokens}")
print(f"Token IDs: {input_ids}")

Original text: I love learning natural language processing
Tokenized text: ['i', 'love', 'learning', 'natural', 'language', 'processing']
Token IDs: [1045, 2293, 4083, 3019, 2653, 6364]


#### 2. Sentiment 

In [29]:
import torch
# AutoTokenizer is imported in this cell so that it runs on a fresh kernel
# (Restart & Run All) without relying on an import made by another cell.
from transformers import AutoTokenizer, pipeline

# Checkpoint shared by the tokenizer and the classification model.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create a sentiment analysis pipeline; the model is loaded from its
# checkpoint name and paired with the tokenizer created above.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model=model_name,
    tokenizer=tokenizer,
)

# Test text
text = "I must to learn natural language processing"

# Get the sentiment: the pipeline returns a list with one dict per input,
# each carrying a 'label' and a 'score'.
result = sentiment_analyzer(text)
print(f"Sentiment: {result[0]['label']}")
print(f"Confidence: {result[0]['score']:.4f}")

Device set to use cpu


Sentiment: POSITIVE
Confidence: 0.9361


#### 3. NER 

In [19]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Load the tokenizer and the token-classification model from one checkpoint.
model_name = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Build the NER pipeline from the objects loaded above.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Test sentence.
texto = "El doctor Yan LeCun trabaja en Facebook y vive en Francia."

# Run entity recognition on the sentence.
entidades = ner_pipeline(texto)

# Print one line per prediction: token text, entity tag and score.
for entidad in entidades:
    palabra, etiqueta, puntuacion = entidad["word"], entidad["entity"], entidad["score"]
    print(f"{palabra} - {etiqueta} - {puntuacion:.2f}")


Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


Yan - B-PER - 1.00
Le - I-PER - 1.00
##C - I-PER - 1.00
##un - I-PER - 0.84
Facebook - B-ORG - 0.90
Fr - B-LOC - 0.93
##an - I-LOC - 0.94
##cia - I-LOC - 0.92


In [26]:
from transformers import pipeline

# Build the NER pipeline straight from the checkpoint name.
# aggregation_strategy="simple" makes each result carry an 'entity_group'
# key instead of a per-token 'entity' tag.
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
ner_pipeline = pipeline(
    "ner",
    model=model_name,
    aggregation_strategy="simple",
)

# Sentence to analyse.
text = "Apple CEO tim Cook announced new iPhone models in California yesterday."

# Run entity recognition.
entities = ner_pipeline(text)

# Print each entity with its type and confidence, followed by a divider line.
separator = "-" * 30
for entity in entities:
    print(f"Entity: {entity['word']}")
    print(f"Type: {entity['entity_group']}")
    print(f"Confidence: {entity['score']:.4f}")
    print(separator)

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


Entity: Apple
Type: ORG
Confidence: 0.9979
------------------------------
Entity: tim Cook
Type: PER
Confidence: 0.9986
------------------------------
Entity: iPhone
Type: MISC
Confidence: 0.9939
------------------------------
Entity: California
Type: LOC
Confidence: 0.9997
------------------------------


#### 4. Summarization


In [30]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load the tokenizer and the sequence-to-sequence model from one checkpoint.
modelo = "sshleifer/distilbart-cnn-12-6"
tokenizer = AutoTokenizer.from_pretrained(modelo)
model = AutoModelForSeq2SeqLM.from_pretrained(modelo)

# Build the summarization pipeline from the objects loaded above.
summarization_pipeline = pipeline(
    "summarization",
    model=model,
    tokenizer=tokenizer,
)

# Passage to summarize.
texto = """The Jebel es Zubleh is a mountain fifty miles and more in length, and so narrow that its tracery on the map gives it a likeness to a caterpillar crawling from the south to the north. Standing on its red-and-white cliffs, and looking off under the path of the rising sun, one sees only the Desert of Arabia, where the east winds, so hateful to vinegrowers of Jericho, have kept their playgrounds since the beginning. Its feet are well covered by sands tossed from the Euphrates, there to lie, for the mountain is a wall to the pasture-lands of Moab and Ammon on the west—lands which else had been of the desert a part.
The Arab has impressed his language upon everything south and east of Judea, so, in his tongue, the old Jebel is the parent of numberless wadies which, intersecting the Roman road—now a dim suggestion of what once it was, a dusty path for Syrian pilgrims to and from Mecca—run their furrows, deepening as they go, to pass the torrents of the rainy season into the Jordan, or their last receptacle, the Dead Sea. Out of one of these wadies—or, more particularly, out of that one which rises at the extreme end of the Jebel, and, extending east of north, becomes at length the bed of the Jabbok River—a traveller passed, going to the table-lands of the desert. To this person the attention of the reader is first besought."""

# Generate the summary with sampling disabled and explicit length bounds.
resumen = summarization_pipeline(
    texto,
    max_length=50,
    min_length=20,
    do_sample=False,
)

# Print the text of the first (and only) summary returned.
primer_resumen = resumen[0]
print(primer_resumen["summary_text"])


Device set to use cpu


 The Jebel is a mountain so narrow that its tracery on the map gives it a likeness to a caterpillar crawling from the south to the north . Its feet are well covered by sands tossed from the Euphrates, there to


In [32]:
# Display the raw pipeline result: a list holding one dict with a 'summary_text' key.
resumen

[{'summary_text': ' The Jebel is a mountain so narrow that its tracery on the map gives it a likeness to a caterpillar crawling from the south to the north . Its feet are well covered by sands tossed from the Euphrates, there to'}]

In [49]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer and the sequence-to-sequence model from one checkpoint.
modelo = "t5-base"
tokenizer = AutoTokenizer.from_pretrained(modelo)
model = AutoModelForSeq2SeqLM.from_pretrained(modelo)

# English source sentence.
texto_origen = "The artificial intelligence revolution is changing the world."

# Prepend the instruction prefix requesting English-to-French translation,
# then encode the prompt as a PyTorch tensor of token IDs.
entrada = "translate English to French: " + texto_origen
tokens = tokenizer.encode(entrada, return_tensors="pt")

# Generate the translation and decode the first returned sequence,
# dropping special tokens from the text.
output_tokens = model.generate(tokens, max_length=50)
primera_salida = output_tokens[0]
traduccion = tokenizer.decode(primera_salida, skip_special_tokens=True)

# Show the translation.
print(traduccion)


La révolution de l'intelligence artificielle change le monde.
