In [3]:
# Colab/Kaggle bootstrap: clone the book's notebooks repository, change into the
# clone, and install its requirements. Only needed on a hosted runtime; comment
# this cell out when working from an existing local checkout.
# NOTE(review): the clone target and `%cd` are both relative to the current
# working directory, and the recorded output ends in /content/notebooks/notebooks,
# so the cell appears to have been run from inside an earlier clone — confirm it
# is executed only once per session.
!git clone https://github.com/nlp-with-transformers/notebooks.git
%cd notebooks
from install import *
install_requirements()

Cloning into 'notebooks'...
remote: Enumerating objects: 530, done.[K
remote: Counting objects: 100% (209/209), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 530 (delta 184), reused 162 (delta 162), pack-reused 321 (from 2)[K
Receiving objects: 100% (530/530), 28.52 MiB | 28.72 MiB/s, done.
Resolving deltas: 100% (253/253), done.
/content/notebooks/notebooks
⏳ Installing base requirements ...
✅ Base requirements installed!
⏳ Installing Git LFS ...
✅ Git LFS installed!


In [4]:
# Pull in the chapter helpers and run the chapter setup routine.
# NOTE(review): the star import hides which names come from `utils`; only
# `setup_chapter` is used in this cell. The recorded output shows it reporting
# the installed transformers and datasets versions.
from utils import *
setup_chapter()

Using transformers v4.16.2
Using datasets v1.16.1


## Hugging Face Transformers: Bridging the Gap

## A Tour of Transformer Applications

In [5]:
# Sample customer complaint used as the shared input for every pipeline demo.
# Built from adjacent string literals so the value is one single-line string
# (no embedded newlines), with exactly one space between sentences.
synthetic_review = (
    "Dear Amazon, I recently purchased a set of wireless earbuds from your "
    "online store in the United States. However, upon receiving the "
    "package, I was disappointed to find that the earbuds were defective "
    "and would not charge. As someone who relies on quality audio equipment "
    "for work and leisure, this has been quite inconvenient. I kindly "
    "request a replacement for the defective product or a full refund. "
    "Attached are the necessary documents and proof of purchase. Looking "
    "forward to your prompt response. Sincerely, Alex."
)

### Text Classification

In [6]:
from transformers import pipeline

# Build the sentiment classifier used on the review.
# The checkpoint is named explicitly instead of relying on the task's implicit
# default, so a change of default in a later library release cannot silently
# alter the results shown in this notebook.
# NOTE(review): this is the checkpoint the "text-classification" task resolved
# to under transformers v4.16.2 (the version this notebook was recorded with) —
# confirm when upgrading.
classifier = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [7]:
import pandas as pd
# Never truncate long string cells, so the full text of each result is visible.
pd.options.display.max_colwidth = None

# Classify the review; the pipeline returns a list of {label, score} records,
# which is rendered as a table by leaving the DataFrame as the last expression.
outputs = classifier(synthetic_review)
pd.DataFrame(outputs)

Unnamed: 0,label,score
0,NEGATIVE,0.987568


### Named Entity Recognition

In [8]:
# Named-entity tagger. aggregation_strategy="simple" merges word pieces that
# belong to the same entity, so each row of the table is one whole entity
# (e.g. "United States") rather than individual tokens.
# The checkpoint is named explicitly instead of relying on the task's implicit
# default, so a change of default in a later library release cannot silently
# alter the results.
# NOTE(review): this is the checkpoint the "ner" task resolved to under
# transformers v4.16.2 — confirm when upgrading.
ner_tagger = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
ner_outputs = ner_tagger(synthetic_review)
pd.DataFrame(ner_outputs)

Downloading:   0%|          | 0.00/998 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Unnamed: 0,entity_group,score,word,start,end
0,ORG,0.948043,Amazon,5,11
1,LOC,0.999619,United States,90,103
2,PER,0.935605,Alex,515,519


### Question Answering

In [9]:
# Extractive question-answering reader: given a question and a context, it
# returns the answer span together with its character offsets and a score.
# The checkpoint is named explicitly instead of relying on the task's implicit
# default, so a change of default in a later library release cannot silently
# alter the results.
# NOTE(review): this is the checkpoint the "question-answering" task resolved
# to under transformers v4.16.2 — confirm when upgrading.
reader = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
)
question = "What product was purchased?"
answer = reader(
    question=question,
    context=synthetic_review,
)
# A single answer is a dict, so wrap it in a list to get a one-row table.
pd.DataFrame([answer])

Downloading:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/249M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

Unnamed: 0,score,start,end,answer
0,0.53788,43,59,wireless earbuds


In [10]:
# Reuse the QA reader to find out who signed the review.
question = "Who wrote the review?"
answer = reader(question=question, context=synthetic_review)
# Wrap the single answer dict in a list to display it as a one-row table.
pd.DataFrame([answer])

Unnamed: 0,score,start,end,answer
0,0.217927,515,519,Alex


In [11]:
# Reuse the QA reader to extract what the customer is asking for.
question = "What does the customer want?"
answer = reader(question=question, context=synthetic_review)
# Wrap the single answer dict in a list to display it as a one-row table.
pd.DataFrame([answer])

Unnamed: 0,score,start,end,answer
0,0.229822,345,401,a replacement for the defective product or a full refund


### Summarization

In [12]:
# Abstractive summarizer for the review.
# The checkpoint is named explicitly instead of relying on the task's implicit
# default, so a change of default in a later library release cannot silently
# alter the results.
# NOTE(review): this is the checkpoint the "summarization" task resolved to
# under transformers v4.16.2 — confirm when upgrading.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
# max_length / min_length bound the length of the generated summary (in tokens);
# clean_up_tokenization_spaces tidies spacing artifacts in the decoded text.
summary = summarizer(
    synthetic_review,
    max_length=50,
    min_length=25,
    clean_up_tokenization_spaces=True,
)
pd.DataFrame(summary)

Downloading:   0%|          | 0.00/1.76k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Unnamed: 0,summary_text
0,Amazon's wireless earbuds were defective and would not charge. Alex asks for a replacement for the product or a full refund.


### Translation

In [13]:
# English-to-French translation with an explicitly chosen checkpoint.
translator = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")

# min_length sets a lower bound on the length of the generated translation;
# clean_up_tokenization_spaces tidies spacing artifacts in the decoded text.
outputs = translator(
    synthetic_review, min_length=50, clean_up_tokenization_spaces=True
)
pd.DataFrame(outputs)

Downloading:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/287M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/760k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/784k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.28M [00:00<?, ?B/s]

Unnamed: 0,translation_text
0,"Cher Amazon, j'ai récemment acheté un ensemble d'oreilles sans fil de votre boutique en ligne aux États-Unis. Cependant, après avoir reçu le paquet, j'ai été déçu de constater que les oreillettes étaient défectueuses et ne seraient pas facturées. Comme quelqu'un qui compte sur un équipement audio de qualité pour le travail et les loisirs, cela a été tout à fait gênant. Je demande un remplacement pour le produit défectueux ou un remboursement complet."


### Text Generation

In [14]:
from transformers import set_seed
# Seed the random number generators before text generation so that the
# continuation produced by the generator below can be reproduced.
# NOTE(review): presumably the text-generation pipeline samples tokens, which
# is why a fixed seed matters here — confirm.
set_seed(7)

In [15]:
# GPT-2 text generator used to draft a customer-service reply.
generator = pipeline("text-generation", model="gpt2")

# The prompt is the review followed by the opening words of the reply; the
# model is asked to continue from there.
prompt = (
    synthetic_review
    + "\n\n Customer service response: \n Dear Alex, I am sorry to hear about your "
)

# Request a single continuation, capped by max_length=200.
outputs = generator(
    prompt,
    num_return_sequences=1,
    max_length=200,
    truncation=True,
)
# The result is plain text with newlines, so print it instead of tabulating it.
print(outputs[0]["generated_text"])

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Dear Amazon, I recently purchased a set of wireless earbuds from your online
store in the United States. However, upon receiving the package, I was
disappointed to find that the earbuds were defective and would not charge. As
someone who relies on quality audio equipment for work and leisure, this has
been quite inconvenient. I kindly request a replacement for the defective
product or a full refund. Attached are the necessary documents and proof of
purchase. Looking forward to your prompt response. Sincerely, Alex.

 Customer service response:
 Dear Alex, I am sorry to hear about your ___________ problems. I have recently
been contacted by your online store who have an order for the earbuds (as well
as an Air Conditioner for the earbuds). This was extremely confusing and hard to
explain to them. They contacted you through their email. I am not sure where you
heard from them other than in your email messages and in the following text. I
have ordered from you


### The Hugging Face Hub

<img alt="hub-overview" width="1000" caption="The models page of the Hugging Face Hub, showing filters on the left and a list of models on the right." src="https://github.com/nlp-with-transformers/notebooks/blob/main/images/chapter01_hub-overview.png?raw=1" id="hub-overview"/>

<img alt="hub-model-card" width="1000" caption="An example model card from the Hugging Face Hub. The inference widget is shown on the right, where you can interact with the model." src="https://github.com/nlp-with-transformers/notebooks/blob/main/images/chapter01_hub-model-card.png?raw=1" id="hub-model-card"/>