# Transformers library use case types
Common examples of using transformers library with models from huggingface.co.

In [1]:
!pip install transformers
!pip install tensorflow
!pip install tf-keras
!pip install sacremoses

import warnings
warnings.filterwarnings('ignore')

Collecting transformers
  Using cached transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Using cached huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Using cached safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Using cached tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.46.2-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hUsing cached huggingface_hub-0.26.2-py3-none-any.whl (447 kB)
Downloading safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (434 kB)
Downloading tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x

## Text Generation

In [2]:
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline

def download_model(model_name: str, pipe_type='text-generation'):
   tokenizer = AutoTokenizer.from_pretrained(model_name)
   model = AutoModelWithLMHead.from_pretrained(model_name)
   pipe = pipeline(pipe_type, model=model, tokenizer=tokenizer)
   return pipe


text = "Provide recipe with list of steps to cook cheesecake."
pipe = download_model("gpt2")
text_generated = pipe(text, max_new_tokens=200,temperature=0.01)

print(text_generated[0]['generated_text'])

2024-11-10 07:04:56.281724: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-10 07:04:56.289057: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-10 07:04:56.310373: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731218696.345070   14270 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731218696.355540   14270 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-10 07:04:56.414004: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

ImportError: 
AutoModelWithLMHead requires the PyTorch library but it was not found in your environment.
However, we were able to find a TensorFlow installation. TensorFlow classes begin
with "TF", but are otherwise identically named to our PyTorch classes. This
means that the TF equivalent of the class you tried to import would be "TFAutoModelWithLMHead".
If you want to use TensorFlow, please use TF classes instead!

If you really do want to use PyTorch please go to
https://pytorch.org/get-started/locally/ and follow the instructions that
match your environment.


## Text Classification

In [29]:
from transformers import pipeline

# Załaduj model klasyfikacji tekstów
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Przykładowe artykuły
articles = [
    "The government announced new tax reforms today.",
    "The local team won the championship in a thrilling match.",
    "New advancements in AI are reshaping the tech industry.",
    "The art exhibit showcased contemporary works by emerging artists.",
    "New guidelines for a healthy diet were published by the health department."
]

# Definiowanie możliwych kategorii
candidate_labels = ["Polityka", "Sport", "Technologia", "Kultura", "Zdrowie"]

# Klasyfikacja artykułów
results = classifier(articles, candidate_labels=candidate_labels)

# Wyświetlenie wyników
for article, result in zip(articles, results):
    print(f"Article: {article}")
    print(f"Predicted Category: {result['labels'][0]}, Confidence: {result['scores'][0]}\n")


Article: The government announced new tax reforms today.
Predicted Category: Polityka, Confidence: 0.31741610169410706

Article: The local team won the championship in a thrilling match.
Predicted Category: Sport, Confidence: 0.6169815063476562

Article: New advancements in AI are reshaping the tech industry.
Predicted Category: Technologia, Confidence: 0.6335176825523376

Article: The art exhibit showcased contemporary works by emerging artists.
Predicted Category: Kultura, Confidence: 0.2816486954689026

Article: New guidelines for a healthy diet were published by the health department.
Predicted Category: Polityka, Confidence: 0.2837600111961365



## Sentiment analysis

In [30]:
from transformers import pipeline

# Załaduj model klasyfikacji sentymentu (może to być np. model GPT lub inny dostępny w ramach transformers)
classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

# Przykładowe recenzje
reviews = [
    "This product is amazing! I loved it.",
    "I am very disappointed. The product broke after one use.",
    "It's okay, does the job but nothing special."
]

# Klasyfikacja recenzji
results = classifier(reviews)

# Wyświetlenie wyników
for review, result in zip(reviews, results):
    print(f"Review: {review}")
    print(f"Sentiment: {result['label']}, Confidence: {result['score']}\n")



config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Review: This product is amazing! I loved it.
Sentiment: positive, Confidence: 0.9866390228271484

Review: I am very disappointed. The product broke after one use.
Sentiment: negative, Confidence: 0.930964469909668

Review: It's okay, does the job but nothing special.
Sentiment: neutral, Confidence: 0.6142873764038086



## Document analysis

In [37]:
from transformers import pipeline

pipe = pipeline("question-answering", model="google/flan-t5-base")

file = open("data/annual_report.html", "r")
document = file.read()
result = pipe(question="What is annual revenue of the company based on attached annual report?", context=document )
print(result)

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Some weights of T5ForQuestionAnswering were not initialized from the model checkpoint at google/flan-t5-base and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'score': 0.0001070289799827151, 'start': 10720, 'end': 10760, 'answer': ' <td style="text-align:right;">45,3</td>'}


## Machine translation

In [54]:
from transformers import pipeline

pipe = pipeline("translation", model="facebook/wmt19-en-de")

result = pipe("I'm foreigner and I don't speak german fluently.")
print(result)

Some weights of FSMTForConditionalGeneration were not initialized from the model checkpoint at facebook/wmt19-en-de and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[{'translation_text': 'Ich bin Ausländer und spreche kein fließend Deutsch.'}]


## Question answering

In [51]:
from transformers import pipeline

pipe = pipeline("question-answering", model="distilbert/distilbert-base-cased-distilled-squad")

result = pipe(context="""Collegium Da Vinci z siedzibą w Poznaniu – polska uczelnia niepubliczna w Poznaniu. Collegium Da Vinci to praktyczna uczelnia biznesowa kształcąca specjalistów w obszarze kreatywnego sektora biznesu. W ofercie posiada studia dyplomowe (I i II stopnia) i podyplomowe (w tym EMBA) w 4 głównych obszarach: Media kreatywne i sztuka, IT i analiza danych, Marketing i technologie, Zarządzanie i HR.
Uczelnia realizuje model nauczania oparty na diagnozie indywidualnych predyspozycji studentów (test Gallupa, Insightful Profiler™), indywidualizacji ścieżek kształcenia (moduły dodatkowe) oraz wsparciu tutorów w wyznaczaniu i osiąganiu edukacyjnych celów.
Oferowane przez CDV kierunki są mocno osadzone w realiach rynkowych, są tworzone wspólnie z biznesem oraz zorientowane na naukę praktycznych umiejętności zawodowych, aby pomagać studentom i słuchaczom w zaprojektowaniu lub rozwoju ich kariery zawodowej. Collegium Da Vinci kształci w podejściu interdyscyplinarnym, łącząc biznes z kreatywnością i humanistycznym podejściem do technologii.
Historia
10 czerwca 1996 Minister Edukacji Narodowej udzielił zezwolenia na utworzenie Wyższej Szkoły Nauk Humanistycznych i Dziennikarstwa w Poznaniu, której pomysłodawcą oraz założycielem jest Piotr Voelkel. WSNHiD zostało wpisane do wykazu uczelni niepublicznych pod numerem 90. W tym samym roku pierwsi studenci rozpoczęli kształcenie na kierunku politologia i nauki społeczne. Dwa lata później na WSNHiD powstaje nowy kierunek – stosunki międzynarodowe. W 2000 uruchomione zostają kolejne kierunki – socjologia i kulturoznawstwo, a w roku następnym – informatyka. W 2006 uczelnia uzyskuje zgodę na kształcenie na kierunku pedagogika. """,
      question=['What is Collegium da Vinci?',
'What are services of Collegium da Vinci?',
'When Collegium Da Vinci was founded?',
'Where are headquarters of Collegium da Vinci?'])
print(result)

[{'score': 0.2113601565361023, 'start': 19, 'end': 40, 'answer': 'z siedzibą w Poznaniu'}, {'score': 0.09133698046207428, 'start': 350, 'end': 373, 'answer': 'Marketing i technologie'}, {'score': 0.00805636402219534, 'start': 1058, 'end': 1062, 'answer': '1996'}, {'score': 0.0014388621784746647, 'start': 50, 'end': 82, 'answer': 'uczelnia niepubliczna w Poznaniu'}]


## Summarization

In [5]:
from transformers import pipeline

pipe = pipeline("summarization", model="facebook/bart-large-cnn")
file = open("data/nad_niemnem.txt", "r")
document = file.read()
result = pipe(document[:800], max_length=100, min_length=20, do_sample=False)
print(result)

[{'summary_text': 'Dzień był letni i świąteczny. Ciepło i radość lały się z błękitnego nieba i złotego słońca.'}]
