# Główne zadania w NLP a transformery
Teraz przyjrzymy się głównym zastosowaniom transformerów w NLP za pomocą biblioteki Hugging Face Transformers.


In [None]:
!pip3 install transformers[sentencepiece]==4.18.0
!pip3 install datasets==1.15.1
!pip3 install huggingface_hub>=0.1.0,<1.0.0

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

## Wypełnienie maski


In [None]:
# Masked-language-modelling demo. No checkpoint is named here, so the
# pipeline uses whatever model the library selects for the "fill-mask" task.
classifier = pipeline(task="fill-mask")

# "<mask>" marks the slot the model has to fill in. The bare call is the last
# expression of the cell, so its result is shown as the cell output.
masked_sentence = "Warsaw is the <mask> of Poland."
classifier(masked_sentence)

## Klasyfikacja tokenów
Wyróżnia się:
- Named Entity Recognition (NER)
- Part-of-Speech tagging (PoS)

In [None]:
# Polish sample sentence that mentions several institutions.
example = "Narodowy Korpus Języka Polskiego jest wspólną inicjatywą Instytutu Podstaw Informatyki PAN (koordynator), Instytutu Języka Polskiego PAN, Wydawnictwa Naukowego PWN oraz Zakładu Językoznawstwa Komputerowego i Korpusowego Uniwersytetu Łódzkiego."

# Token-classification ("ner") pipeline; the checkpoint is passed by keyword,
# the same way the later cells of this notebook pass theirs.
pipe = pipeline(task="ner", model="Babelscape/wikineural-multilingual-ner")

# Tag the sentence and print the raw list of predictions.
ner_results = pipe(example)
print(ner_results)

## Tłumaczenie
Oprócz tłumaczenia z jednego języka na inny zalicza się tu też m.in. transfer stylu, generowanie kodu na podstawie promptu itp.

In [None]:
# Polish input sentence to be translated.
text = "Modele generatywne to szeroka klasa algorytmów nauczenia maszynowego, która zajmuje się modelowaniem rozkładu łącznego danych."

# Translation pipeline using the "opus-mt-pl-en" checkpoint
# (presumably Polish -> English, judging by the checkpoint name).
tran = pipeline(task="translation", model="Helsinki-NLP/opus-mt-pl-en")

# Translate and print the pipeline's output.
translation = tran(text)
print(translation)


## Streszczanie tekstu (podsumowywanie)

In [None]:
# English paragraph about NLP that serves as the document to summarize.
# NOTE: this rebinds `text`, which the translation cell also uses.
text = 'Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence concerned with the interactions between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data. The goal is a computer capable of "understanding" the contents of documents, including the contextual nuances of the language within them. The technology can then accurately extract information and insights contained in the documents as well as categorize and organize the documents themselves. Challenges in natural language processing frequently involve speech recognition, natural-language understanding, and natural-language generation.'

# Summarization pipeline backed by the "google/pegasus-xsum" checkpoint.
summ = pipeline(task="summarization", model="google/pegasus-xsum")

# Summarize the paragraph and print the pipeline's output.
summarized = summ(text)
print(summarized)


## Generowanie tekstu
*Uwaga: w Google Colab bez wersji Pro może się tu pojawić błąd CUDA – dostępnej pamięci VRAM jest za mało, by pomieścić model.*

In [None]:
# Text-generation pipeline backed by the "sberbank-ai/mGPT" checkpoint.
generator = pipeline(task="text-generation", model="sberbank-ai/mGPT")

# Continue the prompt: one returned sequence, with max_length set to 30.
# The call is the last expression of the cell, so its result is displayed.
generator(
    "Hello, I'm a language model,",
    max_length=30,
    num_return_sequences=1,
)

## Ekstrakcyjne odpowiadanie na pytania

In [None]:
from transformers import pipeline

# The question, and the passage in which the answer is to be found.
question = "Who founded facebook?"
context = 'Facebook is an American online social media and social networking service owned by Meta Platforms. Founded in 2004 by Mark Zuckerberg with fellow Harvard College students and roommates Eduardo Saverin, Andrew McCollum, Dustin Moskovitz, and Chris Hughes, its name comes from the face book directories often given to American university students. Membership was initially limited to Harvard students, gradually expanding to other North American universities and, since 2006, anyone over 13 years old. As of 2020, Facebook claimed 2.8 billion monthly active users,[2] and ranked fourth in global internet usage.[6] It was the most downloaded mobile app of the 2010s.'

# Question-answering pipeline; no checkpoint is named, so the library picks
# the model for this task.
qa_model = pipeline(task="question-answering")

# Last expression of the cell: its result is shown as the cell output.
qa_model(question=question, context=context)

## Podobieństwo zdań

In [None]:
!pip install -U sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer, util
# Two sentences whose similarity is to be compared.
sentences = ["I'm happy", "I'm full of happiness"]

# Sentence-embedding model loaded from the given checkpoint.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode each sentence separately, asking for tensors back.
embedding_1, embedding_2 = (
    model.encode(sentence, convert_to_tensor=True) for sentence in sentences
)

# Cosine similarity of the two embeddings; as the last expression of the
# cell, the result is shown as the cell output.
util.pytorch_cos_sim(embedding_1, embedding_2)