In [None]:
# Transformer experiments
# Inspiration from Hugging Face
# February 2024. Sila

In [None]:
from transformers import pipeline, set_seed

# Sentiment analysis with an explicitly named checkpoint (passed by keyword
# instead of as the second positional argument).
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
classifier("Aarhus is an awesome city, with a vibrant cultural scene.")

[{'label': 'POSITIVE', 'score': 0.999884843826294}]

In [None]:
# Score a second sentence with the sentiment pipeline bound to `classifier`
# in the preceding cell (that cell must have been run first).
classifier("AGF has seen better days")

[{'label': 'NEGATIVE', 'score': 0.9947360157966614}]

In [None]:
from transformers import pipeline
# Zero-shot classification: score a sentence against caller-supplied labels.
# NOTE: this rebinds `classifier`; from here on the name refers to the
# zero-shot pipeline, not the sentiment pipeline created earlier.
classifier = pipeline(task="zero-shot-classification", model="facebook/bart-large-mnli")

classifier("This is a course about C++ programming",
           candidate_labels=["Cooking", "IT", "politics", "business"])

{'sequence': 'This is a course about C++ programming',
 'labels': ['IT', 'business', 'Cooking', 'politics'],
 'scores': [0.4377013146877289,
  0.2929771840572357,
  0.1648344099521637,
  0.1044871136546135]}

In [None]:
# Same candidate labels as the previous zero-shot call, different sentence.
# Relies on `classifier` currently being bound to the zero-shot pipeline.
classifier(
    "The recipe for marshmellows was great",
    candidate_labels=["Cooking", "IT", "politics", "business"],
)

{'sequence': 'The recipe for marshmellows was great',
 'labels': ['Cooking', 'IT', 'business', 'politics'],
 'scores': [0.9134815335273743,
  0.04793522506952286,
  0.030422000214457512,
  0.008161296136677265]}

In [None]:
# Seed the random number generators before the first generation. The
# continuations produced by this pipeline are sampled (repeated calls return
# different text), so without a seed this cell's output changes on every run
# and cannot be reproduced with Restart & Run All.
set_seed(42)

# Text-generation pipeline backed by the GPT-2 checkpoint; reused by the
# generation cells that follow.
generator = pipeline("text-generation", model="gpt2")
generator("In this course, we will teach you how to")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "In this course, we will teach you how to read and talk with the world's finest poets, poets, performers, filmmakers, directors, directors of music, and in a small number of unique ways. We will:\n\nExplain common misunderstand"}]

In [None]:
# Re-seed so the five sampled continuations below are reproducible.
set_seed(42)

generator(
    "In this course, we will teach you how to",
    max_length=30,
    num_return_sequences=5,
    # Supplying `max_length` without a truncation policy makes the library
    # emit a "Truncation was not explicitly activated" warning; stating the
    # policy explicitly is the remedy that warning recommends. The prompt is
    # much shorter than 30 tokens, so nothing is actually cut.
    truncation=True,
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'In this course, we will teach you how to manage your own domain.\n\nDependencies\n\nIf you are new to WordPress, you'},
 {'generated_text': 'In this course, we will teach you how to create a fully open-source, interactive, real time blockchain application to enable you to understand how data'},
 {'generated_text': 'In this course, we will teach you how to set up your own custom Android Wear phone using the Moto 360.\n\nOur next course will be'},
 {'generated_text': 'In this course, we will teach you how to write and do a simple program of some basic mathematics, a simple proof of theorem proving, and then'},
 {'generated_text': 'In this course, we will teach you how to build a smart device and how to use it. We will use a Google Assistant to help you connect'}]

In [None]:
# Ten sampled continuations for a new prompt. No seed is set in this cell, so
# the result depends on the RNG state left behind by the cells run before it.
generator(
    "Donald Trump is",
    max_length=30,
    num_return_sequences=10,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Donald Trump is playing both sides of the aisle this time around – a guy who has repeatedly declared both sides to be enemies and enemies of the American people'},
 {'generated_text': "Donald Trump is very un-American.\n\nAnd he was absolutely right. He can't have people of both parties, with their views and their"},
 {'generated_text': "Donald Trump is not your enemy (we have zero enemies) and he's not like Putin. We also never say the only superpower is America. We"},
 {'generated_text': 'Donald Trump is now under investigation, and if he was, he would be prosecuted to the full extent of the law," the letter said.\n\n'},
 {'generated_text': 'Donald Trump is getting some of this money without his signature and without doing anything that\'s going to change things," Romney campaign spokeswoman Jennifer Palmieri said on'},
 {'generated_text': "Donald Trump is a white supremacist.\n\nBut this isn't as black as you might believe. We're only one-quarter black.\n\n"},
 

In [None]:
# Ten sampled continuations for another prompt. No seed is set in this cell,
# so the result depends on the RNG state left behind by the cells run before it.
generator(
    "Freudian theories are",
    max_length=30,
    num_return_sequences=10,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Freudian theories are the product of a large-scale historical evolution of the nature of reality.\n\nBut that makes the question of whether people'},
 {'generated_text': "Freudian theories are false. This is why they're so popular, because they are the ones who deny the existence of the Trinity. And as"},
 {'generated_text': 'Freudian theories are not the same thing as empirical theory, which is the subject we want to examine in detail, as much as it is necessary'},
 {'generated_text': 'Freudian theories are grounded in fact. When Hitler went into power, one of the main arguments being that the Jews were inferior to the Germans -'},
 {'generated_text': "Freudian theories are based on the axioms that show that you may have different desires or the opposite of which you're just an ideal."},
 {'generated_text': "Freudian theories are a good deal more plausible, but how do they relate to that of quantum physics? Let's assume that I have an account"},
 {'generated_text'

In [None]:
# Extractive question answering: the pipeline returns a span of `context`
# (with its character offsets and a score) as the answer to `question`.
question_answerer = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)
question_answerer(question="Where do I work?",
                  context="My name is Sylvain and I work at Hugging Face in Brooklyn")

{'score': 0.6949767470359802, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}

In [None]:
# Passage that the question-answering pipeline is asked about in the next cell.
# The blank lines at the start and end are part of the string literal, so they
# count towards any character offsets computed against this text.
context = r"""

Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the examples/pytorch/question-answering/run_squad.py script.

"""

In [None]:
# Query the passage currently bound to `context` and keep the pipeline's
# answer in `result` so its fields can be formatted below.
result = question_answerer(
    question="What is a good example of a question answering dataset?",
    context=context,
)

# One-line summary: answer text, score rounded to 4 decimals, and the
# start/end offsets reported by the pipeline.
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")

Answer: 'SQuAD dataset', score: 0.5152, start: 148, end: 161


In [None]:
# Second passage for the question-answering pipeline. This rebinds `context`,
# replacing the passage assigned in the earlier cell; the next cell queries
# whichever passage was assigned last.
context = r"""
Burt Freeman Bacharach, May 12, 1928 – February 8, 2023, was an American composer, songwriter, record producer, and pianist
who is widely regarded as one of the most important and influential figures of 20th-century popular music.
Starting in the 1950s, he composed hundreds of pop songs, many in collaboration with lyricist Hal David.
Bacharachs music is characterized by unusual chord progressions and time signature changes,
influenced by his background in jazz, and uncommon selections of instruments for small orchestras.
He arranged, conducted, and produced much of his recorded output.
"""

In [None]:
# Query the passage currently bound to `context`; `result` is overwritten
# with this new answer.
# NOTE(review): the question spells the name "Bacharac" while the passage
# uses "Bacharach" -- confirm whether the misspelling is intentional.
result = question_answerer(
    question="Who did Bacharac work with?",
    context=context,
)

# One-line summary: answer text, score rounded to 4 decimals, and the
# start/end offsets reported by the pipeline.
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")

Answer: 'Hal David', score: 0.5882, start: 327, end: 336
