# Transformers

In [1]:
import spacy
from spacy import displacy

# Load the `en_core_web_lg` spaCy pipeline; it is used for the first NER pass.
nlp_en_lg = spacy.load("en_core_web_lg")

# English sample passage; it is reused as input by the transformer-based cell too.
text_sample = """As regulators, official bodies, and general users come to depend on AI-based dynamic systems, clearer accountability will be required for automated decision-making processes to ensure trust and transparency. Evidence of this requirement gaining more momentum can be seen with the launch of the first global conference exclusively dedicated to this emerging discipline, the International Joint Conference on Artificial Intelligence: Workshop on Explainable Artificial Intelligence (XAI).[63]

The European Union introduced a right to explanation in the General Data Protection Right (GDPR) as an attempt to deal with the potential problems stemming from the rising importance of algorithms. The implementation of the regulation began in 2018. However, the right to explanation in GDPR covers only the local aspect of interpretability. In the United States, insurance companies are required to be able to explain their rate and coverage decisions.[64]
"""

# Run the pipeline over the passage and show what kind of object comes back.
doc = nlp_en_lg(text_sample)
print(type(doc))

# Render the recognised entities inline with displaCy's "ent" visualiser.
displacy.render(doc, style="ent")


  from .autonotebook import tqdm as notebook_tqdm


<class 'spacy.tokens.doc.Doc'>


In [2]:
# Named Entity Recognition (NER) with spaCy's transformer-based English pipeline.
# NOTE(review): the published en_core_web_trf pipeline is documented as
# RoBERTa-based rather than BERT — confirm for the installed version.

nlp_en_trf = spacy.load("en_core_web_trf")

# Same passage as the previous cell, so the two entity renderings can be compared.
doc = nlp_en_trf(text_sample)
displacy.render(doc, style="ent")



In [4]:
# NER on Swedish text with the `sv_core_news_sm` pipeline.

nlp_swe = spacy.load("sv_core_news_sm")

# Swedish sample passage (the literal starts and ends with a newline).
text_sample_swe = """
Grannlandet Norge har kommit långt med att elektrifiera sin bilflotta. Om ett år kommer nybilsförsäljningen i Norge vara uppe i 100 procent bilar med sladd. Min kollega , techkorrespondenten Alexander Norén berättar att det som förbluffade honom när han åkte till Norge för att få förklaringen till elbilsboomen där var hur starka de ekonomiska incitamenten är, att det för många är en plånboksfråga att dumpa fossilbilen. 
"""

doc = nlp_swe(text_sample_swe)

# Pass the visualiser style by keyword, matching the other displaCy calls.
displacy.render(doc, style="ent")

In [5]:
# Map each entity's string form to its label. Entities that share the same
# text collapse into a single key, with the last occurrence winning.
entities = dict((str(ent), ent.label_) for ent in doc.ents)
entities

{'Norge': 'LOC', 'Alexander Norén': 'PRS'}

## Hugging Face

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and the sequence-classification model from the same
# checkpoint, in that order, without repeating the checkpoint name.
tokenizer, model = (
    auto_cls.from_pretrained("marma/bert-base-swedish-cased-sentiment")
    for auto_cls in (AutoTokenizer, AutoModelForSequenceClassification)
)

# Bare expression: display the model's module tree as the cell output.
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(50325, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [7]:
from transformers import pipeline

# Build a sentiment-analysis pipeline around the Swedish sentiment checkpoint.
sentiment = pipeline(
    task="sentiment-analysis",
    model="marma/bert-base-swedish-cased-sentiment",
)

# Quick smoke test on a single phrase; the result is shown as the cell output.
sentiment("bättre än kattskit")

[{'label': 'POSITIVE', 'score': 0.9939852356910706}]

In [8]:
# Assorted Swedish sentences used to probe the sentiment classifier.
sentences = ["Jag älskar dig sådär mycket", "Du är helt okej",
             "Matematik", "Statistik",
             "Glaset är halvfullt",
             "Glaset är halvtomt", "Jag har ätit pannkaka",
             "När du tar av dig skorna blir allt skönt", 
             "Gillar du pannkaka?"]

# Classify each sentence exactly once and reuse the result for both fields.
# The earlier version called `sentiment(sentence)` twice per iteration, which
# ran the model twice for every sentence.
for sentence in sentences:
    prediction = sentiment(sentence)[0]  # the pipeline returns a one-element list
    label, score = prediction["label"], prediction["score"]
    print(f"{sentence}: {label}, {score:.3f} ")

Jag älskar dig sådär mycket: POSITIVE, 0.999 
Du är helt okej: POSITIVE, 0.999 
Matematik: POSITIVE, 0.987 
Statistik: POSITIVE, 0.984 
Glaset är halvfullt: NEGATIVE, 0.997 
Glaset är halvtomt: NEGATIVE, 0.998 
Jag har ätit pannkaka: NEGATIVE, 0.998 
När du tar av dig skorna blir allt skönt: POSITIVE, 0.998 
Gillar du pannkaka?: NEGATIVE, 0.997 


## GPT-2

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the GPT-2 tokenizer and causal language model (tokenizer first), which
# rebinds the `tokenizer` and `model` names used earlier in the notebook.
tokenizer, model = (
    auto_cls.from_pretrained("gpt2")
    for auto_cls in (AutoTokenizer, AutoModelForCausalLM)
)

In [10]:
# Bare expression: display the module tree of the GPT-2 model loaded in the previous cell.
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [11]:
from transformers import pipeline, set_seed
# Text-generation pipeline backed by the "gpt2" checkpoint.
gpt2 = pipeline(task="text-generation", model="gpt2")

# Set the seed before generating so the sampled continuations are repeatable.
set_seed(42)

# Ask for five continuations of the prompt, each capped at 30 tokens in total.
gpt2(
    "Hello, I'm a language model,",
    max_length=30,
    num_return_sequences=5,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Hello, I'm a language model, I'm writing a new language for you. But first, I'd like to tell you about the language itself"},
 {'generated_text': "Hello, I'm a language model, and I'm trying to be as expressive as possible. In order to be expressive, it is necessary to know"},
 {'generated_text': "Hello, I'm a language model, so I don't get much of a license anymore, but I'm probably more familiar with other languages on that"},
 {'generated_text': "Hello, I'm a language model, a functional model... It's not me, it's me!\n\nI won't bore you with how"},
 {'generated_text': "Hello, I'm a language model, not an object model.\n\nIn a nutshell, I need to give language model a set of properties that"}]

In [13]:
# Print only the generated text of the first (and only) returned sequence.
print(
    gpt2("Welcome to IT-högskolan", max_length=100)[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Welcome to IT-högskolanz.

If you were able to take part in the event in 2016, please join us. Feel free to check the registration page for updates.

You will need a Windows 8 compatible computer

Software requirements

I am sorry for your delay. I have been unable to contact You already!

After receiving your registration request form, I will send you an email of the information you will need within 30 days of your arrival.


In [14]:
# Longer prompt with more context about the school; print the first sequence's text.
print(
    gpt2(
        "Welcome to IT-högskolan, we are a school specialised in IT. Our school has around 500 students. We are in Göteborg and Stockholm.",
        max_length=150,
    )[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Welcome to IT-högskolan, we are a school specialised in IT. Our school has around 500 students. We are in Göteborg and Stockholm. All of our students are engineers, science teachers and so on.

We have three different areas: business and technology.

Industrialisation

Technological development

As we move towards building technologies in a more integrated way, it will be important to be able to develop new technologies in collaboration with other members of IT community from the local community. Therefore we need to develop some sort of technical communication. This will allow the local community to develop new ideas and techniques that can be combined with our development efforts.

Communications also is another important part of


In [15]:
# Prompt made mostly of emoticons; print the first sequence's text.
print(
    gpt2("Frontend :( Backend :( Weekend :)", max_length=100)[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Frontend :( Backend :( Weekend :) Zooloretto Zoombinis Zotrix Zpeciation: Tough Days (TD) ZRoll Zula Zula Europe Zumbi Blocks Zup! Zup! 2 Zup! 3 Zup! 4 Zup! 5 Zup! 6 Zup! 7 Zup! 8 Zup! X Zup! Zero Zwei: The Arges Adventure Zwei: The Ilvard Insurrection Zzzz-Zzzz-Zzzz [the Sequence] 丛


In [16]:
# Short narrative prompt; print the first sequence's text.
print(
    gpt2(
        "Bella is a cute small rabbit that I love.",
        max_length=100,
    )[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Bella is a cute small rabbit that I love. I knew they would be doing a show with her next, but I never thought she would be on Cartoon Network… but she did. The fact that she has so many adorable little rabbits on so many shows makes it really hard to believe. So, I thought of writing a short "Bella-inspired " book about Bella's life after she ran into Mattel. I loved it so much!

P.S. It's not


### "Swedish GPT"

In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and causal language model for the Swedish GPT checkpoint
# (tokenizer first), rebinding `tokenizer` and `model` once more.
tokenizer, model = (
    auto_cls.from_pretrained("birgermoell/swedish-gpt")
    for auto_cls in (AutoTokenizer, AutoModelForCausalLM)
)

Downloading: 100%|██████████| 207/207 [00:00<00:00, 208kB/s]
Downloading: 100%|██████████| 835k/835k [00:00<00:00, 1.17MB/s]
Downloading: 100%|██████████| 501k/501k [00:00<00:00, 798kB/s] 
Downloading: 100%|██████████| 1.40M/1.40M [00:00<00:00, 1.62MB/s]
Downloading: 100%|██████████| 24.0/24.0 [00:00<00:00, 8.02kB/s]
Downloading: 100%|██████████| 90.0/90.0 [00:00<00:00, 44.8kB/s]
Downloading: 100%|██████████| 863/863 [00:00<00:00, 865kB/s]
Downloading: 100%|██████████| 487M/487M [01:41<00:00, 5.02MB/s] 


In [18]:
# Text-generation pipeline for the Swedish GPT checkpoint.
# NOTE(review): the checkpoint is passed by name, so the pipeline resolves it
# itself instead of using the `tokenizer`/`model` objects loaded just before
# this cell — confirm whether that second load is intended.
gpt_swe = pipeline(task="text-generation", model="birgermoell/swedish-gpt")

# Generate with default settings; the result list is shown as the cell output.
gpt_swe("Grattis på födelsedagen")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Grattis på födelsedagen pappa, du har fått en riktigt fin födelsedagspresent 😍 Den var verkligen värd varendaste öre 😀 Ja, det är det väl. Då var iallafall inget vidare'}]

In [21]:
# Same prompt with a larger length cap; print only the generated text.
print(
    gpt_swe("Grattis på födelsedagen", max_length=100)[0]["generated_text"]
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Grattis på födelsedagen! Väcker du din stora sovandes son? Jag var själv inte på plats hela dagen igår men med vänner och familj på bubbel, bubbel och bubbeltejt var jag hemma ändå. I alla fall när det gäller familjen! Jag är verkligen väldigt stolt över mig själv att jag orkade släpa på mig några vänner som jag alltid hade hemma och som alltid skryter om det och som verkligen bryr sig. Att jag orkade
