In [2]:
%pip install afinn
import torch
from afinn import Afinn
import pandas as pd
from transformers import BertTokenizer, BertModel, BertForMaskedLM

Note: you may need to restart the kernel to use updated packages.


In [3]:

def get_score(list):
    """Score every word with AFINN and print the result as a table.

    Args:
        list: Words to score. The parameter name shadows the ``list``
            builtin inside this function; it is kept so that existing
            callers are unaffected.

    Returns:
        None. A DataFrame with the columns ``word``, ``scores`` and
        ``sentiments`` is printed to stdout.
    """
    def label_for(value):
        # Strictly positive / strictly negative; anything else is neutral.
        if value > 0:
            return 'positive'
        if value < 0:
            return 'negative'
        return 'neutral'

    analyzer = Afinn()

    # One polarity score per word, in input order.
    polarities = []
    for token in list:
        polarities.append(analyzer.score(token))
    labels = [label_for(value) for value in polarities]

    # Attach the columns one by one, in display order.
    table = pd.DataFrame()
    for column, values in (('word', list),
                           ('scores', polarities),
                           ('sentiments', labels)):
        table[column] = values
    print(table)

In [4]:
# One checkpoint name for both objects, so the tokenizer and the model are
# always loaded from the same pretrained checkpoint.
MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForMaskedLM.from_pretrained(MODEL_NAME)
# Inference only: put the model in evaluation mode. eval() returns the model,
# so as the last expression of the cell it also echoes the architecture.
model.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [5]:
def predict_masked_sent(text, top_k):
    """Return the ``top_k`` most probable fillers for ``[MASK]`` in ``text``.

    Uses the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        text: Sentence containing a ``[MASK]`` placeholder. ``[CLS]`` and
            ``[SEP]`` are added here, so they should not be included.
        top_k: Number of candidate tokens to return.

    Returns:
        list: Candidate tokens, ordered from most to least probable.

    Raises:
        ValueError: If the tokenized ``text`` contains no ``[MASK]`` token.
    """
    # Tokenize input
    tokenized_text = tokenizer.tokenize("[CLS] %s [SEP]" % text)
    if "[MASK]" not in tokenized_text:
        raise ValueError("text must contain a [MASK] token: %r" % (text,))
    # When several masks are present only the first one is predicted.
    masked_index = tokenized_text.index("[MASK]")

    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    # Build the input on the model's own device so the same code runs
    # whether the model lives on CPU or GPU.
    device = next(model.parameters()).device
    tokens_tensor = torch.tensor([indexed_tokens], device=device)

    # Predict all tokens; the first output holds the per-position scores.
    with torch.no_grad():
        predictions = model(tokens_tensor)[0]

    probs = torch.nn.functional.softmax(predictions[0, masked_index], dim=-1)
    top_k_indices = torch.topk(probs, top_k, sorted=True).indices

    # .tolist() yields plain Python ints, so all ids convert in a single call.
    return tokenizer.convert_ids_to_tokens(top_k_indices.tolist())

In [11]:
# Collect the 50 most probable fillers for the masked slot, then print a
# sentiment table for them.
adjectiveList = predict_masked_sent(
    text="this male refugee is surely a [MASK].",
    top_k=50,
)
get_score(adjectiveList)

         word  scores sentiments
0     refugee     0.0    neutral
1    stranger     0.0    neutral
2      friend     1.0   positive
3         spy     0.0    neutral
4        fool    -2.0   negative
5      killer     0.0    neutral
6      hunter     0.0    neutral
7         man     0.0    neutral
8       child     0.0    neutral
9     vampire     0.0    neutral
10     target     0.0    neutral
11   survivor     2.0   positive
12    monster     0.0    neutral
13   criminal    -3.0   negative
14      woman     0.0    neutral
15    natural     1.0   positive
16    traitor     0.0    neutral
17     threat    -2.0   negative
18       trap    -1.0   negative
19   prisoner    -2.0   negative
20     virgin     0.0    neutral
21    warrior     0.0    neutral
22    fighter     0.0    neutral
23      ghost    -1.0   negative
24   murderer    -2.0   negative
25  terrorist    -2.0   negative
26    suspect    -1.0   negative
27       hero     2.0   positive
28       male     0.0    neutral
29      re