# Results:
| model_name        | acc (no info)    | f1 (no info)    | acc (definition) | f1 (definition) | hf_link                                  |
|-------------------|--------|-------|-|-|------------------------------------------|
| bert-base-uncased | 0.5282 | 0.5290 | 0.5188 | 0.5189 | https://huggingface.co/bert-base-uncased |
| bert-base-cased   | 0.5293 | 0.5301 | 0.5366 | 0.5374 | https://huggingface.co/bert-base-cased |
| **roberta-base** | 0.5722 | 0.5717 | 0.5649 | 0.5669 | https://huggingface.co/roberta-base |
| distilbert-base-uncased | 0.5157 | 0.5153 | 0.5157 | 0.5193 | https://huggingface.co/distilbert-base-uncased |
| distilbert-base-cased | 0.5010 | 0.5040 | 0.5146 | 0.5164 | https://huggingface.co/distilbert-base-cased |
| albert-base-v2 | 0.4969 | 0.5020 | 0.4958 | 0.4995 | https://huggingface.co/albert-base-v2 |
| deberta-base | 0.5690 | 0.5693 | 0.5743 | 0.5742 | https://huggingface.co/microsoft/deberta-base |
| xlnet-base-cased | 0.5596 | 0.5598 | 0.5502 | 0.5500 | https://huggingface.co/xlnet-base-cased |

In [None]:
import pandas as pd
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import torch

In [2]:
id2emotion = {
    0: 'Anger',
    1: 'Resentment',
    2: 'Frustration',
    3: 'Hate',
    4: 'Disgust',
    5: 'Boredom',
    6: 'Reluctance',
    7: 'Sadness',
    8: 'Pity',
    9: 'Loneliness',
    10: 'Humiliation',
    11: 'Longing',
    12: 'Envy',
    13: 'Guilt',
    14: 'Regret',
    15: 'Shame',
    16: 'Fear',
    17: 'Anxiety',
    18: 'Doubt',
    19: 'Desperation',
    20: 'Confusion',
    21: 'Shock',
    22: 'Pleasure',
    23: 'Serenity',
    24: 'Relief',
    25: 'Happiness',
    26: 'Lust',
    27: 'Affection',
    28: 'Gratitude',
    29: 'Admiration',
    30: 'Pride',
    31: 'Determination',
    32: 'Fascination',
    33: 'Surprise',
    34: 'Excitement',
    35: 'Hope'
}
emotion2id = {val: key for key, val in id2emotion.items()}

In [3]:
idiom_lexicon = pd.read_csv('../../../dataset/idiom_lexicon.csv')
idiom_lexicon.head()

Unnamed: 0,Idiom,WiktionaryURL,Pos,Neg,Neu,Inapprop.,Total,%Pos,%Neg,%Neu,sentiment,FilterOut(X),definition,idiom_id
0,American Dream,https://en.wiktionary.org/wiki/American_Dream,8,0,2,0,10,0.8,0.0,0.2,positive,,A widespread determination by Americans to pro...,0.0
1,Catch-22,https://en.wiktionary.org/wiki/Catch-22,0,7,3,0,10,0.0,0.7,0.3,negative,,A difficult situation from which there is no e...,1.0
2,Christmas present,https://en.wiktionary.org/wiki/Christmas_present,6,0,4,0,10,0.6,0.0,0.4,positive,,,2.0
3,Downing Street,https://en.wiktionary.org/wiki/Downing_Street,0,0,10,0,10,0.0,0.0,1.0,other,,,3.0
4,Dutch courage,https://en.wiktionary.org/wiki/Dutch_courage,2,2,6,0,10,0.2,0.2,0.6,other,,The courage or bravado induced by alcohol. An ...,4.0


In [4]:
def make_df_from_csv(filepath):
    dataset = pd.read_csv(filepath, index_col=0)
    idiom, sentence, emotion, emotion_id = [], [], [], []
    for i, row in dataset.iterrows():
        if row['emotion'] not in emotion2id.keys():
            continue
        idiom.append(row['idiom'])

        if row['sentence'][0] == ' ':
            full_sent = row['sentence'][1:]
        else:
            full_sent = row['sentence']


        if 'eval' in filepath:
            definition = idiom_lexicon[idiom_lexicon['Idiom'] == dataset.loc[i]['idiom']]['definition'].values
        else:
            definition = idiom_lexicon[idiom_lexicon['idiom_id'] == dataset.loc[i]['idiom_id']]['definition'].values

        if len(definition) > 0:
            full_sent += f' The definition of this idiom is "{definition[0]}."'

        sentence.append(full_sent)
        emotion.append(row['emotion'])
        emotion_id.append(emotion2id[row['emotion']])



    df = pd.DataFrame()
    df['idiom'] = idiom
    df['sentence'] = sentence
    df['emotion'] = emotion
    df['emotion_id'] = emotion_id

    return df

In [5]:
train_df = make_df_from_csv('../../../dataset/idem_train.csv')
eval_df = make_df_from_csv('../../../dataset/idem_test.csv')

In [6]:
st_train_df = pd.DataFrame({
    'text': train_df['sentence'],
    'labels': train_df['emotion_id'],
})
st_eval_df = pd.DataFrame({
    'text': eval_df['sentence'],
    'labels': eval_df['emotion_id'],
})
st_train_df.head()

Unnamed: 0,text,labels
0,"Sally finally achieved the American Dream, liv...",25
1,She couldn't help but feel a sense of pride wh...,30
2,They were envious of their neighbor's seemingl...,12
3,The immigrant family tirelessly pursued the Am...,35
4,I can't believe I'm in this Catch- situation a...,2


In [7]:
print(f'Length Training Data: {len(st_train_df)}')
print(f'Length Eval Data: {len(st_eval_df)}')

Length Training Data: 8729
Length Eval Data: 956


In [None]:
model_args = ClassificationArgs(
    num_train_epochs = 10,
    # evaluate_during_training = True,
    overwrite_output_dir = True,
    save_eval_checkpoints=True,
    train_batch_size=16,
    eval_batch_size=16
)
model = ClassificationModel(
    'xlnet',
    'xlnet-base-cased',
    args=model_args,
    num_labels=len(emotion2id.keys()),
    use_cuda=torch.cuda.is_available()
)

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.weight', 'sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
model.train_model(st_train_df)

  0%|          | 0/8729 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/546 [00:00<?, ?it/s]



Running Epoch 1 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/546 [00:00<?, ?it/s]

(5460, 0.6080937264772855)

In [None]:
predictions, raw_outputs = model.predict(st_eval_df['text'].to_list())

  0%|          | 0/956 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

In [None]:
from sklearn.metrics import f1_score, accuracy_score

acc = accuracy_score(st_eval_df['labels'].to_list(), predictions)
f1 = f1_score(st_eval_df['labels'].to_list(), predictions, average='weighted')

print(f'Accuracy Score: {round(acc, 4)}')
print(f'F1 Score: {round(f1, 4)}')

Accuracy Score: 0.5502
F1 Score: 0.55


In [None]:
import shutil
shutil.rmtree('./outputs/')

# OpenPrompt prompt-learning

In [None]:
from openprompt.data_utils import InputExample

dataset = {}

dataset['train'] = []
for i, row in train_df.iterrows():
    input_example = InputExample(
        text_a = row['sentence'],
        text_b = row['emotion'],
        label = int(row['emotion_id']),
        guid = i
    )
    dataset['train'].append(input_example)

dataset['validation'] = []
for i, row in eval_df.iterrows():
    input_example = InputExample(
        text_a = row['sentence'],
        text_b = row['emotion'],
        label = int(row['emotion_id']),
        guid = i
    )
    dataset['validation'].append(input_example)

print(dataset['train'][0])
print(dataset['validation'][0])

In [9]:
from openprompt.plms import load_plm
plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-cased")

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Downloading (…)/main/tokenizer.json: 100%|██████████| 436k/436k [00:00<00:00, 1.51MB/s]


In [10]:
# Constructing Template
# A template can be constructed from the yaml config,
# but it can also be constructed by directly passing arguments
from openprompt.prompts import ManualTemplate
template_text = '{"placeholder":"text_a"} Thus the emotion of this sentence is: {"mask"}.'
mytemplate = ManualTemplate(tokenizer=tokenizer, text=template_text)

In [11]:
# To better understand how the template wraps the example, we visualize on instance:
wrapped_example = mytemplate.wrap_one_example(dataset['train'][0])
wrapped_example

[[{'text': 'Sally finally achieved the American Dream, living in a beautiful house with a white picket fence and a happy family. The definition of this idiom is "A widespread determination by Americans to provide their children with a better upbringing than their parents were able to provide for them. A philosophy that with perseverance, courage and determination, anyone can prosper and achieve success.."',
   'loss_ids': 0,
   'shortenable_ids': 1},
  {'text': ' Thus the emotion of this sentence is:',
   'loss_ids': 0,
   'shortenable_ids': 0},
  {'text': '<mask>', 'loss_ids': 1, 'shortenable_ids': 0},
  {'text': '.', 'loss_ids': 0, 'shortenable_ids': 0}],
 {'guid': 0, 'label': 25}]

In [12]:
# Using the WrapperClass is recommeded, as it is tailored for the InputExample class
wrapped_t5tokenizer = WrapperClass(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer, truncate_method='head')
# or
# from openprompt.plms import T5TokenizerWrapper
# wrapped_t5tokenizer = T5TokenizerWrapper(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer, truncate_method='head')

In [13]:
# Tokenized example:
tokenized_example = wrapped_t5tokenizer.tokenize_one_example(wrapped_example, teacher_forcing=False)
print(tokenized_example)
print(tokenizer.convert_ids_to_tokens(tokenized_example['input_ids']))

{'input_ids': [101, 8595, 1921, 3890, 1103, 1237, 6525, 117, 1690, 1107, 170, 2712, 1402, 1114, 170, 1653, 3368, 2105, 8617, 1105, 170, 2816, 1266, 119, 1109, 5754, 1104, 1142, 25021, 2660, 1306, 1110, 107, 138, 6506, 9220, 1118, 4038, 1106, 2194, 1147, 1482, 1114, 170, 1618, 27981, 1190, 1147, 2153, 1127, 1682, 1106, 2194, 1111, 1172, 119, 138, 5027, 1115, 1114, 1679, 2217, 24374, 117, 9163, 1105, 9220, 117, 2256, 1169, 5250, 20623, 1105, 5515, 2244, 119, 119, 107, 4516, 1103, 7471, 1104, 1142, 5650, 1110, 131, 103, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'loss_ids': [-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [14]:
# Now it's time to convert the whole dataset
from openprompt import PromptDataLoader

train_dataloader = PromptDataLoader(
    dataset = dataset["train"],
    template = mytemplate,
    tokenizer = tokenizer,
    tokenizer_wrapper_class = WrapperClass,
    max_seq_length = 256,
    decoder_max_length = 3,
    batch_size = 16,
    shuffle = True,
    teacher_forcing = False,
    predict_eos_token = False,
    truncate_method = "head")

tokenizing: 8729it [00:04, 2076.84it/s]


In [None]:
# Define the verbalizer
# for example the verbalizer contains multiple label words in each class
from openprompt.prompts import ManualVerbalizer
import torch

myverbalizer = ManualVerbalizer(tokenizer, num_classes=3, label_words=[[key] for key in emotion2id.keys()])
print(myverbalizer.label_words_ids)

logits = torch.randn(2, len(tokenizer))
print(myverbalizer.process_logits(logits))

In [16]:
# Although we can manuall combine the plm, template and verbalizer together, we provide a pipeline
# model which takes the batched data from the PromptDataLoader and produce class-wise logits
from openprompt import PromptForClassification

use_cuda = True
prompt_model = PromptForClassification(
    plm = plm,
    template = mytemplate,
    verbalizer = myverbalizer,
    freeze_plm = False)

if use_cuda:
    prompt_model = prompt_model.cuda()

In [None]:
# Standard training setup
from transformers import AdamW, get_linear_schedule_with_warmup

loss_func = torch.nn.CrossEntropyLoss()
no_decay = ['bias', 'LayerNorm.weight']

# it's always good practice to set  no decay to bias and LayerNorm (?)
optimizer_grouped_parameters = [
    {'params': [p for n, p in prompt_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in prompt_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
]

optimizer = AdamW(optimizer_grouped_parameters, lr=1e-4)

In [None]:
for epoch in range(10):
    tot_loss = 0
    for step, inputs in enumerate(train_dataloader):
        if use_cuda:
            inputs = inputs.cuda()
        logits = prompt_model(inputs)
        labels = inputs['label']
        loss = loss_func(logits, labels)
        loss.backward()
        tot_loss += loss.item()
        optimizer.step()
        optimizer.zero_grad()
        if step % 100 == 1:
            print(f'Epoch {epoch}, average loss: {tot_loss / (step + 1)}', flush=True)

In [None]:
# Evaluation
validation_dataloader = PromptDataLoader(
    dataset = dataset["validation"],
    template = mytemplate,
    tokenizer = tokenizer,
    tokenizer_wrapper_class = WrapperClass,
    max_seq_length = 256,
    decoder_max_length = 3,
    batch_size = 16,
    shuffle = False,
    teacher_forcing = False,
    predict_eos_token = False,
    truncate_method = "head")

In [None]:
allpreds = []
alllabels = []
for step, inputs in enumerate(validation_dataloader):
    if use_cuda:
        inputs = inputs.cuda()
    logits = prompt_model(inputs)
    labels = inputs['label']
    alllabels.extend(labels.cpu().tolist())
    allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())

In [None]:
from sklearn.metrics import accuracy_score, f1_score

acc = accuracy_score(allpreds, alllabels)
f1 = f1_score(allpreds, alllabels, average='weighted')

print(f'Acc: {acc}')
print(f'F1: {f1}')

In [None]:
for item in validation_dataloader:
    item = item.cuda()
    print(item)
    print(tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(item['input_ids'][0])).replace('<pad>', ''))
    logits = prompt_model(item)
    label = item['label']
    preds = torch.argmax(logits, dim=-1)
    print(f'Predicted: "{id2emotion[int(preds[0])]}", True: "{id2emotion[int(label[0])]}"')
    break