Following the knowledge injection (KI) approach of [Gretkowski et al. (2022)](https://link.springer.com/chapter/10.1007/978-3-031-17105-5_7), we prompt the model with a template filled with the target sentence, the word in context that we want to classify as metaphorical or not, and the additional knowledge extracted from [] (TODO: add the missing reference).

We use minimal prompting for the task, with inputs of the following shape:
`[SEP] [target sentence] [SEP] [target word] [SEP] [Injected Knowledge] + [Label]`

In [1]:
# Mount Google Drive under /content/drive; the dataset CSVs are read from there
# and the result files are written back to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install the Hugging Face libraries imported in the next cell.
# NOTE(review): no versions are pinned, so a fresh run may install releases that
# differ from the ones behind the recorded outputs — consider pinning.
!pip install transformers
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import numpy as np
import pandas as pd
import re
import os
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import top_k_accuracy_score,confusion_matrix, classification_report
import itertools

from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset, load_metric
import torch

In [4]:
# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
def verb_row_KI(row, template, tokenizer):
    """Verbalise one dataset row, including its injected knowledge.

    Fills ``template`` by replacing ``<W1>`` with the row's sentence, ``<W2>``
    with its target word, ``<W3>`` with its injected knowledge and ``<SEP>``
    with the tokenizer's separator token.

    Args:
        row: row object (e.g. a pandas Series) indexable by 'sentence',
            'target_word' and 'injected_knowledge', and exposing a ``label``
            attribute.
        template: string containing the placeholders listed above.
        tokenizer: object exposing a ``sep_token`` attribute.

    Returns:
        A ``(text, label)`` tuple; both elements are strings.
    """
    fillers = {
        "<W1>": str(row['sentence']),
        "<W2>": str(row['target_word']),
        "<W3>": str(row['injected_knowledge']),
        "<SEP>": tokenizer.sep_token,
    }
    # A callable replacement inserts the row text verbatim: backslashes in a
    # sentence or gloss are not parsed as regex escapes/group references.
    # Substituting every placeholder in one pass also means that text already
    # inserted (e.g. a sentence that literally contains "<W2>") is never
    # re-scanned and rewritten.
    sentence = re.sub(
        "<W1>|<W2>|<W3>|<SEP>",
        lambda match: fillers[match.group(0)],
        template,
    )
    return (sentence, str(row.label))

def verb_row(row, template, tokenizer):
    """Verbalise one dataset row without injected knowledge.

    Fills ``template`` by replacing ``<W1>`` with the row's sentence, ``<W2>``
    with its target word and ``<SEP>`` with the tokenizer's separator token.
    Any other text in the template (including a ``<W3>`` placeholder) is left
    untouched.

    Args:
        row: row object (e.g. a pandas Series) indexable by 'sentence' and
            'target_word', and exposing a ``label`` attribute.
        template: string containing the placeholders listed above.
        tokenizer: object exposing a ``sep_token`` attribute.

    Returns:
        A ``(text, label)`` tuple; both elements are strings.
    """
    fillers = {
        "<W1>": str(row['sentence']),
        "<W2>": str(row['target_word']),
        "<SEP>": tokenizer.sep_token,
    }
    # A callable replacement inserts the row text verbatim: backslashes in a
    # sentence are not parsed as regex escapes/group references. Substituting
    # every placeholder in one pass also means that text already inserted
    # (e.g. a sentence that literally contains "<W2>") is never re-scanned.
    sentence = re.sub(
        "<W1>|<W2>|<SEP>",
        lambda match: fillers[match.group(0)],
        template,
    )
    return (sentence, str(row.label))

def preprocess_function(rows):
    """Tokenize the verbalised texts of a batch.

    Reads the 'verb' entry of ``rows`` and encodes it with the module-level
    ``tokenizer``; every sequence is truncated and padded to exactly 64 tokens.

    Returns:
        Whatever the tokenizer call returns for the batch.
    """
    return tokenizer(
        rows['verb'],
        max_length=64,
        padding='max_length',
        truncation=True,
    )

def compute_metrics(eval_pred):
  '''
  Metric callback handed to the Trainer.

  Args:
    eval_pred: object of type transformers.EvalPrediction; it unpacks into
    a (logits, gold labels) pair.

  Returns:
    The dictionary produced by the module-level `metric` for the arg-max
    predictions, i.e. {'name_metric1': value1, ...}

  Note:
    The metric is computed with average='micro'. For single-label
    classification micro-averaged F1 is the same number as accuracy, so it
    should not be read as the F1 of the positive class.
  '''
  logits, gold_labels = eval_pred
  # The predicted class is the index of the largest logit in each row.
  predicted_classes = np.argmax(logits, axis=1)
  return metric.compute(
      predictions=predicted_classes,
      references=gold_labels,
      average='micro',
  )


def results_row(row):
    """Flag whether a row's prediction matches its gold label.

    Compares ``row['pred']`` with ``row['labels']``, stores the outcome as a
    plain ``True``/``False`` under ``row['results']`` and returns the same
    row object.
    """
    row['results'] = True if row['pred'] == row['labels'] else False
    return row

In [6]:
# Locations of the VUA-V train/test splits on the mounted Drive.
train_csv_path = '/content/drive/MyDrive/knowledge injection for metaphor identification sdllod23/datasets/datasets/VUA-V/train.csv'
test_csv_path = '/content/drive/MyDrive/knowledge injection for metaphor identification sdllod23/datasets/datasets/VUA-V/test.csv'

# NOTE(review): only the train split is decoded as latin1, while the test split
# uses the pandas default encoding — confirm the two files really differ in
# encoding, otherwise non-ASCII characters in the training text may be garbled.
train = pd.read_csv(train_csv_path, sep=',', index_col=False, encoding='latin1')
test = pd.read_csv(test_csv_path, sep=',', index_col=False)
test

Unnamed: 0,sentence,label,target_position,target_word,pos_tag,gloss,eg_sent
0,Design : Crossed lines over the toytown tram :...,1,2,cross,VERB,Go or extend across or to the other side of (a...,she has crossed the Atlantic twice
1,Design : Crossed lines over the toytown tram :...,0,20,say,VERB,"Utter words so as to convey information, an op...","‘Thank you,’ he said"
2,"MODERN trams , as most continental Europeans k...",0,7,know,VERB,"Be aware of through observation, inquiry, or i...",most people know that CFCs can damage the ozon...
3,"MODERN trams , as most continental Europeans k...",0,10,shake,VERB,(of a structure or area of land) tremble or vi...,buildings shook in Sacramento and tremors were...
4,"MODERN trams , as most continental Europeans k...",0,12,rattle,VERB,Make or cause to make a rapid succession of sh...,the roof rattled with little gusts of wind
...,...,...,...,...,...,...,...
5868,Aristotle said something very interesting in t...,1,1,say,VERB,"Utter words so as to convey information, an op...","‘Thank you,’ he said"
5869,Aristotle said something very interesting in t...,0,13,quote,VERB,Repeat or copy out (words from a text or speec...,I realized she was quoting passages from Shake...
5870,Aristotle said something very interesting in t...,1,1,say,VERB,"Utter words so as to convey information, an op...","‘Thank you,’ he said"
5871,Aristotle said something very interesting in t...,0,27,lack,VERB,Be without or deficient in.,the novel lacks imagination


In [7]:
from transformers import EarlyStoppingCallback

# Checkpoint to fine-tune and the name of the evaluation metric to load.
model_name = 'roberta-base'
metric_name = 'f1'

# The tokenizer has to match the checkpoint that is fine-tuned later on.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): the recorded output of this cell shows a warning pointing at the
# load_metric call; presumably the function is deprecated — confirm against the
# installed `datasets` version.
metric = load_metric(metric_name)

  metric = load_metric(metric_name)


In [8]:
# Prompt layout. The variant with <W3> adds the injected knowledge; only
# verb_row_KI fills that placeholder, so switch the verbaliser together with it.
#template =   "<SEP> <W1> <SEP> <W2> <SEP> <W3>" #for injected knowledge
template = "<SEP> <W1> <SEP> <W2>"

# One (text, label) pair per training row, expanded into two columns.
verb_two_pairs_train = train.apply(
    verb_row,
    axis=1,
    result_type="expand",
    args=(template, tokenizer),
)

In [9]:
# Name the (text, label) columns produced by the verbaliser.
verb_two_pairs_train.columns=['verb', 'rel']
# Derive the label -> integer mapping once, from the training split.
train_rel = verb_two_pairs_train['rel'].astype('category')
verb_two_pairs_train['labels']=train_rel.cat.codes

verb_two_pairs_test = test.apply(verb_row, axis=1, result_type="expand",template=template, tokenizer=tokenizer)
verb_two_pairs_test.columns=['verb', 'rel']
# Reuse the training categories for the test split. Encoding each split on its
# own would silently assign different integers to the same label whenever the
# two splits do not contain exactly the same set of labels.
verb_two_pairs_test['labels']=verb_two_pairs_test['rel'].astype(train_rel.dtype).cat.codes
# A label that never occurs in training is encoded as -1; fail loudly instead
# of evaluating against a meaningless class id.
unseen_labels = verb_two_pairs_test.loc[verb_two_pairs_test['labels'] < 0, 'rel'].unique()
if len(unseen_labels) > 0:
    raise ValueError(f"test split contains labels absent from train: {list(unseen_labels)}")

print(verb_two_pairs_test)

                                                   verb rel  labels
0     </s> Design : Crossed lines over the toytown t...   1       1
1     </s> Design : Crossed lines over the toytown t...   0       0
2     </s> MODERN trams , as most continental Europe...   0       0
3     </s> MODERN trams , as most continental Europe...   0       0
4     </s> MODERN trams , as most continental Europe...   0       0
...                                                 ...  ..     ...
5868  </s> Aristotle said something very interesting...   1       1
5869  </s> Aristotle said something very interesting...   0       0
5870  </s> Aristotle said something very interesting...   1       1
5871  </s> Aristotle said something very interesting...   0       0
5872                  </s> What did he mean ? </s> mean   0       0

[5873 rows x 3 columns]


In [10]:
# Install transformers with its torch extra and upgrade accelerate.
# NOTE(review): this cell runs after transformers has already been imported in an
# earlier cell; a runtime restart may be needed for an upgraded package to be
# picked up — confirm, and consider moving the installs into the first install cell.
!pip install transformers[torch]
!pip install accelerate -U

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [11]:
# Size the classification head from the distinct label ids in the training split.
NUM_LABELS = verb_two_pairs_train['labels'].nunique()
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=NUM_LABELS,
).to(device)

# Wrap the pandas frames as Hugging Face datasets.
verb_train = Dataset.from_pandas(verb_two_pairs_train)
verb_test = Dataset.from_pandas(verb_two_pairs_test)

# Tokenize each split, then drop the raw string columns so that only the
# tokenizer outputs and the integer 'labels' column remain.
raw_text_columns = ['rel', 'verb']
encoded_verb_train = (
    verb_train
    .map(preprocess_function, batched=True, batch_size=None)
    .remove_columns(raw_text_columns)
)
encoded_verb_test = (
    verb_test
    .map(preprocess_function, batched=True, batch_size=None)
    .remove_columns(raw_text_columns)
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should pr

Map:   0%|          | 0/15516 [00:00<?, ? examples/s]

Map:   0%|          | 0/5873 [00:00<?, ? examples/s]

In [12]:
# Have both encoded splits return torch tensors when indexed.
for encoded_split in (encoded_verb_train, encoded_verb_test):
    encoded_split.set_format("torch")

In [13]:
# Batch size used for training; evaluation uses twice this value.
batch_size = 32

args_train = TrainingArguments(
    # --- optimisation ---
    optim="adamw_torch",
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=10,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size*2,
    #fp16=True,
    # --- evaluation / checkpointing: both switched off during training ---
    evaluation_strategy="no",
    save_strategy="no",
    load_best_model_at_end=False,
    # NOTE(review): with evaluation and best-model loading disabled, this
    # setting presumably has no effect — confirm before relying on it.
    metric_for_best_model='f1',
    #save_total_limit = 3
    # --- output and logging ---
    output_dir='my_checkpoints',
    overwrite_output_dir=True,
    logging_steps=10,
    report_to='all',
)

trainer = Trainer(
    model=model,                       # model to fine-tune
    args=args_train,                   # training configuration built above
    train_dataset=encoded_verb_train,
    tokenizer=tokenizer,               # same tokenizer that encoded the data
    compute_metrics=compute_metrics,   # metric callback
    #callbacks = [EarlyStoppingCallback(early_stopping_patience=3)]
)

# Fine-tune; the returned TrainOutput is displayed as the cell result.
trainer.train()

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,0.6336
20,0.6347
30,0.558
40,0.5964
50,0.607
60,0.6185
70,0.5765
80,0.5457
90,0.5423
100,0.5625


Step,Training Loss
10,0.6336
20,0.6347
30,0.558
40,0.5964
50,0.607
60,0.6185
70,0.5765
80,0.5457
90,0.5423
100,0.5625


TrainOutput(global_step=4850, training_loss=0.2005841738722988, metrics={'train_runtime': 1730.3964, 'train_samples_per_second': 89.667, 'train_steps_per_second': 2.803, 'total_flos': 5103038918707200.0, 'train_loss': 0.2005841738722988, 'epoch': 10.0})

In [14]:
# Run the fine-tuned model on the encoded test split.
predicciones = trainer.predict(test_dataset=encoded_verb_test)

# predicciones.predictions contains the logits; the predicted label is the
# index of the largest logit in each row.
pred = np.argmax(predicciones.predictions, axis = 1)
# NOTE: with average='micro' the reported 'f1' is the same number as accuracy
# for this single-label task; the per-class precision/recall/F1 values are in
# `results_acc` below.
print(metric.compute(predictions=pred, references=predicciones.label_ids, average='micro'))
#print(top_k_accuracy_score(predicciones.label_ids, predicciones.predictions,k=1))

# Confusion matrix (rows: gold, columns: predicted), labelled with the sorted
# label values of the test split.
lab = np.sort(test.label.unique())
d = pd.DataFrame(confusion_matrix(predicciones.label_ids, y_pred=pred), index=lab, columns=lab)
results_acc = (classification_report(predicciones.label_ids, pred, digits=4, output_dict=True))

# Per-example table: prediction, gold label and the original sentence/target word.
results_words = pd.DataFrame({'pred':pred, 'rel':np.array(test.label), 'labels':predicciones.label_ids, 'source':np.array(test.sentence), 'target':np.array(test.target_word)})
# DataFrame.apply returns a new frame and does not modify the original, so the
# result has to be assigned back; otherwise the 'results' column is lost.
results_words = results_words.apply(results_row, axis=1)
print(d)

{'f1': 0.8317725183041035}
      0     1
0  3814   298
1   690  1071


In [19]:
# Persist the per-example predictions table to Drive.
# NOTE(review): the execution counts show this cell ([19]) was run after the
# cell below it ([18]); neither reads anything the other creates, but the cells
# should be re-run in document order before sharing.
results_words.to_csv('/content/drive/MyDrive/results_vuav_noKI_1_15_06_23.csv')

In [18]:
# Values written to the summary file: the classification report (results_acc),
# the micro-averaged metric and the confusion matrix.
# (The former bare `results_acc` expression was dropped: it was not the last
# statement of the cell, so it neither displayed nor computed anything.)
metric_acc = metric.compute(predictions=pred, references=predicciones.label_ids, average='micro')
conf_matrix = d

with open('/content/drive/MyDrive/results_vuav_noKI_1_15_06_23.txt', 'w', encoding='utf-8') as f:
    # Each section ends with a newline. Without separators the two JSON
    # documents and the markdown table run together and cannot be parsed back.
    f.write(json.dumps(results_acc) + '\n')
    f.write(json.dumps(metric_acc) + '\n')
    f.write(conf_matrix.to_markdown() + '\n')
