In [1]:
import json
import os
import random
import re
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from nltk.stem import WordNetLemmatizer

# Shared WordNet lemmatizer instance; load_data uses it to derive candidate
# base forms of each target word before the dictionary/context lookups.
lemmatizer = WordNetLemmatizer()


def cal_metrics(labels, answers):
    """Score predictions against gold labels.

    Args:
        labels: gold labels.
        answers: predicted labels, aligned with ``labels``.

    Returns:
        dict with the keys ``"acc"``, ``"prec"``, ``"recall"`` and ``"f1"``,
        each holding the value returned by the corresponding scikit-learn
        scorer called with its default settings.
    """
    # Insertion order of this table fixes both the call order and the key
    # order of the returned dict.
    scorers = {
        "acc": accuracy_score,
        "prec": precision_score,
        "recall": recall_score,
        "f1": f1_score,
    }
    return {name: scorer(labels, answers) for name, scorer in scorers.items()}


def load_data(path, train_dataset=None, train=False):
    """Load one split of a target-word dataset and enrich every row.

    Reads ``train.tsv`` (when ``train`` is true) or ``test.tsv`` from
    ``path``. The first line is skipped as a header and blank lines are
    ignored. Every other line is split on tabs and read as: column 1 =
    integer label, column 2 = sentence, column 3 = POS tag, last column =
    index of the target word among the whitespace-separated tokens.

    Two lookup tables are always read from ``../data/dictionary.json`` and
    ``../data/contexts.json`` (relative to the current working directory).

    Args:
        path: directory holding the TSV file (and, when ``train_dataset`` is
            given, ``distances.json`` and ``vals.json``).
        train_dataset: optional sequence of already-loaded samples. When
            given, each row additionally receives ``"shots"`` (up to 10
            random samples), ``"samples_distances"`` and ``"samples_knn"``
            (entries of ``train_dataset`` selected by the ids in
            ``vals.json``). Both JSON files are indexed by the row's line
            number in the TSV minus one.
        train: choose ``train.tsv`` instead of ``test.tsv``.

    Returns:
        ``(dataset, labels)``: a list of sample dicts and the parallel list
        of integer labels.
    """

    def remove_non_letters(s):
        # Trim leading/trailing characters that are not ASCII letters
        # (punctuation glued to the token); the middle is left untouched.
        return re.sub(r"^[^a-zA-Z]+|[^a-zA-Z]+$", "", s)

    with open("../data/dictionary.json", "r", encoding="utf-8") as f:
        dictionary = json.load(f)

    with open("../data/contexts.json", "r", encoding="utf-8") as f:
        contexts = json.load(f)

    use_train_examples = train_dataset is not None
    if use_train_examples:
        with open(os.path.join(path, "distances.json"), "r", encoding="utf-8") as f:
            distances = json.load(f)

        with open(os.path.join(path, "vals.json"), "r", encoding="utf-8") as f:
            vals = json.load(f)

        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the number of shots for small train sets.
        n_shots = min(10, len(train_dataset))

    data_path = os.path.join(path, "train.tsv" if train else "test.tsv")

    labels = []
    dataset = []

    with open(data_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    for index, line in enumerate(lines):
        # Line 0 is the header; blank lines (e.g. a trailing one) carry no row.
        if index == 0 or not line.strip():
            continue
        cells = line.strip().split("\t")

        label = int(cells[1])
        sent = cells[2]
        POS = cells[3]
        v_index = int(cells[-1])

        tokens = sent.split()
        word = remove_non_letters(tokens[v_index])

        # Wrap the target token: after the first insert the token sits at
        # v_index + 1, so the closing tag goes to v_index + 2.
        tokens.insert(v_index, "<tar>")
        tokens.insert(v_index + 2, "</tar>")

        sample = {
            "sentence": sent,
            "word": word,
            "label": label,
            "pos": POS,
            "v_index": v_index,
            "s_sentence": " ".join(tokens),
        }

        if use_train_examples:
            sample["shots"] = random.sample(train_dataset, n_shots)
            # index counts the header line, hence the -1.
            sample["samples_distances"] = distances[index - 1]
            sample["samples_knn"] = [train_dataset[_id] for _id in vals[index - 1]]

        # Candidate lookup keys: the lower-cased surface form first, then
        # each distinct lemma obtained by trying every POS tag in turn.
        lowered = word.lower()
        base_words = [lowered]
        for pos in ["v", "a", "r", "s", "n"]:
            base_word = lemmatizer.lemmatize(lowered, pos)
            if base_word not in base_words:
                base_words.append(base_word)

        # "dict" is always present; "dict_word" only when a candidate is found.
        sample["dict"] = {}
        for candidate in base_words:
            if candidate in dictionary:
                sample["dict_word"] = candidate
                sample["dict"] = dictionary[candidate]
                break

        for candidate in base_words:
            if candidate in contexts:
                sample["pos_sent"] = contexts[candidate]["pos"][0]
                sample["neg_sent"] = contexts[candidate]["neg"][0]
                sample["exam_word"] = candidate
                break

        dataset.append(sample)
        labels.append(label)

    return dataset, labels
# Load the VUA18 training split (../data/VUA18/train.tsv) once when this cell
# runs; the labels are discarded. The evaluation loop later passes this list
# to load_data as `train_dataset`.
train_dataset, _ = load_data("../data/VUA18", train=True)

In [4]:
### OPENAI
import os

from openai import OpenAI

llm_type = "gpt-4o-2024-08-06"
# Credentials are read from the environment so that no key has to be typed
# into (and saved with) the notebook. Unset variables fall back to "".
api_key = os.environ.get("OPENAI_API_KEY", "")
api_base = os.environ.get("OPENAI_BASE_URL", "")
# An empty base URL is handed over as None so the client uses its default
# endpoint instead of an empty URL.
client = OpenAI(api_key=api_key, base_url=api_base or None)

from tqdm import tqdm

# NOTE(review): not read anywhere in the visible cells; presumably a
# "number of examples" setting. Confirm before relying on or removing it.
global_vars = {"EXAMNUM": 2}


def get_response(llm_type, prompt, temp=1.0):
    """Ask the chat model a single question and return the reply text.

    Args:
        llm_type: model name forwarded as ``model``.
        prompt: text sent as the only user message, after a fixed system
            message.
        temp: value forwarded as ``temperature``.

    Returns:
        The ``content`` of the first choice's message.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    request = dict(
        model=llm_type,
        stream=False,
        messages=conversation,
        temperature=temp,
        timeout=600,
    )
    # Uses the module-level `client`; any error it raises propagates.
    reply = client.chat.completions.create(**request)
    first_choice = reply.choices[0]
    return first_choice.message.content


def load_prompts(dataset, func):
    """Apply ``func`` to every item of ``dataset``.

    Args:
        dataset: iterable of samples.
        func: callable turning one sample into a prompt.

    Returns:
        A new list with one result per sample, in input order.
    """
    return [func(sample) for sample in dataset]

In [None]:
import time


def prompt_func(data):
    """Build the MIP/SPV step-by-step prompt for one sample.

    Args:
        data: mapping with the keys ``"sentence"`` and ``"word"``. When it
            also has ``"dict_word"`` and ``"dict"`` (an iterable of entries
            with ``"definition"`` and ``"examples"``), up to two definitions
            with their first example are appended as dictionary information.

    Returns:
        The fully formatted prompt string.
    """
    # Upper bound on the number of dictionary definitions quoted in the prompt.
    max_definitions = 2

    # NOTE: the template text (including the "Backgroud" heading) is kept
    # verbatim so that generated prompts do not change.
    prompt_template = """## Backgroud

A metaphor is a rhetorical device that makes a non-literal comparison between two unlike things.

There are different types of metaphors:
- Direct Metaphor: Explicitly compares two unrelated things by stating that one thing is another, usually using a form of the verb "be".
- Implied Metaphor: Compares two unlike things without explicitly naming one of them, often using a non-literal verb.
- Extended Metaphor: Develops an initial comparison over several lines or paragraphs.
- Mixed Metaphor: Combines two or more metaphors, often resulting in a confusing or nonsensical statement, usually unintentionally humorous.
- Dead Metaphor: A metaphor that has become so familiar due to repeated use that people no longer recognize it as a metaphor, understanding it as having a straightforward meaning.

The MIP (Metaphor Identification Procedure) theory and the SPV (Selectional Preference Violation) theory are two theories that determine whether a word expresses metaphor in a sentence.

### Metaphor Identification Procedure

MIP is a systematic method used to identify metaphorical expressions in text. The core idea is to compare the literal meaning of a word with its contextual meaning to determine if a word is metaphorical. 
Here are the specific steps of MIP:
1. Select the text to analyze: Choose a segment of text or a sentence.
2. Tokenization and annotation: Break down the text into individual words and annotate each word with its basic (literal) meaning.
3. Compare literal and contextual meanings: For each word, determine if its meaning in the current context differs from its common literal meaning.
4. Identify metaphorical usage: If the contextual meaning of a word is significantly different from its literal meaning and this difference is understood metaphorically, then the word is identified as metaphorical.

### Selectional Preference Violation

SPV is a method used to identify metaphors by analyzing the anomaly of a target word in the context of its surrounding words. The core idea is that metaphorical words often violate their selectional preferences in a given context.
The specific steps of SPV are as follows:
1. Identify the target word and its surrounding words: Choose a target word and analyze its surrounding words (usually adjacent words or phrases).
2. Analyze selectional preferences: Determine the normal selectional preferences of the target word, i.e., the types of words it typically appears with in a given context.
3. Detect anomalies: Assess whether the usage of the target word in the current context violates its normal selectional preferences. If the target word's usage appears unusual or does not conform to its typical selectional preferences, it is likely to be metaphorical.

## Instruction

Please use the following step-by-step instructions to check if the given word expresses metaphorical in the given sentence:
1. Identify the Target Word:
 - Locate the target word in the provided sentence.
2. Contextual Meaning (SPV Theory):
 - Analyze the sentence to understand the contextual meaning of the target word.
 - Determine if the target word is used to describe a source domain (the literal meaning) and a target domain (the metaphorical meaning).
3. Basic Meaning (MIP Theory):
 - Determine the most basic meaning of the target word that is more concrete, related to bodily action, more precise, and historically older.
 - Check if this basic meaning contrasts with the contextual meaning identified in step 2.
4. Check for Metaphorical Usage (MIP Theory):
 - If there is a contrast between the basic meaning and the contextual meaning, consider if the contextual meaning can be understood in terms of the basic meaning (i.e., the target word is used metaphorically).
5. SPV Analysis:
 - Determine if the target word involves personification (assigning human characteristics to non-human entities) or if it acts as a vehicle (metaphorical term that carries meaning from the source to the target).
6. Final Determination:
 - Based on the analysis, conclude whether the target word is used metaphorically in the given sentence.
7. Answer:
 - Select final answer from 'yes' or 'no'.

## Input

- [sentence] {sentence}
- [word] {word}
- [dictionary] {dict_info}
"""

    def get_dict_prompt(data):
        """Render the dictionary section, or "" when no entry was found."""
        if "dict_word" not in data or "dict" not in data:
            return ""

        dict_template = """{word}:""".format(word=data["dict_word"])

        entries = []
        for dict_info in data["dict"]:
            try:
                definition = dict_info["definition"]
                example = dict_info["examples"][0]
            except (KeyError, IndexError, TypeError):
                # Entry without a definition or without any example: skip it.
                # Only lookup failures are swallowed, so KeyboardInterrupt
                # and other unrelated errors still surface.
                continue
            if not definition or not example:
                continue
            entries.append(
                "definition {index}: {key}\n    - example: {item}\n".format(
                    index=len(entries) + 1, key=definition, item=example
                )
            )
            if len(entries) == max_definitions:
                break

        return dict_template + "\n" + "".join(entries).rstrip()

    return prompt_template.format(sentence=data["sentence"], word=data["word"], dict_info=get_dict_prompt(data))


# Repeat the whole evaluation three times; every repetition and dataset gets
# its own results folder (./results/explicit-<n>/<dataset>/<model>).
for times in [1, 2, 3]:
    for data_name in [
        "MOH-X",
        "TroFi",
    ]:

        setting = f"explicit-{times}"

        data_dir = f"../data/EVAL-samples/{data_name}"

        output_dir = f"./results/{setting}/{data_name}/{llm_type}"
        os.makedirs(output_dir, exist_ok=True)
        print(f"===================={output_dir}=====================")

        dataset, labels = load_data(data_dir, train_dataset=train_dataset)

        prompts = load_prompts(dataset, prompt_func)

        with open(os.path.join(output_dir, "prompts.json"), "w") as f:
            json.dump(prompts, f)

        responses_path = os.path.join(output_dir, "responses.json")

        # Query loop with resume: every pass reloads the checkpoint, asks the
        # model for the prompts that are still missing, and starts over after
        # a failure until one response per sample exists.
        while True:
            try:
                with open(responses_path, "r") as f:
                    responses = json.load(f)
            except (OSError, ValueError):
                # Missing or unreadable/corrupt checkpoint: start from scratch.
                responses = []

            if len(responses) > len(dataset):
                # A stale checkpoint with too many entries can never satisfy
                # the equality check below and would loop forever.
                raise ValueError(
                    f"{responses_path} holds {len(responses)} responses but the dataset "
                    f"has only {len(dataset)} rows; remove or fix the stale file"
                )

            if len(dataset) == len(responses):
                break

            try:
                for index, prompt in tqdm(enumerate(prompts[len(responses) :]), total=len(prompts) - len(responses)):
                    response = get_response(llm_type, prompt, temp=0)
                    responses.append(response)
                    # Checkpoint every 50 requests (index restarts at 0 on resume).
                    if index % 50 == 0:
                        with open(responses_path, "w") as f:
                            json.dump(responses, f)

            except Exception as e:
                # Keep what was collected, report the failure so a permanent
                # error (e.g. bad credentials) is visible, then retry.
                print(f"request failed after {len(responses)} responses, retrying in 5s: {e!r}")
                with open(responses_path, "w") as f:
                    json.dump(responses, f)
                time.sleep(5)
                continue

            with open(responses_path, "w") as f:
                json.dump(responses, f)
        def extract_answer(response):
            """Map a raw model reply to a binary prediction.

            Args:
                response: reply text.

            Returns:
                1 for "yes", 0 for "no". An explicit ``Answer: yes|no``
                (case-insensitive, first occurrence) wins; otherwise the last
                stand-alone "yes"/"no" word decides. Replies containing
                neither yield 0.
            """
            match = re.search(r"Answer: (yes|no)", response, re.IGNORECASE)
            if match:
                return 1 if match.group(1).lower() == "yes" else 0

            # Fallback: whole words only, so "no" inside "not", "know" or
            # "another" does not count as a negative answer.
            votes = re.findall(r"\b(yes|no)\b", response, re.IGNORECASE)
            if votes and votes[-1].lower() == "yes":
                return 1
            return 0

        # Turn every raw reply into a 0/1 prediction and store the list.
        preds = list(map(extract_answer, responses))
        preds_file = os.path.join(output_dir, "preds.json")
        with open(preds_file, "w") as f:
            json.dump(preds, f)

        # Score the predictions against the gold labels and store the scores.
        metrics = cal_metrics(labels, preds)
        metrics_file = os.path.join(output_dir, "metrics.json")
        with open(metrics_file, "w") as f:
            json.dump(metrics, f)