In [4]:
import xml.etree.ElementTree as ET
import re
from dataclasses import dataclass
from typing import List

In [8]:
@dataclass
class metaphorSentance:
    """A sentence with aligned part-of-speech tags and metaphor positions.

    Iterating an instance tokenizes both ``sent`` and ``pos`` with
    :meth:`split` and yields one ``(is_metaphor, token, pos_tag)`` tuple per
    token, where ``is_metaphor`` is ``True`` when the token's 0-based
    position is listed in ``metaIdx``.
    """
    # Raw sentence text.
    sent: str
    # Part-of-speech tags; must tokenize to the same number of items as `sent`.
    pos: str
    # 0-based token positions (in the tokenized sentence) flagged as metaphors.
    metaIdx: List[int]
    # Characters that are emitted as stand-alone, single-character tokens.
    punc: str = ".,!?;“”"

    def split(self, string: str, delim = ' '):
        """Tokenize ``string`` into word tokens and punctuation tokens.

        A word token is a maximal run of ``\\w`` characters and apostrophes;
        every character listed in ``punc`` becomes its own token. Any other
        character (whitespace, ``$``, ...) acts as a separator and is dropped.

        Args:
            string: Text to tokenize.
            delim: Unused; kept so existing callers passing it keep working.

        Returns:
            The list of tokens in order of appearance.
        """
        pattern = r"[\w']+"
        if self.punc:
            # Escape so that ']', '^', '-' or '\' in `punc` are matched
            # literally instead of changing the character class.
            pattern += rf"|[{re.escape(self.punc)}]"
        return re.findall(pattern, string)

    def __iter__(self):
        """Yield ``(is_metaphor, token, pos_tag)`` for each sentence token.

        Raises:
            AssertionError: If ``sent`` and ``pos`` tokenize to different
                lengths (raised when iteration starts).
        """
        sentSplit = self.split(self.sent)
        posSplit = self.split(self.pos)
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O` and reports which counts disagree.
        if len(posSplit) != len(sentSplit):
            raise AssertionError(
                f"token/POS length mismatch: {len(sentSplit)} tokens "
                f"vs {len(posSplit)} POS tags"
            )
        for idx, (item, tag) in enumerate(zip(sentSplit, posSplit)):
            yield idx in self.metaIdx, item, tag



In [14]:
# Location of the raw VUA data used by DeepMet.
# NOTE(review): `dir` shadows the built-in `dir()`; name kept because other
# cells may reference it.
dir = './DeepMet/data/VUA/raw/'

# Hand-annotated example sentences, grouped by article. Each entry is a
# metaphorSentance(sentence, POS tags, metaphor token indices).
# Sentences are built from adjacent string literals inside parentheses so that
# source indentation does not end up inside the sentence text.
data = {
    "Obama-waterMeta": {
        "name": "art1",
        "data": [
            metaphorSentance(
                (
                    "President Obama urged Congress on Tuesday to quickly provide almost $4 billion "
                    "to confront a surge of young migrants from Central America crossing the border into "
                    "Texas, calling it “an urgent humanitarian situation.”"
                ),
                "ADJ NN1 VRB PROPN PRP PROP PRP AUX VRB QT QT NN1 PRP VRB X AUX PRP ADJ NN1 PRP ADJ NN1 VRB X NN1 PRP PRON PUNC VRB PRON PUNC X ADJ ADJ NN1 PUNC PUNC",
                [15, 18]
            ),
            metaphorSentance(
                (
                    "The president said he needed the money to set up new detention facilities, "
                    "conduct more aerial surveillance and hire immigration judges and Border "
                    "Patrol agents to respond to the flood of 52,000 children."
                ),  # this comma was missing, which merged the sentence with the POS string
                # NOTE(review): 36 tags here vs 37 tokens from metaphorSentance.split
                # (no tag for the final "."); iterating this entry fails the length
                # check -- confirm and add the missing tag.
                "X PROPN VRB PRON AUX X NN1 PROP VRB PROP ADJ ADJ NN1 PUNC VRB ADJ ADJ VRB CONJ VRB ADJ NN1 CONJ ADJ ADJ NN1 PROP VRB PROP X VRB PROP NN1 PUNC NN1 NN1",
                [30, 35]
            ),
            metaphorSentance(
                "Their sudden mass migration has overwhelmed local resources and touched off protests from residents angry about the impact on the local economy.",
                # NOTE(review): 22 tags here vs 23 tokens (no tag for the final ".");
                # iterating this entry fails the length check -- confirm.
                "PROPN ADV ADV VRB AUX VRB ADJ NN1 CONJ VRB PROP VRB PROP NN1 ADJ PROP X VRB PROP X ADJ NN1",
                [3, 5]
            ),
            # TODO: POS tags and metaphor indices not filled in yet; iterating
            # this entry fails the length check until they are.
            metaphorSentance(
                "In a letter to congressional leaders, Mr. Obama urged them to “act expeditiously” on his request.",
                "",
                []
            )
        ]
    },
    "Deport-waterMeta": {
        "name": "art2",
        # TODO: empty placeholder entries; iterating them yields nothing.
        "data": [
            metaphorSentance(
                "",
                "",
                []
            ),
            metaphorSentance(
                "",
                "",
                []
            ),
            metaphorSentance(
                "",
                "",
                []
            ),
            metaphorSentance(
                "",
                "",
                []
            )
        ]
    }
}

In [15]:
# Show the per-token annotation of the first sentence of the Obama article.
# The key must match one defined in `data`; "waterMeta" alone is not a key
# and raised KeyError.
for meta, word, pos in data["Obama-waterMeta"]["data"][0]:
    print(f'Word: {word}, is metaphor: {meta}, part of speech: {pos}')

Word: President, is metaphor: False, part of speech: ADJ
Word: Obama, is metaphor: False, part of speech: NN1
Word: urged, is metaphor: False, part of speech: VRB
Word: Congress, is metaphor: False, part of speech: PROPN
Word: on, is metaphor: False, part of speech: PRP
Word: Tuesday, is metaphor: False, part of speech: PROP
Word: to, is metaphor: False, part of speech: PRP
Word: quickly, is metaphor: False, part of speech: AUX
Word: provide, is metaphor: False, part of speech: VRB
Word: almost, is metaphor: False, part of speech: QT
Word: 4, is metaphor: False, part of speech: QT
Word: billion, is metaphor: False, part of speech: NN1
Word: to, is metaphor: False, part of speech: PRP
Word: confront, is metaphor: False, part of speech: VRB
Word: a, is metaphor: False, part of speech: X
Word: surge, is metaphor: True, part of speech: AUX
Word: of, is metaphor: False, part of speech: PRP
Word: young, is metaphor: False, part of speech: ADJ
Word: migrants, is metaphor: True, part of speech