In [1]:
from typing import List, Optional

from nltk.tokenize import word_tokenize

In [2]:
def print_if_verbose(text: str, verbose: bool) -> None:
    """Write ``text`` to standard output, but only in verbose mode.

    Nothing is printed (and nothing else happens) when ``verbose`` is
    falsy.
    """
    if not verbose:
        return
    print(text)

In [3]:
def lowercase_and_tokenize(s: str) -> List[str]:
    """Tokenize ``s`` with ``word_tokenize`` and lowercase every token.

    The tokens are returned in the order the tokenizer produced them.
    """
    tokens = word_tokenize(s)
    return list(map(str.lower, tokens))


def get_first_token(s: str) -> Optional[str]:
    """Return the lowercased first token of ``s``.

    ``s`` is split with ``word_tokenize``. When tokenization yields no
    tokens at all (e.g. for an empty string), None is returned instead.
    """
    tokens = word_tokenize(s)
    return tokens[0].lower() if len(tokens) > 0 else None

In [4]:
# Punctuation symbols recognised by the helpers below.
punctuation = {".", ",", "!", "?", ";"}


def remove_trailing_punctuation(string: str) -> str:
    """Drop the final character when it is one of ``punctuation``.

    At most one character is removed; an empty string, or a string that
    does not end with a punctuation symbol, is returned unchanged.
    """
    ends_with_punctuation = bool(string) and string[-1] in punctuation
    return string[:-1] if ends_with_punctuation else string


def fix_spacing(string: str) -> str:
    """Remove whitespace in front of a trailing punctuation symbol.

    Only acts when the last character is one of ``punctuation``; any
    other string (including the empty one) is returned unchanged.
    Note that the text before the symbol is passed through
    ``str.strip()``, so leading whitespace is removed as well.
    """
    if not string or string[-1] not in punctuation:
        return string
    body, mark = string[:-1], string[-1]
    return body.strip() + mark


def remove_leading_punctuation(string: str) -> str:
    """Drop the first character when it is one of ``punctuation``.

    When a symbol is removed, the remainder is also stripped of
    surrounding whitespace on both ends. An empty string, or a string
    that does not start with a punctuation symbol, is returned
    unchanged (no stripping is applied in that case).
    """
    if string and string[0] in punctuation:
        return string[1:].strip()
    return string

In [5]:
def uppercase_first_letter(string: str) -> str:
    """Uppercase the first "real" character of ``string``.

    Leading whitespace, quotation marks (``'`` and ``"``) and opening
    brackets (``(``, ``{``, ``[``) are skipped; the first character
    after them is replaced by its ``str.upper()`` form. If the string
    consists solely of skippable characters (or is empty), it is
    returned unchanged.
    """
    skippable = {"'", '"', "(", "{", "["}
    for index, char in enumerate(string):
        if char in skippable or char.isspace():
            continue
        # First character that is neither an opener nor whitespace.
        return string[:index] + char.upper() + string[index + 1 :]
    return string

In [7]:
# Load the connectives: one entry per line of the file, with the
# surrounding whitespace stripped from each line.
with open("aux/connectives.txt", "rt") as f:
    connectives = {line.strip() for line in f}


def trim_connective(string: str) -> str:
    """Remove a connective at the beginning of string.

    Matching is a case-insensitive prefix comparison against the entries
    of ``connectives``. When several connectives match, the longest one
    is used, so the result does not depend on the iteration order of the
    set. Empty entries (e.g. produced by a blank line in the connectives
    file) are ignored because they would match every string.

    The connective "last" is only removed when it is directly followed
    by a comma; otherwise the string is returned unchanged.

    After the connective is cut off, the remainder is passed through
    ``remove_leading_punctuation``. If no connective matches, ``string``
    is returned as is.
    """
    lower = string.lower()
    matches = [c for c in connectives if c and lower.startswith(c)]
    if not matches:
        return string

    # Prefer the longest match so that e.g. a multi-word connective wins
    # over a shorter connective that is merely its prefix.
    connective = max(matches, key=len)
    cut = len(connective)

    # "last" is only treated as a connective in the form "last, ...".
    if connective == "last" and not lower.startswith(",", cut):
        return string
    return remove_leading_punctuation(string[cut:])

In [8]:
def get_relation_type(relation):
    """Return ``relation.type``, or '-' as a placeholder.

    The placeholder '-' is returned when ``relation`` is None.
    """
    return "-" if relation is None else relation.type

In [9]:
def remove_extra_space(s: str) -> str:
    """Collapse every run of whitespace into a single space.

    Leading and trailing whitespace is dropped entirely.

    E.g. "foo    bar" -> "foo bar"
    """
    words = s.split()
    return " ".join(words)


def contains_any_of(s: str, s_array: List[str]) -> bool:
    """Tell whether at least one string of ``s_array`` occurs in ``s``.

    Returns True as soon as an element of ``s_array`` is found to be a
    substring of ``s``; returns False when none is (including when
    ``s_array`` is empty).
    """
    return any(candidate in s for candidate in s_array)

In [10]:
def is_nn(relation):
    """Check whether both sides of ``relation`` have type "N".

    Returns False for None; otherwise compares ``relation.left.type``
    and ``relation.right.type`` against "N" (a pair of nuclei).
    """
    if relation is None:
        return False
    return relation.left.type == "N" and relation.right.type == "N"

In [11]:
def fix_quotes(sent: str) -> str:
    """Wrap ``sent`` in double quotes, replacing any enclosing quotes.

    All leading and trailing ``'``, ``"`` and backtick characters are
    stripped off, then the result is enclosed in a pair of ``"``. The
    quotes are added unconditionally, even if ``sent`` was not quoted
    before. Whitespace is not stripped.
    """
    assert sent is not None
    quote_chars = "'\"`"
    unquoted = sent.strip(quote_chars)
    return f'"{unquoted}"'

In [12]:
def is_background(relation):
    """Check whether ``relation`` has the type "Background".

    None is accepted and yields False.
    """
    if relation is None:
        return False
    return relation.type == "Background"


def has_nested_background(relation):
    """Check whether any descendant of ``relation`` is a Background.

    Only the subrelations reachable through ``left_child`` and
    ``right_child`` are inspected (recursively); the type of
    ``relation`` itself is not considered. None yields False.
    """
    if relation is None:
        return False
    # Direct children first ...
    if is_background(relation.left_child) or is_background(
        relation.right_child
    ):
        return True
    # ... then descend into both subtrees.
    return has_nested_background(
        relation.left_child
    ) or has_nested_background(relation.right_child)


if __name__ == "__main__" and "__file__" not in globals():
    # Self-test; only runs when this code is executed as ``__main__``
    # with no ``__file__`` defined (presumably inside the notebook).

    import import_ipynb  # noqa: F401, needed to import relation_extraction
    import relation_extraction

    def test__has_nested_background():
        def make(rel_type="-", left_child=None, right_child=None):
            """Build a Relation with only its type and children set."""
            return relation_extraction.Relation(
                rel_type, None, None, left_child, right_child
            )

        def build_tree(r010_type="-", r01_type="-"):
            """Build a depth-two binary tree of relations.

            Every node has type "-" except r01 (right child of the
            root) and r010 (left child of r01), whose types are given.
            """
            r010 = make(r010_type)
            r011 = make()
            r000 = make()
            r001 = make()
            r01 = make(r01_type, r010, r011)
            r00 = make("-", r000, r001)
            return make("-", r00, r01)

        # Background on a leaf two levels below the root.
        assert has_nested_background(build_tree(r010_type="Background"))

        # No Background anywhere in the tree.
        assert has_nested_background(build_tree()) is False

        # Background on a direct child of the root.
        assert has_nested_background(build_tree(r01_type="Background"))

        assert has_nested_background(None) is False

    test__has_nested_background()

importing Jupyter notebook from relation_extraction.ipynb
