In [None]:
class Base:
    # There is not a defined conversion method for these words.
    # This is the default list of irregular nouns. It maps the
    # singular version to the plural version (SP).
    IRREGULAR_NOUNS_SP = {
        "ox": "oxen",
        "goose": "geese",
        "mouse": "mice",
        "bacterium": "bacteria"
    }

    # This is the reversed version of the dictionary above, meaning 
    # that the plural version is mapped to the singular version 
    # (PS).
    IRREGULAR_NOUNS_PS = {v: k for k, v in IRREGULAR_NOUNS_SP.items()}
    
    # The singular and plural versions of these words are the same. 
    # This is the default list of zero plural nouns.
    ZERO_PLURAL_NOUNS = [
        "species", 
        "deer", 
        "fish", 
        "moose", 
        "sheep", 
        "swine", 
        "buffalo", 
        "trout", 
        "cattle"
    ]

    # These pairs of characters define symbols that enclose other
    # information in a text.
    ENCLOSURES = {
        "(": ")",
        "[": "]",
        "{": "}"
    }

    LAX_ENCLOSURES = {
        "(": ")",
        "[": "]",
        "{": "}",
        "—": "—"
    }


    
    def __init__(self, main, irregular_nouns_sp=IRREGULAR_NOUNS_SP, irregular_nouns_ps=IRREGULAR_NOUNS_PS, zero_plural_nouns=ZERO_PLURAL_NOUNS):
        self.main = main
        self.zero_plural_nouns = zero_plural_nouns
        self.irregular_nouns_sp = irregular_nouns_sp
        self.irregular_nouns_ps = irregular_nouns_ps
        self.irregular_plural_nouns = list(self.irregular_nouns_sp.values())
        self.irregular_singular_nouns = list(self.irregular_nouns_sp.keys())



    def delete_extra_whitespace(self, string):
        # Duplicate spaces, spaces before punctuation marks,
        # and outside spaces are removed.
        string = re.sub(r"\s+", " ", string)
        string = re.sub(r"\s+([?.!,])", r"\1", string)
        string = string.strip()
        return string



    def delete_outer_non_alnum(self, string):
        while string:
            start_len = len(string)
            # Remove Leading Non-Alphanumeric Character
            if string and not string[0].isalnum():
                string = string[1:]
            # Remove Trailing Non-Alphanumeric Character
            if string and not string[-1].isalnum():
                string = string[:-1]
            # No Changes Made
            if start_len == len(string):
                break
        return string



    def get_parentheticals(self, text, enclosures=ENCLOSURES, flatten=False):
        # The parenthetical would be the content inside of a pair
        # of matching parentheses, brackets, or braces.
        parentheticals = []
        
        # This contains the text that's not inside of any
        # enclosure.
        base_text = []
        
        # This is used for building groups, which often has a 
        # nested structure.
        stacks = []
        
        # These are the pairs of characters that we recognize
        # as defining the parenthetical.
        openers = list(enclosures.keys())
        closers = list(enclosures.values())
        
        # This contains the opening characters of the groups 
        # that are currently open (e.g. '(', '['). We use it 
        # so that we know whether to open or close a group.
        opened = []
        
        for i, char in enumerate(text):
            # Open Group
            if char in openers:
                stacks.append([])
                opened.append(char)
            # Close Group
            elif opened and char == enclosures.get(opened[-1], ""):
                parentheticals.append(stacks.pop())
                opened.pop()
            # Add to Group
            elif opened:
                stacks[-1].append(i)
            # Add to Base Text
            else:
                base_text.append(i)
        
        # We close the remaining groups that have not
        # been closed.
        while stacks:
            parentheticals.append(stacks.pop())
            
        # Cluster Groups' Indices
        # A list in the lists of indices (where each list represents a group of text) could have 
        # an interruption (e.g. [0, 1, 2, 10 15]) because of a parenthetical. So, we cluster the
        # indices in each list to make the output more useful (e.g. [(0, 3), (10, 16)]).
        lists_of_indices = [*parentheticals, base_text]        
        lists_of_clustered_indices = []

        for list_of_indices in lists_of_indices:
            if not list_of_indices:
                continue

            # We start off with a single cluster that is made up of the
            # first index. If the next index follows the first index, 
            # we continue the cluster. If it doesn't, we create a new cluster.
            clustered_indices = [[list_of_indices[0], list_of_indices[0] + 1]]
            
            for index in list_of_indices[1:]:
                if clustered_indices[-1][1] == index:
                    clustered_indices[-1][1] = index + 1
                else:
                    clustered_indices.append([index, index + 1])

            # Add Clustered Indices
            lists_of_clustered_indices.append(clustered_indices)
            
        if flatten:
            flattened_clusters = []
            # We are placing each cluster of indices into one list.
            # This removes the context of the larger parenthetical,
            # but the context may be cumbersome instead of useful.
            for list_of_clustered_indices in lists_of_clustered_indices:
                for clustered_indices in list_of_clustered_indices:
                    flattened_clusters.append(clustered_indices)
            lists_of_clustered_indices = flattened_clusters
        
        return lists_of_clustered_indices



    def separate_span_by_parenthetical(self, span):
        span_parentheticals = []
        
        # The clusters of the span represented with tuples of char indices
        # (e.g. [(0, 1), (1, 5), (5, 10)]. This is a list of clustered
        # indices (like above).
        text_clusters = self.get_parentheticals(span.text, flatten=True)
        
        for cluster in text_clusters:
            if span.text[cluster[0]:cluster[1]].isspace():
                continue

            l_char_index = span[0].idx + cluster[0]
            r_char_index = span[0].idx + cluster[1] - 1

            # Instead of having a tuple dictating the start and end of a cluster,
            # we can use a span -- it's much simpler.
            cluster_as_span = self.get_span_at_indices(l_char_index, r_char_index)
            if not cluster_as_span:
                continue
            
            span_parentheticals.append(cluster_as_span)

        return span_parentheticals



    def separate_spans_by_parenthetical(self, spans):
        all_span_parentheticals = []
        for span in spans:
            all_span_parentheticals.extend(self.separate_span_by_parenthetical(span))
        return all_span_parentheticals

    
 
    def singularize(self, string):
        string = string.lower()
        
        # The string to singularize should not have any
        # non-alphanumeric characters at the end, or else
        # the algorithm will not work.
        words = re.split(r" ", string)

        if not words:
            return [string]

        # If the last word in the string is a zero plural
        # or a singular irregular noun, there's no changes
        # to make. For example, "red sheep" and "ox" are 
        # already singular.
        if (
            words[-1] in self.zero_plural_nouns or 
            words[-1] in self.irregular_singular_nouns
        ):
            return [string]

        # If the last word in the string is an irregular
        # plural noun, we rely on a dictionary with the
        # corresponding mapping.
        if words[-1] in self.irregular_plural_nouns:
            words[-1] = self.irregular_nouns_ps[words[-1]]
            singulars = [self.delete_extra_whitespace(" ".join(words))]
            return singulars
        
        # We take the singular form of the last word and
        # add it back in to the other words. As there could
        # be multiple forms (due to uncertainty), we need to
        # include all possible versions.
        singulars = []
        singular_endings = self.get_singular(words[-1])

        if not singular_endings:
            return [string]
        
        for singular_ending in singular_endings:
            singular = self.delete_extra_whitespace(" ".join([*words[:-1], singular_ending]))
            singulars.append(singular)
            
        return singulars



    def get_singular(self, string):
        versions = []

        # Replace -ies with -y
        if re.fullmatch(r".*ies$", string):
            versions.append(f'{string[:-3]}y')
            return versions

        # Replace -ves with -f and -fe
        if re.fullmatch(r".*ves$", string):
            versions.append(f'{string[:-3]}f')
            versions.append(f'{string[:-3]}fe')
            return versions

        # Delete -es 
        if re.fullmatch(r".*es$", string):
            versions.append(f'{string[:-2]}')
            return versions

        # Replace -i with -us
        if re.fullmatch(r".*i$", string):
            versions.append(f'{string[:-1]}us')
            return versions

        # Delete -s
        if re.fullmatch(r".*s$", string):
            versions.append(f'{string[:-1]}')
            return versions

        return versions


    
    def pluralize(self, string):
        string = string.lower()
        
        # The string to pluralize should not have any
        # non-alphanumeric characters at the end, or else
        # the algorithm will not work.
        words = re.split(r" ", string)

        if not words:
            return [string]

        # If the last word in the string is a zero plural
        # or a plural irregular noun, there's no changes
        # to make. For example, "red sheep" and "oxen" are 
        # already singular.
        if (
            words[-1] in self.zero_plural_nouns or 
            words[-1] in self.irregular_plural_nouns
        ):
            return [string]

        # If the last word in the string is an irregular
        # singular noun, we rely on a dictionary with the
        # corresponding mapping.
        if words[-1] in self.irregular_singular_nouns:
            words[-1] = self.irregular_nouns_sp[words[-1]]
            return [self.delete_extra_whitespace(" ".join(words))]
        
        # We take the singular form of the last word and
        # add it back in to the other words. As there could
        # be multiple forms (due to error), we need to
        # handle them all.
        plurals = []
        plural_endings = self.get_plural(words[-1])

        if not plural_endings:
            return [string]
            
        for plural_ending in plural_endings:
            plural = self.delete_extra_whitespace(" ".join([*words[:-1], plural_ending]))
            plurals.append(plural)
            
        return plurals

    
  
    def get_plural(self, string):
        versions = []

        # Words that end with -us often have
        # two different plural versions: -es and -i.
        # For example, the plural version of cactus 
        # can be cactuses or cacti.
        if re.fullmatch(r".*us$", string):
            versions.append(f'{string}es')
            versions.append(f'{string[:-2]}i')
            return versions

        # The -es ending is added to the words below.
        if re.fullmatch(r".*([^l]s|sh|ch|x|z)$", string):
            versions.append(f'{string}es')
            return versions

        # Words that end with a consonant followed by 'y'
        # are made plural by replacing the 'y' with -ies.
        # For example, the plural version of canary is
        # canaries.
        if re.fullmatch(r".*([^aeiou])(y)$", string):
            versions.append(f'{string[:-1]}ies')
            return versions
            
        # The plural version of words ending with -f
        # and -fe aren't clear. To be safe, I will add
        # both versions.
        if (re.fullmatch(r".*(f)(e?)$", string) and not re.fullmatch(r".*ff$", string)):
            last_clean = re.sub(r"(f)(e?)$", "", string)
            versions.append(f'{last_clean}fs')
            versions.append(f'{last_clean}ves')
            return versions

        # People add -s or -es to words that end with 'o'.
        # To be safe, both versions are added.
        if re.fullmatch(r".*([^aeiou])o$", string):
            versions.append(f'{string}s')
            versions.append(f'{string}es')
            return versions

        # If there's no -s at the end of the string and
        # the other cases didn't run, we add an -s.
        if re.fullmatch(r".*[^s]$", string):
            versions.append(f'{string}s')
        
        return versions


 
    def expand_unit(self, *, il_unit, ir_unit, il_boundary, ir_boundary, speech=[], literals=[], include=True, direction='BOTH', verbose=False):
        UNIT = self.main.sp_doc[il_unit:ir_unit+1]
        
        if il_unit > ir_unit:
            print(f"Error: il_unit of {il_unit} greater than ir_unit of {ir_unit}")
            return None
        
        if direction in ['BOTH', 'LEFT'] and il_boundary > il_unit:
            print(f"Error: il_unit of {il_unit} less than il_boundary of {il_boundary}")
            return None
        
        if direction in ['BOTH', 'RIGHT'] and ir_boundary < ir_unit:
            print(f"Error: ir_unit of {ir_unit} greater than ir_boundary of {ir_boundary}")
            return None
        
        # Move Left
        if direction in ['BOTH', 'LEFT']:
            # The indices are inclusive, therefore, when 
            # the condition fails, il_unit will be equal
            # to il_boundary.
            while il_unit > il_boundary:
                # We assume that the current token is allowed,
                # and look to the token to the left.
                l_token = self.main.sp_doc[il_unit-1]

                # If the token is invalid, we stop expanding.
                in_set = l_token.pos_ in speech or l_token.lower_ in literals

                # Case 1: include=False, in_set=True
                # If we're not meant to include the defined tokens, and the
                # current token is in that set, we stop expanding.
                # Case 2: include=True, in_set=False
                # If we're meant to include the defined tokens, and the current
                # token is not in that set, we stop expanding.
                # Case 3: include=in_set
                # If we're meant to include the defined tokens, and the current
                # token is in that set, we continue expanding. If we're not meant
                # to include the defined tokens, and the current token is not
                # in that set, we continue expanding.
                if include ^ in_set:
                    break
                
                # Else, the left token is valid, and
                # we continue to expand.
                il_unit -= 1

        # Move Right
        if direction in ['BOTH', 'RIGHT']:
            # Likewise, when the condition fails,
            # ir_unit will be equal to the ir_boundary.
            # The ir_boundary is also inclusive.
            while ir_unit < ir_boundary:
                # Assuming that the current token is valid,
                # we look to the right to see if we can
                # expand.
                r_token = self.main.sp_doc[ir_unit+1]

                # If the token is invalid, we stop expanding.
                in_set = r_token.pos_ in speech or r_token.lower_ in literals
                if include ^ in_set:
                    break

                # Else, the token is valid and
                # we continue.
                ir_unit += 1

        assert il_unit >= il_boundary and ir_unit <= ir_boundary
        
        expanded_unit = self.main.sp_doc[il_unit:ir_unit+1]

        if verbose and VERBOSE_LEVEL >= 1:
            print(f"Expanded Unit of '{UNIT}': {expanded_unit}")
        
        return expanded_unit


    
    def contract_unit(self, *, il_unit, ir_unit, speech=[], literals=[], include=True, direction='BOTH', verbose=False):
        UNIT = self.main.sp_doc[il_unit:ir_unit+1]
        
        if il_unit > ir_unit:
            print(f"Error: il_unit of {il_unit} greater than ir_unit of {ir_unit}")
            return None
        
        # Move Right
        if direction in ['BOTH', 'LEFT']:
            while il_unit < ir_unit:
                # We must check if the current token is not allowed. If it's
                # not allowed, we contract (remove).
                token = self.main.sp_doc[il_unit]

                # include = True means that we want the tokens that match
                # the speech and/or literals in the contracted unit.
                
                # include = False means that we don't want the tokens that
                # match the speech and/or literals in the contracted unit.
                
                # Case 1: include = True, in_set = True
                # We have a token that's meant to be included in the set.
                # However, we're contracting, which means we would end up
                # removing the token if we continue. Therefore, we break.
                
                # Case 2: include = False, in_set = False
                # We have a token that's not in the set which defines the
                # tokens that aren't meant to be included. Therefore, we 
                # have a token that is meant to be included. If we continue,
                # we would end up removing this token. Therefore, we break.
                
                # Default:
                # If we have a token that's in the set (in_set=True) of
                # tokens we're not supposed to include in the contracted 
                # unit (include=False), we need to remove it. Likewise, if
                # we have a token that's not in the set (in_set=False) of
                # tokens to include in the contracted unit (include=True),
                # we need to remove it.
                
                in_set = token.pos_ in speech or token.lower_ in literals
                if include == in_set:
                    break

                # The token is valid, thus we continue.
                il_unit += 1

        # Move Left      
        if direction in ['BOTH', 'RIGHT']:
            while ir_unit > il_unit:
                token = self.main.sp_doc[ir_unit]

                # The token is invalid and we
                # stop contracting.
                in_set = token.pos_ in speech or token.lower_ in literals
                if include == in_set:
                    break

                # The token is valid and we continue.
                ir_unit -= 1

        assert il_unit <= ir_unit
        
        contracted_unit = self.main.sp_doc[il_unit:ir_unit+1]

        if verbose and VERBOSE_LEVEL >= 1:
            print(f"Contracted Unit of '{UNIT}': {contracted_unit}")
        
        return contracted_unit


    
    def find_unit_context(self, *, il_unit, ir_unit, il_boundary, ir_boundary, speech=["ADJ", "NOUN", "ADP", "ADV", "PART", "PROPN", "VERB", "PRON", "DET", "AUX", "PART", "SCONJ"], literals=[], include=True, enclosures=LAX_ENCLOSURES, comma_encloses=False, verbose=False):
        """Collect the tokens that form the context of a unit.

        All indices are inclusive token indices into ``self.main.sp_doc``.
        Three strategies are tried in order and the first that applies
        decides the result:

        1. Enclosure: the nearest enclosing character on the left and on the
           right of the unit are looked up. If the left one is an opener
           whose closer (per ``enclosures``) equals the text of the right
           one, the tokens from opener to closer (both included) are
           returned.
        2. Comma (only if ``comma_encloses``): the nearest ``,``, ``;`` or
           ``—`` on each side is looked up. If either side stopped on a
           comma, the tokens between the two stopping positions (both
           included) are returned.
        3. Expansion: the unit grows token by token to the left and right
           while the neighbouring token passes the ``speech`` / ``literals``
           / ``include`` test. Enclosed groups met on the way are skipped
           and their tokens are removed from the result.

        Args:
            il_unit, ir_unit: Left and right end of the unit.
            il_boundary, ir_boundary: Furthest indices the context may
                reach.
            speech: Part-of-speech tags (compared with ``token.pos_``).
            literals: Lower-cased texts (compared with ``token.lower_``).
            include: If true, expansion stops at the first token that is in
                neither ``speech`` nor ``literals``; if false, at the first
                token that is in one of them.
            enclosures: Mapping of opening -> closing character.
            comma_encloses: Enables strategy 2.
            verbose: Print the result when the module-level
                ``VERBOSE_LEVEL`` is at least 1.

        Returns:
            A list of tokens, or ``None`` (after printing an error) if the
            indices are inconsistent.
        """
        UNIT = self.main.sp_doc[il_unit:ir_unit+1]
        
        if il_unit > ir_unit:
            print(f"Error: il_unit of {il_unit} greater than ir_unit of {ir_unit}")
            return None
        
        if il_boundary > il_unit:
            print(f"Error: il_unit of {il_unit} less than il_boundary of {il_boundary}")
            return None
        
        if ir_boundary < ir_unit:
            print(f"Error: ir_unit of {ir_unit} greater than ir_boundary of {ir_boundary}")
            return None
        
        # Caveat: Parentheticals
        # The context of a unit inside a set of enclosures should
        # not go farther than the boundaries of those enclosures.
        # However, we need to manually determine whether the unit
        # is in parentheses (or any set of the matching symbols
        # below).
        openers = list(enclosures.keys())
        closers = list(enclosures.values())
        enclosing_chars = [*closers, *openers]

        # Look for Group Punctuation on the Left
        # The scan starts on the unit's own left token and stops before
        # il_boundary, so the token at il_boundary itself is never inspected.
        # Commas are ignored here even if `enclosures` contains one.
        i = il_unit
        opener = None
        while i > il_boundary:
            token = self.main.sp_doc[i]
            if token.lower_ in enclosing_chars and token.lower_ != ",":
                opener = token
                break
            i -= 1

        # Look for Group Punctuation on the Right
        # Mirror image of the scan above; the token at ir_boundary itself
        # is never inspected.
        i = ir_unit
        closer = None
        while i < ir_boundary:
            token = self.main.sp_doc[i]
            if token.lower_ in enclosing_chars and token.lower_ != ",":
                closer = token
                break
            i += 1

        # If there's a group punctuation on the left
        # and right, and they match each other (e.g. '(' and ')'),
        # we return the text between the punctuations.
        # `opener` may actually hold a closing character (the scan accepts
        # any enclosing character); in that case the lookup below yields
        # None and the unit is not treated as a parenthetical.
        parenthetical = opener and closer and enclosures.get(opener.lower_) == closer.text
        if parenthetical:
            context = [t for t in self.main.sp_doc[opener.i:closer.i+1]]
            
            if verbose and VERBOSE_LEVEL >= 1:
                print(f"Parenthetical - Unit Context of '{UNIT}': {context}")
            
            return context

        # We can also check whether the unit is enclosed
        # by a comma or two, only if a comma can enclose.
        if comma_encloses:
            # Scan left for the nearest ',', ';' or '—'. If none is found,
            # i ends on il_boundary while i_token is the last token that
            # was inspected (the one at il_boundary + 1, or the token at
            # il_unit if the loop never ran).
            i = il_unit
            i_token = self.main.sp_doc[i]
            while i > il_boundary:
                i_token = self.main.sp_doc[i]
                if i_token.lower_ in [",", ";", "—"]:
                    break
                i -= 1

            # Same scan to the right.
            j = ir_unit
            j_token = self.main.sp_doc[j]
            while j < ir_boundary:
                j_token = self.main.sp_doc[j]
                if j_token.lower_ in [",", ";", "—"]:
                    break
                j += 1

            # Only a comma on at least one side counts as an enclosure;
            # ';' and '—' merely stop the scan.
            if (i_token and i_token.lower_ == ",") or (j_token and j_token.lower_ == ","):
                context = [t for t in self.main.sp_doc[i:j+1]]
            
                if verbose and VERBOSE_LEVEL >= 1:
                    print(f"Comma - Unit Context of '{UNIT}': {context}")
                    
                return context
            
        # As the unit is not a parenthetical, we will expand
        # outwards until we run into a stopping token. The exclude
        # list contains tokens that should be excluded from the
        # context. Currently, it will contain any parentheticals
        # that we run into.
        exclude = []

        # We can modify the enclosures after handling the parenthetical
        # situation to make the code easier.
        # NOTE(review): the statement below is an annotated expression
        # (`target : annotation`), not an assignment, so it does not add a
        # comma to `enclosures`. Replacing ':' with '=' alone would not
        # enable comma handling either: `openers` and `closers` were built
        # above and would not contain the comma, and the assignment would
        # mutate the dict passed in (by default the shared class-level
        # LAX_ENCLOSURES). The intended behavior needs to be confirmed.
        if comma_encloses:
            enclosures[","] : ","
        
        # Expand Left
        while il_unit > il_boundary:
            # Assuming that the current token is fine,
            # we look to the left.
            l_token = self.main.sp_doc[il_unit-1]

            if l_token.lower_ not in closers:
                in_set = l_token.pos_ in speech or l_token.lower_ in literals
                if in_set ^ include:
                    break
                il_unit -= 1
            # If it's a closing enclosure (e.g. ')', ']'),
            # we need to skip over whatever is contained in
            # that punctuation.
            else:
                i = il_unit - 1
                
                token = self.main.sp_doc[i]
                exclude.append(token)

                # We continue until we reach the boundary or
                # we find the matching opening character.
                # `closed` is a stack of the closers whose openers have
                # not been seen yet.
                # NOTE(review): a delimiter that is its own closer (e.g.
                # '—' in LAX_ENCLOSURES) always takes the "Found Closer"
                # branch, so it is pushed and never popped; the skip then
                # runs to il_boundary. Confirm whether that is intended.
                closed = []
                
                while i > il_boundary:
                    token = self.main.sp_doc[i]
                    # Found Closer
                    if token.lower_ in closers:
                        exclude.append(token)
                        closed.append(token.lower_)
                    # Currently Closed
                    elif closed:
                        exclude.append(token)
                        # Found Opener
                        if enclosures.get(token.lower_) == closed[-1]:
                            closed.pop()
                    else:
                        break
                    i -= 1
                
                # NOTE(review): the token at index i becomes the new left
                # end of the unit without being tested against
                # speech/literals — confirm.
                il_unit = i

        # Expand Right
        while ir_unit < ir_boundary:
            # We're checking the token to the right
            # to see if we can expand or not.
            r_token = self.main.sp_doc[ir_unit+1]

            if r_token.lower_ not in openers:
                in_set = r_token.pos_ in speech or r_token.lower_ in literals
                if in_set ^ include:
                    break
                ir_unit += 1
            # If the token to the right is an opener (e.g. '(', '['), we must skip
            # it, the parenthetical inside, and the closer.
            else:
                i = ir_unit + 1
                
                token = self.main.sp_doc[i]
                exclude.append(token)

                # We continue until we reach the boundary or
                # we find all the closers for the openers.
                # `opened` is a stack of the openers that have not been
                # closed yet; the same caveat about delimiters that are
                # their own closer applies here.
                opened = []
                
                while i < ir_boundary:
                    token = self.main.sp_doc[i]
                    # Found Opener
                    if token.lower_ in openers:
                        exclude.append(token)
                        opened.append(token.lower_)
                    # Currently Opened
                    elif opened:
                        exclude.append(token)
                        # Found Closer
                        if token.lower_ == enclosures.get(opened[-1]):
                            opened.pop()
                    else:
                        break
                    i += 1
                
                ir_unit = i
        
        # We remove the excluded tokens and return the context.
        context = [t for t in self.main.sp_doc[il_unit:ir_unit+1] if t not in exclude]

        if verbose and VERBOSE_LEVEL >= 1:
            print(f"Unit Context of '{UNIT}': {context}")
        
        return context


    
    def get_span_at_indices(self, l_index, r_index):
        text = self.main.sp_doc.text.lower()

        while text[l_index].isspace():
            l_index += 1

        while text[r_index].isspace():
            r_index -= 1

        if l_index > r_index:
            print(f"Error: l_index of {l_index} greater than r_index of {r_index}")
            return None
            
        l_token_i = self.main.token_at_char(l_index).i
        r_token_i = self.main.token_at_char(r_index).i
        
        return self.main.sp_doc[l_token_i:r_token_i+1]


    
    def get_base_nouns(self, span, return_tokens=False, immediate_stop=False):
        ending_nouns = []
        
        reversed_span = [t for t in span]
        reversed_span.reverse()
        
        for token in reversed_span:
            if token.pos_ in ["NOUN", "PROPN"]:
                ending_nouns.append(token if return_tokens else self.main.sp_doc[token.i:token.i+1])
                if immediate_stop:
                    break
            else:
                break

        return ending_nouns



    def flatten(self, arr):
        flat_arr = []

        if not isinstance(arr, list):
            return [arr]

        for element in arr:
            flat_arr.extend(self.flatten(element))

        return flat_arr


    def is_same_text(self, sp_a, sp_b):
        sp_b_text = sp_b.text.lower()
        sp_a_text = sp_a.text.lower()

        if sp_a_text == sp_b_text:
            return True
            
        sp_a_singular_texts = [sp_a_text] if sp_a[-1].tag_ in ["NN", "NNP"] else self.main.singularize(sp_a_text)
        sp_b_singular_texts = [sp_b_text] if sp_b[-1].tag_ in ["NN", "NNP"] else self.main.singularize(sp_b_text)

        if set(sp_a_singular_texts).intersection(sp_b_singular_texts):
            return True
        return False



    def has_same_base_nouns(self, sp_a, sp_b):
        sp_b_text = sp_b.text.lower()
        sp_b_0_text = sp_b[0].lower_
        sp_b_0_is_noun = sp_b[0].pos_ in ["NOUN", "PROPN"]
        
        sp_b_nouns = []
        sp_b_num_adjectives = 0
        
        for token in sp_b:
            if not sp_b_nouns and token.pos_ == "ADJ":
                sp_b_num_adjectives += 1
            elif token.pos_ in ["PROPN", "NOUN"]:
                sp_b_nouns.append(token)

        if not sp_b_nouns:
            return False

        sp_b_nouns_text = [noun.lower_ for noun in sp_b_nouns]
        sp_b_singular_texts = [" ".join(sp_b_nouns_text)] if sp_b_nouns[-1].tag_ in ["NN", "NNP"] else self.main.singularize(" ".join(sp_b_nouns_text))

        sp_a_text = sp_a.text.lower()
        sp_a_0_text = sp_a[0].lower_
        sp_a_0_is_noun = sp_a[0].pos_ in ["NOUN", "PROPN"]

        # Case Example: 'Hyla' v. 'Hyla tadpoles'
        if sp_a_0_text == sp_b_0_text and (sp_a_0_is_noun or sp_b_0_is_noun):
            if sp_a_text in sp_b_text or sp_b_text in sp_a_text:
                return True
        
        # Case Example: 'dogs' v. 'red dogs'
        sp_a_nouns = []
        sp_a_num_adjectives = 0
        for token in sp_a:
            if not sp_a_nouns and token.pos_ == "ADJ":
                sp_a_num_adjectives += 1
            elif token.pos_ in ["PROPN", "NOUN"]:
                sp_a_nouns.append(token)
        
        if not sp_a_nouns:
            return False
        
        sp_a_nouns_text = [noun.lower_ for noun in sp_a_nouns]
        
        if sp_a_nouns and sp_b_nouns and (
            (sp_a_num_adjectives == 1 and sp_b_num_adjectives == 0) or 
            (sp_b_num_adjectives == 1 and sp_a_num_adjectives == 0)
        ):
            sp_a_singular_texts = [" ".join(sp_a_nouns_text)] if sp_a_nouns[-1].tag_ in ["NN", "NNP"] else self.main.singularize(" ".join(sp_a_nouns_text))
            if set(sp_a_singular_texts).intersection(sp_b_singular_texts):
                return True

        return False



    def distinct_bounds(self, bounds, larger=True):
        dounds = []
    
        for bound in bounds:
            overlap = False
            
            for i, dound in enumerate(dounds):
                surround = bound[0] <= dound[0] <= bound[1] and bound[0] <= dound[1] <= bound[1]
                contains = dound[0] <= bound[0] <= dound[1] and dound[0] <= bound[1] <= dound[1]
                overlaps = surround or contains
                
                if (surround and larger) or (contains and not larger):
                    dounds[i] = bound
                    
            if not overlaps:
                dounds.append(bound)
    
        return list(set(dounds))