In [None]:
import spacy
import stanza
import textacy

In [None]:
from fastcoref import FCoref
from spacy.matcher import Matcher

In [None]:
from taxonerd import TaxoNERD

In [None]:
!pip install https://github.com/nleguillarme/taxonerd/releases/download/v1.5.4/en_ner_eco_md-1.1.0.tar.gz
!pip install https://github.com/nleguillarme/taxonerd/releases/download/v1.5.4/en_ner_eco_biobert-1.1.0.tar.gz
# !pip install https://github.com/nleguillarme/taxonerd/releases/download/v1.5.4/en_ner_eco_md_weak-1.1.0.tar.gz
# !pip install https://github.com/nleguillarme/taxonerd/releases/download/v1.5.4/en_ner_eco_biobert_weak-1.1.0.tar.gz

In [16]:
%run Possession2.ipynb

<spacy.matcher.dependencymatcher.DependencyMatcher object at 0x000002404A54FA00>
Sentence: The dog, whose tail wagged excitedly, was greeted warmly by his owner.
	Match 14499690083660312615 - Pattern 1
		owned: tail
		owner: whose

	Match 14499690083660312615 - Pattern 1
		owned: owner
		owner: his


Sentence: Because the children's toys were scattered everywhere, their mother asked them to tidy up.
	Match 14499690083660312615 - Pattern 1
		owned: toys
		owner: children

	Match 14499690083660312615 - Pattern 1
		owned: mother
		owner: their


Sentence: While I was at my friend's house, I saw his collection of vintage cars.
	Match 14499690083660312615 - Pattern 1
		owned: friend
		owner: my

	Match 14499690083660312615 - Pattern 1
		owned: house
		owner: friend

	Match 14499690083660312615 - Pattern 1
		owned: collection
		owner: his

	Match 14621589392117008497 - Pattern 2
		owned: collection
		adp: of
		owner: cars

	Match 12054268835912785357 - Pattern 3
		verb: saw
		owner: I
		owne

In [None]:
abstract = "This investigation examines the role of trait-mediated indirect interactions in a simple aquatic food web. We conducted the experiments in cattle watering tanks in order to establish whether competitive and predator-prey interactions between two species are affected by other species in the system; i.e., are pairwise interaction strengths affected by the background species assemblage? We examined the survival and growth response of small bullfrog (Rana catesbeiana) and small green frog (Rana clamitans) tadpoles in the presence and absence of a competitor (large bullfrogs), the lethal presence of the larval odonate predator Tramea lacerata,and the nonlethal (caged) presence of the larval odonate predators Anax junius and Anax longipes. We demonstrate that large bullfrog competitors and caged Anax affect traits (foraging activity level) of small bullfrog and small green frog tadpoles and that these changes in traits, in turn, affect interactions of the small tadpole species with each other and with the other species. In particular, the following four trait- mediated indirect interactions were evident: (1) Presence of large bullfrog competitors increased the predation rate of Trameaon small green frogs and small bullfrogs. (2) Presence of nonlethal Anax reduced the predation rate of Tramea on small green frogs. (3) Presence of nonlethal Anax increased the competitive advantage of bullfrogs over green frogs. (4) Presence of nonlethal Anax facilitated midge invasion of the experimental units. The pro- posed mechanisms (changes in small tadpole activity) involved in these trait-mediated indirect interactions were supported by observational data on tadpole activity and resource levels in the experimental units, and in laboratory experiments examining tadpole activity responses to predators. The occurrence of strong trait-mediated indirect interactions in this simple food web underscores the potential importance of such interactions in animal communities."

In [None]:
def clean(abstract):
    return abstract

In [None]:
sp_nlp = spacy.load("en_core_web_sm")
st_nlp = stanza.Pipeline(lang='en', processors='tokenize')

In [None]:
fcoref = FCoref(enable_progress_bar=False)

In [None]:
taxonerd = TaxoNERD()
tn_nlp = taxonerd.load(model="en_ner_eco_biobert")

In [None]:
# Retrieves the token at the given index.
# should be mapped to avoid having to do this for loop multiple times rather than a simple lookup
def token_at_char_index(sp_doc, index):
    for token in sp_doc:
        if token.idx == index:
            return token
    return None

In [43]:
# Retrieves the clusters for a list of words.
def get_clusters_and_noun_chunks(sp_doc, tokens, clusters_mapped, noun_chunks_mapped, possessions_mapped):
    token_indices = [token.idx for token in tokens]
    all_clusters = []
    all_noun_chunks = []
    all_possessions = []
    for token_index in token_indices:
        # Clusters
        if token_index in clusters_mapped:
            for cluster_token_index in clusters_mapped[token_index]:
                all_clusters.append(token_at_char_index(sp_doc, cluster_token_index[0]))
        # Noun Chunks
        if token_index in noun_chunks_mapped:
            for token in noun_chunks_mapped[token_index]:
                all_noun_chunks.append(token)
        # Possessions
        if token_index in possessions_mapped:
            for token in possessions_mapped[token_index]:
                all_possessions.append(token)
    return all_clusters, all_noun_chunks, all_possessions

In [55]:
def species_or_trait(sp_doc, tn_doc, words, clusters, noun_chunks, possessions):
    # print(f"Words: {words}\nClusters: {clusters}\nNouns: {noun_chunks}")
    species_indices = []
    for species_span in tn_doc.ents:
        for species in species_span:
            species_indices.append(species.idx)
    for token in [*words, *clusters, *noun_chunks, *possessions]:
        if token.idx in species_indices:
            return True
    return False

In [56]:
def has_tmim_example(abstract):
    abstract = clean(abstract)
    
    tn_doc = tn_nlp(abstract)
    # print(tn_doc.ents)
    
    sp_doc = sp_nlp(abstract)
    coreferences = fcoref.predict(texts=[abstract])
    
    # print(f"Clusters: {[sentence.get_clusters(as_strings=True) for sentence in fcoref.predict(texts=[abstract])]}\n")
    clusters_mapped = {}
    for sentence in coreferences:
        clusters = sentence.get_clusters(as_strings=False)
        for cluster in clusters:
            for text in cluster:
                clusters_mapped[text[0]] = cluster
    # print(f"Clusters Mapped: {clusters_mapped}\n")
    
    noun_chunks = sp_doc.noun_chunks
    # print(f"Noun Chunks: {noun_chunks}")
    noun_chunks_mapped = {}
    for noun_chunk in noun_chunks:
        # print(noun_chunk)
        for word in noun_chunk:
            noun_chunks_mapped[word.idx] = noun_chunk
    # print(f"Noun Chunks Mapped: {noun_chunks_mapped}")

    matcher = DependencyMatcher(sp_nlp.vocab)
    for pattern_id, pattern in patterns.items():
        matcher.add(pattern_id, [pattern])
    possessions = matcher(sp_doc)
    possessions_mapped = {}
    for match_id, token_ids in possessions:
        pattern_id = sp_nlp.vocab.strings[match_id]
        # print(f"\tMatch {match_id} - Pattern {pattern_id[-1]}")

        owner = None
        owned = None
        for i in range(len(token_ids)):
            right_id = patterns[pattern_id][i]['RIGHT_ID']
            if right_id == OWNER:
                owner = sp_doc[token_ids[i]]
            if right_id == OWNED:
                owned = sp_doc[token_ids[i]]
            # print(f"\t\t{patterns[pattern_id][i]['RIGHT_ID']}: {sp_doc[token_ids[i]].text}")
        if owner.idx not in possessions_mapped:
            possessions_mapped[owner.idx] = []
        possessions_mapped[owner.idx].append(owned)

        if owned.idx not in possessions_mapped:
            possessions_mapped[owned.idx] = []
        possessions_mapped[owned.idx].append(owner)
        # print()
    # print()
    # print(possessions_mapped)

    found_tmim_example = False
    for sentence in sp_doc.sents:
        print(f"Sentence: {sentence.text}")
        svo_triples = textacy.extract.subject_verb_object_triples(sp_doc)
        for svo_triple in svo_triples:
            # print(svo_triple)
            subject_clusters, subject_noun_chunks, subject_possessions = get_clusters_and_noun_chunks(sp_doc, svo_triple.subject, clusters_mapped, noun_chunks_mapped, possessions_mapped)
            valid_sub = species_or_trait(sp_doc, tn_doc, svo_triple.subject, subject_clusters, subject_noun_chunks, subject_possessions)
    
            object_clusters, object_noun_chunks, object_possessions = get_clusters_and_noun_chunks(sp_doc, svo_triple.object, clusters_mapped, noun_chunks_mapped, possessions_mapped)
            valid_obj = species_or_trait(sp_doc, tn_doc, svo_triple.object, object_clusters, object_noun_chunks, object_possessions)

            if valid_sub and valid_obj:
                print(f"\tFOUND: {sentence.text}")
                found_tmim_example = True
    return found_tmim_example

In [57]:
has_tmim_example(abstract)

04/06/2025 18:30:48 - INFO - 	 Tokenize 1 inputs...
Map: 100%|██████████| 1/1 [00:00<00:00, 18.33 examples/s]
04/06/2025 18:30:48 - INFO - 	 ***** Running Inference on 1 texts *****


Sentence: This investigation examines the role of trait-mediated indirect interactions in a simple aquatic food web.
	FOUND: This investigation examines the role of trait-mediated indirect interactions in a simple aquatic food web.
	FOUND: This investigation examines the role of trait-mediated indirect interactions in a simple aquatic food web.
Sentence: We conducted the experiments in cattle watering tanks in order to establish whether competitive and predator-prey interactions between two species are affected by other species in the system; i.e., are pairwise interaction strengths affected by the background species assemblage?
	FOUND: We conducted the experiments in cattle watering tanks in order to establish whether competitive and predator-prey interactions between two species are affected by other species in the system; i.e., are pairwise interaction strengths affected by the background species assemblage?
	FOUND: We conducted the experiments in cattle watering tanks in order to e

True