In [1]:
import csv
import requests
from pyalex import Works
from itertools import chain

In [2]:
# Observations:
# The inclusion of the dash made no difference to the number of works returned 
# Both https://api.openalex.org/works?search=trait%20mediated and https://api.openalex.org/works?search=trait-mediated returned 683442 results.
# I don't think that you should need to include the wildcard character (*) as stemming is supposedly used in the search, but I did get different
# results when trying "dog" versus "dogs". This could be because searching "dog" returns the papers that have "dog" and "dogs", while searcing "dogs"
# returns the papers that have "dogs" -- something to that effect. So, I will use the singular version. Apparently, regex characters (*, ?, ~) are removed.
# OpenAlex looks for "matches in titles, abstracts, and fulltext" [1]. Below, I'm only using the title, at least I think. I may have changed it later on
# and forgot to update this sentence.
# 1. https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/search-entities

# Level 1 Keywords
keywords_1 = [
    "trait",
    "phenotype"
]

# Level 2 Keywords
keywords_2 = [
    "trait-mediated",
    "higher-order interaction",
    "polymorphism",
    "interaction modification",
    "indirect effect"
]

# Level 3 Keywords
keywords_3 = [
    "apparent competition",
    "resource competition",
    "keystone predation",
    "intraguild predation",
    "intransitive competition",
    "trophic chain",
    "competition chain",
    "mutual competition"
]

In [3]:
# OpenAlex doesn't store the literal abstract due to legal concerns.
# I don't have the function they use to revert the abstract either,
# so here's a function that should be able to do that.

def revert_abstract(inverted_abstract):
    if not inverted_abstract:
        return ""
    abstract = ""
    i = 0
    while True:
        index_found = False
        for k, v in inverted_abstract.items():
            # print(k, v)
            if i in v:
                if abstract and k not in [".", ",", "?", "!"]:
                    abstract += " "
                abstract += k
                i += 1
                index_found = True
        if not index_found:
            break
    return abstract

In [4]:
search_1 = "|".join(keywords_1)
print(f"Search for Level 1 Keywords: {search_1}")
print()

num_pages = 0
max_pages = 50
works = Works().search_filter(title=search_1)
pager = works.paginate(per_page=200)

print(f"Count: {works.count()}")
for page in pager:
    for work in page:
        if num_pages > max_pages:
            break
        print(work['title'])
        num_pages += 1
    if num_pages > max_pages:
        break

# The first 50 papers should be returned. You'll see that either the word "trait*" or the word "phenotype*" is mentioned.
# However, as discussed, those keywords are very broad and include papers that are not ecology-related. So, I will filter
# out said papers (these papers that include a level 1 keyword) by looking for a level 2 keyword. This will then be repeated
# with the level 3 keywords. This is basically looking for [Level 1 Keyword 1 || ... || Level 1 Keyword x] && [Level 2 Keyword 1 || ... || Level 2 Keyword y] && [Level 3 Keyword 1 || ... || Level 3 Keyword z].
# At least, I think. I could be wrong. But, I'll print as I go to check.

data = [["Title", "DOI", "Abstract"], *list(map(lambda d: [d["title"], d["doi"], d["abstract"]], chain(*works.paginate(per_page=200))))]
with open('output1.csv', 'w', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(data)

Search for Level 1 Keywords: trait|phenotype

Count: 387635
Manual for the State-Trait Anxiety Inventory
Frailty in Older Adults: Evidence for a Phenotype
Genetics and Analysis of Quantitative Traits
Targeted gene expression as a means of altering cell fates and generating dominant phenotypes
The Big Five Trait taxonomy: History, measurement, and theoretical perspectives.
The UK Biobank resource with deep phenotyping and genomic data
GCTA: A Tool for Genome-wide Complex Trait Analysis
TASSEL: software for association mapping of complex traits in diverse samples
Empirical threshold values for quantitative trait mapping.
Genetics and analysis of quantitative traits
The Metabolic Phenotype in Obesity: Fat Mass, Body Fat Distribution, and Adipose Tissue Function
Computational Radiomics System to Decode the Radiographic Phenotype
The Extended Phenotype
Let the concept of trait be functional!
The structure of phenotypic personality traits.
Genetic Dissection of Complex Traits
Tensional homeo

In [5]:
search_1 = "|".join(keywords_1)
search_2 = "|".join(keywords_2)

print(f"Search for Level 1 Keywords: {search_1}")
print(f"Search for Level 2 Keywords: {search_2}")
print()

num_pages = 0
max_pages = 50
works = Works().search_filter(title=search_1).filter(title={"search": search_2})
pager = works.paginate(per_page=200)
print(f"Count: {works.count()}")
for page in pager:
    for work in page:
        if num_pages > max_pages:
            break
        print(work['title'])
        num_pages += 1
    if num_pages > max_pages:
        break

# As can be seen, it seems to work. Each paper contains at least one of the level 1 keywords and at least one of the level 2 keywords.
# However, there are still some papers that are irrelevant. Perhaps with the third keyword level, they will be filtered out.
# Using the zero-shot classifier may also help with this. I'll also check the sizes of the papers returned at each level to make
# sure it's working properly (the set of papers returned should decrease or remain the same as each level is added to the search).

data = [["Title", "DOI", "Abstract"], *list(map(lambda d: [d["title"], d["doi"], d["abstract"]], chain(*works.paginate(per_page=200))))]
with open('output2.csv', 'w', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(data)

Search for Level 1 Keywords: trait|phenotype
Search for Level 2 Keywords: trait-mediated|higher-order interaction|polymorphism|interaction modification|indirect effect

Count: 8831
Association of Anxiety-Related Traits with a Polymorphism in the Serotonin Transporter Gene Regulatory Region
A REVIEW OF TRAIT-MEDIATED INDIRECT INTERACTIONS IN ECOLOGICAL COMMUNITIES
Trophic cascades: the primacy of trait‐mediated indirect interactions
Resolution of quantitative traits into Mendelian factors by using a complete linkage map of restriction fragment length polymorphisms
Dopamine D4 receptor (D4DR) exon III polymorphism associated with the human personality trait of Novelty Seeking
The contribution of trait-mediated indirect effects to the net effects of a predator
CONNECTING THEORETICAL AND EMPIRICAL STUDIES OF TRAIT-MEDIATED INTERACTIONS
The Catechol-O-Methyltransferase Polymorphism: Relations to the Tonic–Phasic Dopamine Hypothesis and Neuropsychiatric Phenotypes
Fatal Familial Insomnia and

In [6]:
# I am using requests instead of the OpenAlex interface because either I am missing some important knowledge
# that's causing the returned papers to be faulty or there is a bug. Either way, using the API directly works.
# Possibly. So, I'll do that.

# As you can see below, there's only 6 papers (assuming nothing is wrong) that contain at least one keyword from each level.
# Perhaps using only levels 1 and 2 with zero-shot classification could capture more papers while removing the extraneous ones.
# Looking closely, there is an issue. The last paper does not have a keyword from level 2 or level 3. But, maybe it's just how the
# searching works? It has the words "trophic", "trait", and "-mediated" so maybe it counts in some way.

search_1 = "|".join(keywords_1).replace(" ", "%20")
search_2 = "|".join(keywords_2).replace(" ", "%20")
search_3 = "|".join(keywords_3).replace(" ", "%20")

print(f"Search for Level 1 Keywords: {search_1}")
print(f"Search for Level 2 Keywords: {search_2}")
print(f"Search for Level 3 Keywords: {search_3}")
print()

def get_page(page):
    url = f"https://api.openalex.org/works?page={page}&filter=title.search:{search_1},title.search:{search_2},title.search:{search_3}"
    print(f"URL: {url}\n")
    data = requests.get(url).json()
    return data['results']

page = 1
papers = []
while True:
    page_papers = get_page(page)
    if len(page_papers) <= 0:
        break
    papers += page_papers
    page += 1

print(f"Count: {len(papers)}\n")
for i in range(0, min(50, len(papers))):
    print(papers[i]['title'])

csv_data = [["Title", "DOI", "Abstract"], *list(map(lambda d: [d["title"], d["doi"], revert_abstract(d["abstract_inverted_index"])], papers))]
with open('output3.csv', 'w', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(csv_data)

# There are now 153 papers.

Search for Level 1 Keywords: trait|phenotype
Search for Level 2 Keywords: trait-mediated|higher-order%20interaction|polymorphism|interaction%20modification|indirect%20effect
Search for Level 3 Keywords: apparent%20competition|resource%20competition|keystone%20predation|intraguild%20predation|intransitive%20competition|trophic%20chain|competition%20chain|mutual%20competition

URL: https://api.openalex.org/works?page=1&filter=title.search:trait|phenotype,title.search:trait-mediated|higher-order%20interaction|polymorphism|interaction%20modification|indirect%20effect,title.search:apparent%20competition|resource%20competition|keystone%20predation|intraguild%20predation|intransitive%20competition|trophic%20chain|competition%20chain|mutual%20competition

URL: https://api.openalex.org/works?page=2&filter=title.search:trait|phenotype,title.search:trait-mediated|higher-order%20interaction|polymorphism|interaction%20modification|indirect%20effect,title.search:apparent%20competition|resource%20com

In [7]:
# There's only 6 papers after using the third level of keywords which could mean that it needs to be broader.
# Here's the level 3 keywords where each query that contained more than one word (e.g. "resource competition") is broken down.
# I'll try the search with these keywords.
simple_keywords_3 = list(set(" ".join(keywords_3).split(" ")))
print(simple_keywords_3)

['resource', 'trophic', 'intraguild', 'mutual', 'keystone', 'intransitive', 'apparent', 'competition', 'predation', 'chain']


In [8]:
search_1 = "|".join(keywords_1).replace(" ", "%20")
search_2 = "|".join(keywords_2).replace(" ", "%20")
search_3 = "|".join(simple_keywords_3).replace(" ", "%20")

print(f"Search for Level 1 Keywords: {search_1}")
print(f"Search for Level 2 Keywords: {search_2}")
print(f"Search for Level 3 Keywords: {search_3}")
print()

def get_page(page):
    url = f"https://api.openalex.org/works?page={page}&filter=title.search:{search_1},title.search:{search_2},title.search:{search_3}"
    print(f"URL: {url}\n")
    data = requests.get(url).json()
    return data['results']

page = 1
papers = []
while True:
    page_papers = get_page(page)
    if len(page_papers) <= 0:
        break
    papers += page_papers
    page += 1

print(f"Count: {len(papers)}\n")
for i in range(0, min(50, len(papers))):
    print(papers[i]['title'])

csv_data = [["Title", "DOI", "Abstract"], *list(map(lambda d: [d["title"], d["doi"], revert_abstract(d["abstract_inverted_index"])], papers))]
with open('output3-simple.csv', 'w', encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(csv_data)

# There are now 153 papers.

Search for Level 1 Keywords: trait|phenotype
Search for Level 2 Keywords: trait-mediated|higher-order%20interaction|polymorphism|interaction%20modification|indirect%20effect
Search for Level 3 Keywords: resource|trophic|intraguild|mutual|keystone|intransitive|apparent|competition|predation|chain

URL: https://api.openalex.org/works?page=1&filter=title.search:trait|phenotype,title.search:trait-mediated|higher-order%20interaction|polymorphism|interaction%20modification|indirect%20effect,title.search:resource|trophic|intraguild|mutual|keystone|intransitive|apparent|competition|predation|chain

URL: https://api.openalex.org/works?page=2&filter=title.search:trait|phenotype,title.search:trait-mediated|higher-order%20interaction|polymorphism|interaction%20modification|indirect%20effect,title.search:resource|trophic|intraguild|mutual|keystone|intransitive|apparent|competition|predation|chain

URL: https://api.openalex.org/works?page=3&filter=title.search:trait|phenotype,title.search:trait-medi