In [1]:
import csv
import glob
import json
import math
import os
import random
import time
from urllib.parse import quote

from fake_useragent import UserAgent
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

In [3]:
# Chrome profile to reuse for the scraping session. The defaults are the values
# this notebook was written with; set CHROME_USER_DATA_DIR and/or
# CHROME_PROFILE_DIRECTORY in the environment to run it on another machine
# without editing the notebook.
chrome_user_data_dir = os.environ.get(
    "CHROME_USER_DATA_DIR",
    "C:\\Users\\lbeln\\AppData\\Local\\Google\\Chrome\\User Data",
)
chrome_profile_directory = os.environ.get("CHROME_PROFILE_DIRECTORY", "Profile 1")

options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
# Spelled with the leading "--" like every other switch in this cell.
options.add_argument(f"--user-data-dir={chrome_user_data_dir}")
options.add_argument(f"--profile-directory={chrome_profile_directory}")
# A random user-agent string is picked each time this cell runs.
options.add_argument(f"--user-agent={UserAgent().random}")

# Module-level browser handle used by every helper below.
driver = webdriver.Chrome(options=options)
driver.get("https://www.google.com")

In [4]:
def format_search(q):
    """Return ``q`` encoded for use as a URL query-string value.

    Runs of whitespace are collapsed to a single space (leading/trailing
    whitespace is dropped) and the result is percent-encoded, so a space
    becomes ``%20`` and reserved characters such as ``?``, ``&``, ``#``,
    ``=`` and ``%`` can no longer terminate or corrupt the ``q=`` parameter.

    Args:
        q: The raw search text.

    Returns:
        The percent-encoded search text.
    """
    # safe='' so that "/" is escaped as well; it is data here, not a path separator.
    return quote(' '.join(q.split()), safe='')

In [5]:
def click_element(locator, number_clicks=1, base_wait_time=5, random_wait_time=5):
    """Click the element matched by ``locator`` up to ``number_clicks`` times.

    Before every click, and once more after the last one, the function sleeps
    for ``base_wait_time`` seconds plus a random whole number of seconds below
    ``random_wait_time``. Clicking stops early, without raising, the first time
    the element cannot be found within 5 seconds or the click itself fails.

    Args:
        locator: ``(By, value)`` tuple passed to ``EC.presence_of_element_located``.
        number_clicks: Maximum number of clicks to perform.
        base_wait_time: Fixed part of each pause, in seconds.
        random_wait_time: Exclusive upper bound of the random part of each
            pause, in seconds. ``0`` disables the random part.
    """
    def _pause():
        # random.randrange(0, 0) raises ValueError, so guard the "no jitter" case.
        jitter = random.randrange(0, random_wait_time) if random_wait_time > 0 else 0
        time.sleep(base_wait_time + jitter)

    for _ in range(number_clicks):
        _pause()
        try:
            element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(locator)
            )
            ActionChains(driver).move_to_element(element).click(element).perform()
        except Exception:
            # Best effort: a missing or unclickable element ends the click run.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt through,
            # so interrupting the kernel actually stops the notebook.
            break
    _pause()

In [6]:
def click_element_while(locator):
    """Keep clicking the element matched by ``locator`` until a click attempt fails.

    Each iteration sleeps a random whole number of seconds in ``[0, 20)``,
    waits up to 5 seconds for the element to be present, clicks it, and sleeps
    a random ``[0, 20)`` seconds again. The loop ends, without raising, the
    first time the element cannot be found or the click fails. If the element
    never disappears the loop does not terminate on its own.

    Args:
        locator: ``(By, value)`` tuple passed to ``EC.presence_of_element_located``.
    """
    while True:
        time.sleep(random.randrange(0, 20))
        try:
            element = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(locator)
            )
            ActionChains(driver).move_to_element(element).click(element).perform()
        except Exception:
            # ``Exception`` rather than a bare except: a kernel interrupt
            # (KeyboardInterrupt) must propagate instead of being treated as
            # "element gone".
            break
        time.sleep(random.randrange(0, 20))

In [7]:
def element_exists(locator1, locator2):
    """Report whether the current page contains an element for the given locator.

    Args:
        locator1: Locator strategy, forwarded as the first argument of
            ``driver.find_element`` (e.g. ``By.XPATH``).
        locator2: Locator value, forwarded as the second argument.

    Returns:
        ``True`` if ``driver.find_element`` returns, ``False`` if it raises.
    """
    try:
        # find_element either returns an element or raises; the old
        # ``element is not None`` test was therefore always True.
        driver.find_element(locator1, locator2)
    except Exception:
        # Still best-effort (any lookup failure means "not there"), but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return False
    return True

In [8]:
class Colors:
    """ANSI escape sequences for decorating text printed to the console.

    Wrap a message as ``f"{Colors.X}...{Colors.ENDC}"``; ``ENDC`` is the
    reset sequence that ends the decoration.
    """

    # Reset and text attributes (SGR 0, 1, 4).
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colour codes (SGR 91-96), in numeric order.
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
    OKCYAN = '\033[96m'

In [9]:
# Shared crawl state. Re-running this cell resets the crawl.
data = dict()     # scraped paper records, keyed by paper title
hrefs = dict()    # paper title -> paper page URL, filled by store_search_results()
visited = list()  # titles of papers whose page has already been scraped

In [10]:
def store_search_results():
    """Record every search-result link on the current page and return the titles.

    For each ``<a data-testid='search-result'>`` element, the text of its
    first ``<h2>`` is used as the title and the anchor's ``href`` is stored in
    the module-level ``hrefs`` dict under that title (overwriting any earlier
    entry with the same title).

    Results that have no ``<h2>``, an empty title, or no ``href`` are skipped:
    they cannot be visited later and would otherwise abort the whole harvest
    or leave an unusable entry in ``hrefs``.

    Returns:
        The titles recorded from this page, in page order. Empty when the page
        has no search results.
    """
    titles = []
    # find_elements returns a (possibly empty) list, never None.
    for search_result in driver.find_elements(By.XPATH, "//a[@data-testid='search-result']"):
        headings = search_result.find_elements(By.TAG_NAME, "h2")
        if not headings:
            continue
        title = headings[0].text
        href = search_result.get_attribute("href")
        if not title or not href:
            continue
        hrefs[title] = href
        titles.append(title)
    return titles

In [11]:
def next_file_number():
    """Return the next unused index for ``WebScrapingData<N>.csv`` in the working directory.

    Scans the current working directory for ``WebScrapingData*.csv`` and
    returns one more than the largest numeric ``<N>`` found, or ``0`` when no
    numbered file exists. Matching files whose suffix is not an integer
    (e.g. ``WebScrapingData.csv`` or ``WebScrapingDataOld.csv``) are ignored
    instead of raising ``ValueError``.
    """
    prefix_length = len("WebScrapingData")
    extension_length = len(".csv")
    max_number = -1
    for file in glob.glob("WebScrapingData*.csv"):
        try:
            number = int(file[prefix_length:-extension_length])
        except ValueError:
            # Not one of our numbered output files.
            continue
        max_number = max(max_number, number)
    return max_number + 1


# Output paths for this run; computed once so every save overwrites the same pair of files.
file_number = next_file_number()
data_file_name = f"./WebScrapingData{file_number}.csv"
dump_file_name = f"./WebScrapingDump{file_number}.txt"

In [12]:
def save_data():
    """Write everything in ``data`` to this run's CSV file and JSON dump file.

    Both files are opened in write mode, so each call replaces the previous
    contents. The CSV gets a header row followed by one row per record in
    ``data``; the dump file gets ``data`` serialised as a single JSON document.
    """
    columns = ['Title', 'Abstract', 'Links', 'Citations', 'References']

    with open(data_file_name, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=columns)
        csv_writer.writeheader()
        for record in data.values():
            csv_writer.writerow(record)

    with open(dump_file_name, 'w', newline='', encoding='utf-8') as dump_file:
        dump_file.write(json.dumps(data))

In [13]:
queries = [
    "Do trait-mediated interactions affect predators?"
]

# Seed the crawl: run each query on consensus.app, press "load more" up to
# twice, then record every result link on the page in `hrefs`.
for query in queries:
    search_url = "https://consensus.app/results/?q=" + format_search(query)
    driver.get(search_url)
    load_more_locator = (By.XPATH, "//button[@data-testid='loadmore-button']")
    click_element(load_more_locator, number_clicks=2)
    store_search_results()

In [14]:
# List the seed pages collected by the search step.
print("Root Pages (URLs):")
for title in hrefs:
    href = hrefs[title]
    print(f"Title: {title}\nHREF: {href}\n\n")

Root Pages (URLs):
Title: The contribution of trait-mediated indirect effects to the net effects of a predator
HREF: https://consensus.app/papers/the-contribution-of-traitmediated-indirect-effects-to-the-peacor-werner/0bd5bcdaf0c35b06920ba945b77f8241/?extracted-answer=Nonlethal+predator+effects+can+contribute+strongly+to+the+net+indirect+effects+of+predators%2C+with+effects+comparable+to+those+resulting+from+killing+prey.&q=Do+trait-mediated+interactions+affect+predators%3F


Title: Susceptibility to Predation Affects Trait-Mediated Indirect Interactions by Reversing Interspecific Competition
HREF: https://consensus.app/papers/susceptibility-to-predation-affects-traitmediated-mowles-rundle/488b90f9ab9e55f9975415571c3daabc/?extracted-answer=Susceptibility+to+predation+can+influence+species+interactions+by+reversing+interspecific+competition.&q=Do+trait-mediated+interactions+affect+predators%3F


Title: Predator and prey functional traits: understanding the adaptive machinery driving pre

In [15]:
stop_size = 10       # stop once this many papers have been scraped
auto_save_size = 1   # next paper count at which to autosave; grows by 1.5x after each save


def _collect_tab_titles(tab_name):
    """Return the titles listed under the ``tab_name`` tab of the current paper page.

    ``tab_name`` is ``"citations"`` or ``"references"``. If the page has no
    such tab (neither its selected nor its not-selected button exists) an
    empty list is returned. Otherwise the tab is opened if needed, its
    "load more" button is clicked once, and every search-result link on the
    page is recorded via ``store_search_results()``.
    """
    selected_xpath = f"//button[@data-testid='tab-{tab_name}-selected']"
    not_selected_xpath = f"//button[@data-testid='tab-{tab_name}-not-selected']"
    tab_open = element_exists(By.XPATH, selected_xpath)
    tab_closed = element_exists(By.XPATH, not_selected_xpath)
    if not (tab_open or tab_closed):
        return []
    if tab_closed:
        click_element((By.XPATH, not_selected_xpath))
    # The load-more button's test id uses the capitalised tab name.
    click_element((By.XPATH, f"//button[@data-testid='loadmore-button-{tab_name.capitalize()}']"))
    return store_search_results()


# Crawl: visit every known paper page that has not been scraped yet; the
# citations/references found on each page are added to `hrefs` and become
# candidates for the next pass. Ends when `stop_size` papers are stored or a
# full pass finds nothing new.
try:
    while len(data) < stop_size:
        changed = False
        # Iterate over a snapshot: store_search_results() adds to `hrefs`
        # while this loop is running.
        for href_title, href in list(hrefs.items()):
            # Enforce the limit per paper, not only between whole passes.
            if len(data) >= stop_size:
                break
            if href_title in visited:
                continue
            driver.get(href)

            # find_element raises NoSuchElementException when the element is
            # missing (it never returns None). Read the text straight away:
            # the element handles may go stale after the tab clicks below.
            title_text = driver.find_element(By.TAG_NAME, "h1").text
            if href_title != title_text:
                raise ValueError(f"Inconsistent Titles: '{href_title}' and '{title_text}'")

            print(f"\tSearching Paper:\n\t\tTitle: '{title_text}'")

            abstract_text = driver.find_element(By.XPATH, "//p[@data-testid='abstract']").text

            click_element((By.XPATH, "//button[@aria-label='Additional Links']"))
            try:
                link_elements = WebDriverWait(driver, 20).until(
                    EC.presence_of_all_elements_located((
                        By.XPATH, "//a[@role='menuitem']"
                    ))
                )
            except TimeoutException:
                # until() raises on timeout instead of returning an empty
                # result; a paper without a links menu simply has no links.
                link_elements = []
            links = [link.get_attribute("href") for link in link_elements]

            citations = _collect_tab_titles("citations")
            print("\t\tCitations:")
            for i, citation in enumerate(citations):
                print(f"\t\t\t{i+1}. '{citation}'")

            references = _collect_tab_titles("references")
            print("\t\tReferences:")
            for i, reference in enumerate(references):
                print(f"\t\t\t{i+1}. '{reference}'")

            data[title_text] = {
                "Title": title_text,
                "Abstract": abstract_text,
                "Links": json.dumps(links),
                "Citations": json.dumps(citations),
                "References": json.dumps(references)
            }

            changed = True
            visited.append(title_text)
            print(f"\t{Colors.OKBLUE}Done Searching Paper: '{title_text}'\n\t\tNumber Papers: {len(data)}{Colors.ENDC}")

            if len(data) >= auto_save_size:
                save_data()
                print(f"{Colors.OKGREEN}{len(data)} Papers Saved{Colors.ENDC}")
                auto_save_size = math.ceil(auto_save_size * 1.5)

        if not changed:
            print("Search Complete")
            break
finally:
    # Autosaves become sparser as the crawl grows, so flush whatever has been
    # collected on every exit path, including a crash such as a lost browser
    # session.
    if data:
        save_data()

	Searching Paper:
		Title: 'The contribution of trait-mediated indirect effects to the net effects of a predator'
		Citations:
			1. 'The primacy of density‐mediated indirect effects in a community of wolves, elk, and aspen'
			2. 'Size‐specific reduction in kelp consumption by New Zealand urchins exposed to chemical cues from the red rock lobster'
			3. 'Chimeric states induced by higher-order interactions in coupled prey-predator systems.'
			4. 'Fish microbiota repel ovipositing mosquitoes.'
			5. 'Housework or vigilance? Bilbies alter their burrowing activity under threat of predation by feral cats'
			6. 'DNA metabarcoding reveals evidence of inter- and intra-guild predation by Scylla paramamosain in a marine ecosystem'
			7. 'Dynamic trait distribution as a source for shifts in interaction strength and population density'
			8. 'Predator-prey systems as models for integrative research in biology: the value of a non-consumptive effects framework.'
			9. 'Patchy indirect effects of

InvalidSessionIdException: Message: invalid session id
Stacktrace:
	GetHandleVerifier [0x00007FF7DE7A6EE5+28773]
	(No symbol) [0x00007FF7DE7125D0]
	(No symbol) [0x00007FF7DE5A8DDC]
	(No symbol) [0x00007FF7DE5EF17F]
	(No symbol) [0x00007FF7DE627112]
	(No symbol) [0x00007FF7DE621AA9]
	(No symbol) [0x00007FF7DE620B59]
	(No symbol) [0x00007FF7DE575595]
	GetHandleVerifier [0x00007FF7DEAF422D+3490733]
	GetHandleVerifier [0x00007FF7DEB0BA13+3586963]
	GetHandleVerifier [0x00007FF7DEB0144D+3544525]
	GetHandleVerifier [0x00007FF7DE86C9AA+838442]
	(No symbol) [0x00007FF7DE71D01F]
	(No symbol) [0x00007FF7DE5741AE]
	GetHandleVerifier [0x00007FF7DEB7ED78+4058872]
	BaseThreadInitThunk [0x00007FFB69B4E8D7+23]
	RtlUserThreadStart [0x00007FFB6A95BF2C+44]


In [None]:
# End the WebDriver session created in the setup cell; run this once scraping is finished.
driver.quit()