In [132]:
import pandas as pd
import os
import openai
import sys, pathlib, fitz
import re
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings.openai  import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain import PromptTemplate
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

In [2]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key  = os.environ['OPENAI_API_KEY']

In [148]:
def has_citations(page_text):
    # List of variations of "citations" to check for
    citation_variations = ["Citations", "Literature Cited", "Literature Citations"]

    for variation in citation_variations:
        if variation in page_text:
            return True
    return False

def remove_citation_pages(pdf_path):
    doc = fitz.open(pdf_path)
    non_citation_pages = []

    for page_num in range(doc.page_count):
        page = doc[page_num]
        page_text = page.get_text()

        if not has_citations(page_text):
            non_citation_pages.append(page_num)

    # Create a new PDF with only non-citation pages
    new_pdf = fitz.open()
    for page_num in non_citation_pages:
        new_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)

    return new_pdf


input_pdf_path = './hms/fhl_2014_Charifson_34622 (1).pdf'

# Create a new PDF with citation pages removed
new_pdf = remove_citation_pages(input_pdf_path)

# Save the new PDF to 'output.pdf'
output_pdf_path = 'output.pdf'
new_pdf.save(output_pdf_path)
# new_pdf.close()


In [149]:
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text

# Replace 'output.pdf' with the path to the PDF you created with citation pages removed
new_pdf_path = 'output.pdf'

# Extract text from the new PDF
new_pdf_text = extract_text_from_pdf(new_pdf_path)

# Now you can manipulate the extracted text or perform any other actions as needed
# For example, you can print the extracted text:
print(new_pdf_text)

Charifson 1 
 
Snail Predation by Hemigrapsus nudus and Hemigrapsus oregonensis: Sex and Species 
Differences in Chela Size. 
 
David Charifson1,2 
 
Marine Invertebrate Zoology 
Summer 2014 
 
 
 
 
1 Friday Harbor Laboratories, University of Washington, Friday Harbor, WA 98250 
2 Department of Ecology and Evolution, Stony Brook University, Stony Brook, NY 11794-5245 
 
 
 
 
Contact information: 
David Charifson 
Department of Ecology and Evolution 
Stony Brook University 
650 Life Sciences Building 
Stony Brook, NY 11794-5245 
David.charifson@stonybrook.edu  
 
 
 
 
Keywords: Hemigrapsus nudus, Hemigrapsus orogenensis, Littorina scutulata, Lacuna vincta, 
predation, sexual dimorphism, character displacement  
 
 
Charifson 2 
 
Abstract: 
 
The relationship between propus size and use of snails as prey in sympatric populations 
of Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal 
width increases with body size in a predictable manner in male 

# ConversationalRetrievalChain + VectorStoreRetrieverMemory

In [5]:
tk_text_splitter = TokenTextSplitter(chunk_size = 800, chunk_overlap = 50)

docs = tk_text_splitter.split_text(new_pdf_text)

In [6]:
#Initialize Embeddings
embeddings = OpenAIEmbeddings()

In [7]:
vector_store = FAISS.from_texts(docs, embeddings).as_retriever(search_type = "similarity", search_kwargs= {"k": 3})

In [8]:
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

compressor = LLMChainExtractor.from_llm(llm)

compression_retriever = ContextualCompressionRetriever(base_compressor = compressor,base_retriever=vector_store)


In [9]:
# # Build prompt
# template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer Keep the answer as concise as possible. Below are some helpful definitions.

# 1. SPECIES NAME: The scientific name (species name) of any plant, animal, fungus, alga or bacterium consists of two Latinized words. The first word is the name of the genus to which the organism belongs. The second word is the specific epithet or specific term of the species. Together, the genus plus the specific epithet make up the species name. The species name and scientific name are synonyms.
# 2. HABITAT: A species habitat can be seen as the physical manifestation of its ecological niche.
# 3. LOC: Name of any geographic location, like cities, countries, continents, districts etc.

# Examples:

# 1. Sentence: Strongylocentrotus fransiscanus and S. purpuratus were obtained from the subtidal and intertidal regions, respectively, of Monterey Bay.
# "Output: {{'SPECIES NAME': ['Strongylocentrotus fransiscanus', 'S. purpuratus'], 'HABITAT': ['subtidal', 'intertidal'], 'LOC': ['Monterey Bay']}}

# 2. Sentence: Cucumaria curata and C. pseudocurata live and feed in the hydrodynamically stressful environment of exposed intertidal areas.
# Output: {{'SPECIES NAME': ['Cucumaria curata', 'C. pseudocurata'], 'HABITAT': ['exposed intertidal'], 'LOC': ['None']}}\n"

# {context}
# Question: {question}
# Helpful Answer:"""
# CONDENSE_QUESTION_PROMPT   = PromptTemplate.from_template(template)


In [11]:
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. Please use a maximum of 4 sentences. If you do not know the answer reply with 'I am sorry'.

1. SPECIES NAME: The scientific name (species name) of any plant, animal, fungus, alga or bacterium consists of two Latinized words. The first word is the name of the genus to which the organism belongs. The second word is the specific epithet or specific term of the species. Together, the genus plus the specific epithet make up the species name. The species name and scientific name are synonyms.
2. HABITAT: A species habitat can be seen as the physical manifestation of its ecological niche.
3. LOC: Name of any geographic location, like cities, countries, continents, districts etc.

Examples:

1. Sentence: Strongylocentrotus fransiscanus and S. purpuratus were obtained from the subtidal and intertidal regions, respectively, of Monterey Bay.
"Output: {{'SPECIES NAME': ['Strongylocentrotus fransiscanus', 'S. purpuratus'], 'HABITAT': ['subtidal', 'intertidal'], 'LOC': ['Monterey Bay']}}

2. Sentence: Cucumaria curata and C. pseudocurata live and feed in the hydrodynamically stressful environment of exposed intertidal areas.
Output: {{'SPECIES NAME': ['Cucumaria curata', 'C. pseudocurata'], 'HABITAT': ['exposed intertidal'], 'LOC': ['None']}}\n"

Chat History:
{chat_history}
Follow Up Input: {question}
"""

In [12]:
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [12]:
# question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
# doc_chain = load_qa_with_sources_chain(llm, chain_type="stuff")

In [12]:
memory = VectorStoreRetrieverMemory(
memory_key="chat_history",
return_docs=True,
retriever=vector_store,
return_messages=True)

In [14]:
#Formats the chat history string
def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Human:{human}\nAI:{ai}")
    return "\n".join(res)

In [14]:
qa_chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(), 
                                                 compression_retriever,
                                                 memory = memory,
                                                 return_source_documents = True,
                                                 condense_question_prompt = CUSTOM_QUESTION_PROMPT,
                                                 get_chat_history = get_chat_history)
                                                

In [17]:
query = "What is this paper about?"
query1a = "What were the significant findings on the relationship mentioned?"
query2 = "Summarize the paper concisely with reference to materials and methods."
query3 = "Write a one sentence summary of the purpose of the paper"
query4 = "Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?"
query5 = "Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?"
query6 = "What are the scientific names of the collected species mentioned in this paper?"
query6a = "Where were the species collected?"
query7 = "Does the paper mention where the species were observed or collected, and if so, what locations are given?"
query7a = "Please list the species collected based on their scientific names along with location of collection."
query8 = "Can you give a more specific location?"
query9 = "Are any coordinate locations given in latitude / longitude, and if so, what are they?"
query10 = "In what habitat were the species found?"
query10a = "Can you give a more specific location of the habitat such as formal name of said location?"
query11 = "Does the paper mention a year, date and/or time that species were collected or observed, and if so, what was mentioned?"
query12 = "Are there any maps, figures, tables or diagrams in the paper?"

In [16]:
chat_history = []

In [17]:
result1= qa_chain({"question": query, "chat_history": chat_history})



In [18]:
result1["answer"]

'The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The paper also explores the concept of character displacement and the morphological differences between the two species.'

In [19]:
result1["source_documents"]

[Document(page_content='Mean Consumption Rate  \n(Snails Consumed/Hour) \nIndividual H. nudus', metadata={}),
 Document(page_content='A multitude of species coexist in the marine intertidal environment. Closely related \nspecies may be sympatric and compete for resources, such as food. Two species of shore crab, \nHemigrapsus nudus (Dana 1851) and Hemigrapsus oregonensis (Dana 1851), occur in sympatry \nin the Salish Sea. Both species are sexually dimorphic, with the males having larger chelae. \nSpecies in sympatry may exhibit character displacement, the exaggeration of morphological or \nbehavioral characteristics due to competition for resources (Brown and Wilson 1956). When H. \nnudus and H. oregonensis co-occur, H. oregonensis tends to be found closer to the waterand in \nfiner sediment than the more desiccation-tolerant H. nudus (Sliger 1982). There is still \nconsiderable habitat overlap between these two species; the underside of a single rock may have \nCharifson 3 \n \nroughl

In [20]:
result1["chat_history"]

[Document(page_content='8 \n0.9 \n1 \nFe1 \nFe2 \nFe3 \nMa1 \nMa2 \nMa3 \nMean Consumption Rate  \n(Snails Consumed/Hour) \nIndividual H. nudus \n', metadata={}),
 Document(page_content='ace width was a good predictor of both propal height (Figure 1, Table 1A) and \npropal width (Figure 2, Table 1B) in female and male H. oregonensis and H. nudus. There was \nless variation in the relationship between carapace width and propal height than with propal \nwidth.  Due to non-normality of CW:PH and CW:PW an arcsin transformation was used. A \nsignificant effect for sex was found for both CW:PH (F1, 45 = 125.6, p < 0.001) and CW:PW  (F1, \n45 = 103.81, p < 0.001). There was no significant difference between Hemigrapsus species for \nCW:PH (F1, 45 < 0.01, p = 0.983) and CW:PW (F1, 45 = 0.09, p = 0.764). A significant \nsex*species interaction was detected for both CW:PH (F1, 45 4.39, p = 0.042) and CW:PW (F1, 45 \n= 7.19, p = 0.010). Figure 3 and 4 show the means of CW:PH and CW:PW in by sex a

In [21]:
result1a = qa_chain({"question": query1a, "chat_history": chat_history})

In [22]:
result1a["answer"]

'The significant findings on the relationship between carapace width and propal height and width in Hemigrapsus are as follows:\n\n1. Carapace width was a good predictor of both propal height and propal width in female and male H. oregonensis and H. nudus.\n\n2. There was less variation in the relationship between carapace width and propal height than with propal width.\n\n3. A significant effect for sex was found for both carapace width:propal height (CW:PH) and carapace width:propal width (CW:PW). \n\n4. There was no significant difference between Hemigrapsus species for CW:PH and CW:PW.\n\n5. A significant sex*species interaction was detected for both CW:PH and CW:PW.'

In [23]:
result1a["source_documents"]

[Document(page_content='Figure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from \nSMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus. \nC) Female H. oregonensis. D) Male H. oregonensis.  \n\nFigure 2: Relationship of carapace width and propal width in Hemigrapsus.  \nLine of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H. \nnudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis.  \n\nFigure 3: Differences in propal height:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F = 125.6. p < 0.001), while the species factor was \ninsignificant (F > 0.01, p = 0.983). There was a significant interaction (F = 4.39, p = 0.042). \nError bars represent standard error of the mean.  \n\nFigure 4: Differences in propal width:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F1,4

In [24]:
result1a["chat_history"]

[Document(page_content=' \n0 \n0 \nNT \n \n \n \n \nCharifson 15 \n \n \nFigure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from \nSMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus. \nC) Female H. oregonensis. D) Male H. oregonensis.  \n \n \n \nCharifson 16 \n \n \nFigure 2: Relationship of carapace width and propal width in Hemigrapsus.  \nLine of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H. \nnudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis.  \n \n \n \nCharifson 17 \n \n \nFigure 3: Differences in propal height:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F = 125.6. p < 0.001), while the species factor was \ninsignificant (F > 0.01, p = 0.983). There was a significant interaction (F = 4.39, p = 0.042). \nError bars represent standard error of the mean.  \n \n \n \n0 \n0.05 \n0.1 \n0.15 \n

In [34]:
result2 = qa_chain({"question": query2, "chat_history": chat_history})

In [35]:
result2["answer"]

'The paper focuses on the study of snail predation by Hemigrapsus nudus and Hemigrapsus oregonensis, specifically examining sex and species differences in chela size. The materials and methods involved the measurement of carapace width and propal width in both female and male H. nudus. A line of best fit was determined using SMA regression. The propal height:carapace width ratio and propal width:carapace width ratio were analyzed to determine differences between sexes and species. The preference experiment involved providing crabs with five snails in each trial and measuring the number of snails consumed after 1.5 to 3 hours, which was then converted to a consumption rate. A total of eight feeding trials were conducted.'

In [36]:
result2["source_documents"]

[Document(page_content='The main topic of this paper is the study of snail predation by Hemigrapsus nudus and Hemigrapsus oregonensis, specifically examining sex and species differences in chela size.', metadata={}),
 Document(page_content='Figure 2: The relationship between carapace width and propal width in Hemigrapsus:\n   - For both female and male H. nudus, a line of best fit was determined using SMA regression.\n   - Descriptive statistics for this relationship can be found in Table 1B.\n\nFigure 3: Differences in propal height:carapace width ratio between sex and species:\n   - The sex factor was statistically significant (F = 125.6, p < 0.001), indicating that there is a significant difference in propal height:carapace width ratio between males and females.\n   - The species factor was insignificant (F > 0.01, p = 0.983), suggesting that there is no significant difference in propal height:carapace width ratio between H. oregonensis and H. nudus.\n   - There was a significant in

In [37]:
result2["chat_history"]

[Document(page_content='question: What is this paper about?\nanswer: The main topic of this paper is the study of snail predation by Hemigrapsus nudus and Hemigrapsus oregonensis, specifically examining sex and species differences in chela size.\nsource_documents: [Document(page_content=\'The main topic or subject of this paper is the "Snail Predation by Hemigrapsus nudus and Hemigrapsus oregonensis: Sex and Species Differences in Chela Size."\', metadata={})]', metadata={}),
 Document(page_content="question: What were the significant findings on the relationship mentioned?\nanswer: From the given information, we can determine the following significant findings:\n\n1. Figure 2: The relationship between carapace width and propal width in Hemigrapsus:\n   - For both female and male H. nudus, a line of best fit was determined using SMA regression.\n   - Descriptive statistics for this relationship can be found in Table 1B.\n\n2. Figure 3: Differences in propal height:carapace width ratio 

In [25]:
result3 = qa_chain({"question": query3, "chat_history": chat_history})

In [26]:
result3["answer"]

'The main objective of this paper is to investigate the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The paper also examines the potential impact of sex and species on this relationship.'

In [27]:
result3["source_documents"]

[Document(page_content='The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The paper also explores the concept of character displacement and the morphological differences between the two species.', metadata={}),
 Document(page_content='ace width was a good predictor of both propal height (Figure 1, Table 1A) and \npropal width (Figure 2, Table 1B) in female and male H. oregonensis and H. nudus. There was \nless variation in the relationship between carapace width and propal height than with propal \nwidth. A significant effect for sex was found for both CW:PH (F1, 45 = 125.6, p < 0.001) and CW:PW  (F1, \n45 = 103.81, p < 0.001). There was no significant difference between Hemigrapsus species for \nCW:PH (F1, 45 < 0.01, p = 0.983) and CW:PW (F1, 45 = 0.09, p = 0.764). A significant \nsex*species interaction was detected for both CW:PH (F

In [28]:
result3["chat_history"]

[Document(page_content="question: What is this paper about?\nanswer: The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The paper also explores the concept of character displacement and the morphological differences between the two species.\nsource_documents: [Document(page_content='Mean Consumption Rate  \\n(Snails Consumed/Hour) \\nIndividual H. nudus', metadata={}), Document(page_content='A multitude of species coexist in the marine intertidal environment. Closely related \\nspecies may be sympatric and compete for resources, such as food. Two species of shore crab, \\nHemigrapsus nudus (Dana 1851) and Hemigrapsus oregonensis (Dana 1851), occur in sympatry \\nin the Salish Sea. Both species are sexually dimorphic, with the males having larger chelae. \\nSpecies in sympatry may exhibit character displacement, the exaggeration of morph

In [29]:
query3a = "Since the crabs were feeding on snails, what were the scientific names of the snails?"
result3a = qa_chain({"question": query3a, "chat_history": chat_history})

In [30]:
result3a["answer"]

'The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.'

In [31]:
result3a["source_documents"]

[Document(page_content='Littorina scutulata (Gould 1849) and Littorina sitkana (Philippi 1846)', metadata={}),
 Document(page_content='Hemigrapsus nudus, Hemigrapsus oregonensis, Littorina scutulata, Lacuna vincta', metadata={}),
 Document(page_content='Hemigrapsus nudus and Hemigrapsus oregonensis', metadata={})]

In [32]:
result3a["chat_history"]

[Document(page_content=' of the chela \nmay differ, which could allow the crabs to differentiate in food resource utilization. \nBoth species are omnivores that eat algae and small invertebrates, including snails \n(Behrens Yamada and Boulding 1996). The two shore crab species are known to eat Littorina \nsitkana (Philippi 1846) and Littorina scutulata (Gould 1849), which also occur in the rocky \nintertidal zone (Behrens Yamada and Boulding 1996). Li. scutulata moves upshore in response \nto the presence of H. nudus and crushed conspecifics, where H. nudus density is lower (Keppel \nand Scrosati 2004). This suggests that H. nudus represents an ecologically significant predation \nthreat to littorines.  \n \nThe thick shelled littorines are difficult prey for both H. nudus and H. oregonensis as \nneither crab species is a molluscivore specialists; they typically scrape or pick algae and softer \ninvertebrates off rocks using their chelae (Behrens Yamada and Boulding 1996). Behrens \nYa

In [33]:
result4 = qa_chain({"question": query4, "chat_history": chat_history})

In [34]:
result4["answer"]

'Based on the given context, there is no specific mention of observations made in the field or any other type of data collection methods used in the paper.'

In [35]:
result4["source_documents"]

[Document(page_content='The main objective of this paper is to investigate the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The paper also examines the potential impact of sex and species on this relationship.', metadata={}),
 Document(page_content='The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The paper also explores the concept of character displacement and the morphological differences between the two species.', metadata={}),
 Document(page_content='The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.', metadata={})]

In [36]:
result4["chat_history"]

[Document(page_content="question: What is this paper about?\nanswer: The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The paper also explores the concept of character displacement and the morphological differences between the two species.\nsource_documents: [Document(page_content='Mean Consumption Rate  \\n(Snails Consumed/Hour) \\nIndividual H. nudus', metadata={}), Document(page_content='A multitude of species coexist in the marine intertidal environment. Closely related \\nspecies may be sympatric and compete for resources, such as food. Two species of shore crab, \\nHemigrapsus nudus (Dana 1851) and Hemigrapsus oregonensis (Dana 1851), occur in sympatry \\nin the Salish Sea. Both species are sexually dimorphic, with the males having larger chelae. \\nSpecies in sympatry may exhibit character displacement, the exaggeration of morph

In [37]:
result5 = qa_chain({"question": query5, "chat_history": chat_history})

In [38]:
result5["answer"]

'The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The paper also examines the potential impact of sex and species on this relationship.'

In [39]:
result5["source_documents"]

[Document(page_content='The main objective of this paper is to investigate the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The paper also examines the potential impact of sex and species on this relationship.', metadata={}),
 Document(page_content='The main objective of this paper is to investigate the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The paper also examines the potential impact of sex and species on this relationship.', metadata={}),
 Document(page_content='The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The paper also explores the concept of character displacement and the morphological differences between the two species.', meta

In [40]:
result5["chat_history"]

[Document(page_content="question: Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?\nanswer: Based on the given context, there is no specific mention of observations made in the field or any other type of data collection methods used in the paper.\nsource_documents: [Document(page_content='The main objective of this paper is to investigate the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The paper also examines the potential impact of sex and species on this relationship.', metadata={}), Document(page_content='The main topic or subject matter of this paper is the coexistence and competition between two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis, in the marine intertidal environment. The

In [41]:
result6 = qa_chain({"question": query6, "chat_history": chat_history})

In [42]:
result6["answer"]

'The scientific names of the species mentioned in this paper are Hemigrapsus nudus and Hemigrapsus oregonensis.'

In [43]:
result6["source_documents"]

[Document(page_content='The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.', metadata={}),
 Document(page_content='The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.', metadata={}),
 Document(page_content='The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis.', metadata={})]

In [44]:
result6["chat_history"]

[Document(page_content="question: Since the crabs were feeding on snails, what were the scientific names of the snails?\nanswer: The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.\nsource_documents: [Document(page_content='Littorina scutulata (Gould 1849) and Littorina sitkana (Philippi 1846)', metadata={}), Document(page_content='Hemigrapsus nudus, Hemigrapsus oregonensis, Littorina scutulata, Lacuna vincta', metadata={}), Document(page_content='Hemigrapsus nudus and Hemigrapsus oregonensis', metadata={})]", metadata={}),
 Document(page_content="question: Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?\nanswer: Based on the given context, there is no specific mention of observations made in the field or any other type of data collection methods use

In [45]:
result6a = qa_chain({"question": query6a, "chat_history": chat_history})

In [46]:
result6a["answer"]

'The locations from which the collected species were obtained are not mentioned in the given context.'

In [47]:
result6a["source_documents"]

[Document(page_content='The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis.', metadata={}),
 Document(page_content='The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.', metadata={})]

In [48]:
result7 = qa_chain({"question": query7, "chat_history": chat_history})

In [49]:
result7["answer"]

'The locations from which the collected species were obtained are not mentioned in the given context.'

In [50]:
result7["source_documents"]

[Document(page_content='The locations from which the collected species were obtained are not mentioned in the given context.', metadata={}),
 Document(page_content='The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis.', metadata={})]

In [51]:
result7["chat_history"]

[Document(page_content="question: Where were the species collected?\nanswer: The locations from which the collected species were obtained are not mentioned in the given context.\nsource_documents: [Document(page_content='The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis.', metadata={}), Document(page_content='The scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.', metadata={})]", metadata={}),
 Document(page_content="question: Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?\nanswer: Based on the given context, there is no specific mention of observations made in the field or any other 

In [52]:
result7a = qa_chain({"question": query7a, "chat_history": chat_history})

In [53]:
result7a["answer"]

'The scientific names of the collected species mentioned in this paper are Hemigrapsus nudus and Hemigrapsus oregonensis. However, the locations from which these species were obtained are not mentioned in the given context.'

In [54]:
result8 = qa_chain({"question": query8, "chat_history": chat_history})

In [55]:
result8["answer"]

'No, the paper does not mention any specific location where the collected species were obtained.'

In [56]:
result9 = qa_chain({"question": query9, "chat_history": chat_history})

In [57]:
result9["answer"]

"I'm sorry, but I don't have any specific coordinate locations in latitude/longitude for the collected species mentioned in the given context."

In [58]:
result10 = qa_chain({"question": query10, "chat_history": chat_history})

In [59]:
result10["answer"]

'No, the paper does not provide information about the specific habitat where the species were found.'

In [60]:
result10["source_documents"]

[Document(page_content='No, the paper does not mention any specific location where the collected species were obtained.', metadata={}),
 Document(page_content='The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis.\nThe scientific names of the snails that the crabs were feeding on are Littorina scutulata and Lacuna vincta.', metadata={}),
 Document(page_content='The locations from which the collected species were obtained are not mentioned in the given context.', metadata={})]

In [61]:
result10a = qa_chain({"question": query10a, "chat_history": chat_history})

In [62]:
result10a["answer"]

'No, the paper does not mention any formal name or specific geographic location for the habitat where the species were collected.'

In [63]:
result11 = qa_chain({"question": query11, "chat_history": chat_history})

In [64]:
result11["answer"]

'No, the paper does not mention any year, date, or time regarding the collection or observation of the species mentioned.'

In [65]:
result11["source_documents"]

[Document(page_content='No, the paper does not mention any specific location where the collected species were obtained.', metadata={}),
 Document(page_content='No, the paper does not mention any specific location where the collected species were obtained.', metadata={}),
 Document(page_content='The locations from which the collected species were obtained are not mentioned in the given context.', metadata={})]

In [66]:
result11["chat_history"]

[Document(page_content="question: Does the paper mention where the species were observed or collected, and if so, what locations are given?\nanswer: The locations from which the collected species were obtained are not mentioned in the given context.\nsource_documents: [Document(page_content='The locations from which the collected species were obtained are not mentioned in the given context.', metadata={}), Document(page_content='The research conducted in this paper is primarily focused on investigating the relationship between carapace width and propal height and width in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis.', metadata={})]", metadata={}),
 Document(page_content="question: Can you give a more specific location of the habitat such as formal name of said location?\nanswer: No, the paper does not mention any formal name or specific geographic location for the habitat where the species were collected.\nsource_documents: [Document(page_content='No, the 

In [67]:
result12 = qa_chain({"question": query12, "chat_history": chat_history})

In [68]:
result12["answer"]

'No, the paper does not mention any visual representations such as maps, figures, tables, or diagrams.'

# ConversationalRChain with map_reduce

In [8]:
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

compressor = LLMChainExtractor.from_llm(llm)

compression_retriever2 = ContextualCompressionRetriever(base_compressor = compressor,base_retriever=vector_store)


In [9]:
memory = VectorStoreRetrieverMemory(
memory_key="chat_history",
return_docs=True,
retriever=vector_store,
return_messages=True)

In [15]:
qa_chain_reduce = ConversationalRetrievalChain.from_llm(ChatOpenAI(),
                                                 compression_retriever2,
                                                 memory = memory,
                                                 return_source_documents = True,
                                                 condense_question_prompt = CUSTOM_QUESTION_PROMPT,
                                                 get_chat_history = get_chat_history,
                                                 chain_type = "map_reduce")
                                                

In [21]:
chat_history = []

In [22]:
result_reduce = qa_chain_reduce({"question": query, "chat_history": chat_history})



In [23]:
result_reduce["answer"]

'The subject matter of this paper appears to be the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus orogenensis.'

In [24]:
result_reduce["source_documents"]

[Document(page_content='The relationship between propus size and use of snails as prey in sympatric populations \nof Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal \nwidth increases with body size in a predictable manner in male and female crabs of both species. \nSexual dimorphism of propal height and width relationships in both shore crabs is apparent, but \nthere was no detectible difference in chela size between H. nudus and H. oregonensis. The lack \nof difference in chela size between sympatric H. nudus and H. oregonensis suggests that there is \nno character displacement in this trait. Both species of crabs showed a strong feeding preference \nfor the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely \ndue to the ease of consumption. There were no differences in snail consumption rates between \nmale and female H. nudus of similar propal heights, mostly due to high feeding variation among \nindivid

In [25]:
result_reduce["chat_history"]

[Document(page_content='8 \n0.9 \n1 \nFe1 \nFe2 \nFe3 \nMa1 \nMa2 \nMa3 \nMean Consumption Rate  \n(Snails Consumed/Hour) \nIndividual H. nudus \n', metadata={}),
 Document(page_content='ace width was a good predictor of both propal height (Figure 1, Table 1A) and \npropal width (Figure 2, Table 1B) in female and male H. oregonensis and H. nudus. There was \nless variation in the relationship between carapace width and propal height than with propal \nwidth.  Due to non-normality of CW:PH and CW:PW an arcsin transformation was used. A \nsignificant effect for sex was found for both CW:PH (F1, 45 = 125.6, p < 0.001) and CW:PW  (F1, \n45 = 103.81, p < 0.001). There was no significant difference between Hemigrapsus species for \nCW:PH (F1, 45 < 0.01, p = 0.983) and CW:PW (F1, 45 = 0.09, p = 0.764). A significant \nsex*species interaction was detected for both CW:PH (F1, 45 4.39, p = 0.042) and CW:PW (F1, 45 \n= 7.19, p = 0.010). Figure 3 and 4 show the means of CW:PH and CW:PW in by sex a

In [26]:
result_reduce1a = qa_chain_reduce({"question": query1a, "chat_history": chat_history})

In [27]:
result_reduce1a["answer"]

'The significant findings on the relationship between carapace width and propal height and width in Hemigrapsus crabs are as follows:\n\n- The relationship between carapace width and propal height was analyzed for female H. nudus, male H. nudus, female H. oregonensis, and male H. oregonensis. The line of best fit from SMA regression was provided in Figure 1. The results showed a high variance in propal height explained by carapace width in female H. nudus, with an R2 value of 0.976.\n\n- The relationship between carapace width and propal width was also analyzed for female H. nudus, male H. nudus, female H. oregonensis, and male H. oregonensis. The line of best fit from SMA regression was provided in Figure 2. The results showed a high variance in propal width explained by carapace width in female H. nudus, with an R2 value of 0.927.\n\n- In male H. oregenensis, there was a greater variation in chela size parameters. The R2 values for propal height and propal width regressed against car

In [28]:
result_reduce1a["source_documents"]

[Document(page_content='Figure 1: Relationship of carapace width and propal height in Hemigrapsus. Line of best fit from \nSMA regression. See Table 1A for descriptive statistics. A) Female H. nudus. B) Male H. nudus. \nC) Female H. oregonensis. D) Male H. oregonensis.  \n\nFigure 2: Relationship of carapace width and propal width in Hemigrapsus.  \nLine of best fit from SMA regression. See Table 1B for descriptive statistics. A) Female H. \nnudus. B) Male H. nudus. C)Female H. oregonensis. D) Male H. oregonensis.  \n\nFigure 3: Differences in propal height:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F = 125.6. p < 0.001), while the species factor was \ninsignificant (F > 0.01, p = 0.983). There was a significant interaction (F = 4.39, p = 0.042). \nError bars represent standard error of the mean.  \n\nFigure 4: Differences in propal width:carapace width ratio between sex and species. \nThe sex factor was statistically significant (F1,4

In [29]:
result_reduce2 = qa_chain_reduce({"question": query2, "chat_history": chat_history})

In [30]:
result_reduce2["answer"]

'The concise summary of the paper and the materials and methods used cannot be determined based on the given portion of the document.'

In [31]:
result_reduce2["source_documents"]

[Document(page_content='The subject matter of this paper appears to be the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus orogenensis. The relationship between propus size and use of snails as prey in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal width increases with body size in a predictable manner in male and female crabs of both species. Sexual dimorphism of propal height and width relationships in both shore crabs is apparent, but there was no detectible difference in chela size between H. nudus and H. oregonensis. The lack of difference in chela size between sympatric H. nudus and H. oregonensis suggests that there is no character displacement in this trait. Both species of crabs showed a strong feeding preference for the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely due to the ease of consumption. There were no differences in snail con

In [32]:
result_reduce3 = qa_chain_reduce({"question": query3, "chat_history": chat_history})

In [33]:
result_reduce3["answer"]

'The purpose of the paper is to investigate the relationship between propus size and the use of snails as prey in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis.'

In [34]:
result_reduce3["source_documents"]

[Document(page_content='The subject matter of this paper appears to be the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus orogenensis. The relationship between propus size and use of snails as prey in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal width increases with body size in a predictable manner in male and female crabs of both species. Sexual dimorphism of propal height and width relationships in both shore crabs is apparent, but there was no detectible difference in chela size between H. nudus and H. oregonensis. The lack of difference in chela size between sympatric H. nudus and H. oregonensis suggests that there is no character displacement in this trait. Both species of crabs showed a strong feeding preference for the thin-shelled gastropod Lacuna vincta over the thicker shelled Littorina scutulata, likely due to the ease of consumption. There were no differences in snail con

In [35]:
result_reduce3["chat_history"]

[Document(page_content="question: Summarize the paper concisely with reference to materials and methods.\nanswer: The concise summary of the paper and the materials and methods used cannot be determined based on the given portion of the document.\nsource_documents: [Document(page_content='The subject matter of this paper appears to be the morphometric analysis of two species of crabs, Hemigrapsus nudus and Hemigrapsus orogenensis. The relationship between propus size and use of snails as prey in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis was investigated. Propal height and propal width increases with body size in a predictable manner in male and female crabs of both species. Sexual dimorphism of propal height and width relationships in both shore crabs is apparent, but there was no detectible difference in chela size between H. nudus and H. oregonensis. The lack of difference in chela size between sympatric H. nudus and H. oregonensis suggests that there is 

In [36]:
result_reduce4 = qa_chain_reduce({"question": query4, "chat_history": chat_history})

In [37]:
result_reduce4["answer"]

'Based on the provided portion of the document, it is not possible to determine if the paper includes any observations made in the field, in this study, or through the collection, sampling, or harvesting of specimens. The given text only describes the purpose of the paper, which is to investigate the relationship between propus size and the use of snails as prey in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis. Therefore, it is unclear if any direct observations or specimen collection were conducted.'

In [38]:
result_reduce5 = qa_chain_reduce({"question": query5, "chat_history": chat_history})

In [39]:
result_reduce5["answer"]

'No, there is no mention of observational or experimental research conducted in the natural environment or with organisms collected in nature in this portion of the document.'

In [40]:
result_reduce6 = qa_chain_reduce({"question": query6, "chat_history": chat_history})

In [41]:
result_reduce6["answer"]

'The scientific names of the species collected in this paper are Hemigrapsus nudus and Hemigrapsus orogenensis.'

In [42]:
result_reduce6a = qa_chain_reduce({"question": query6a, "chat_history": chat_history})

In [43]:
result_reduce6a["answer"]

'The document does not provide information about the specific location where the species mentioned in the paper were collected from.'

In [44]:
result_reduce7 = qa_chain_reduce({"question": query7, "chat_history": chat_history})

In [45]:
result_reduce7["answer"]

"I don't know. The document does not provide information about the specific locations where the species mentioned in the paper were observed or collected."

In [46]:
result_reduce7a = qa_chain_reduce({"question": query7a, "chat_history": chat_history})

In [47]:
result_reduce7a["answer"]

'The document does not provide information about the specific locations where the species mentioned in the paper were observed or collected.'

In [48]:
result_reduce8 = qa_chain_reduce({"question": query8, "chat_history": chat_history})

In [49]:
result_reduce8["answer"]

'No, the document does not provide any additional information about the specific locations where the species mentioned in the paper were observed or collected.'

In [50]:
result_reduce9 = qa_chain_reduce({"question": query9, "chat_history": chat_history})

In [51]:
result_reduce9["answer"]

'No, the document does not provide any latitude/longitude coordinate locations for the species mentioned in the paper.'

In [52]:
result_reduce10 = qa_chain_reduce({"question": query10, "chat_history": chat_history})

In [53]:
result_reduce10["answer"]

"I'm sorry, but based on the given portion of the document, there is no information available about the habitat in which the species mentioned in the paper were found."

In [54]:
result_reduce10a = qa_chain_reduce({"question": query10a, "chat_history": chat_history})

In [55]:
result_reduce10a["answer"]

'No, the document does not provide the formal name of the specific location where the habitat of the species mentioned in the paper was observed or collected.'

In [56]:
result_reduce11 = qa_chain_reduce({"question": query11, "chat_history": chat_history})

In [57]:
result_reduce11["answer"]

'The document does not provide any information about the year, date, or time that the species were collected or observed.'

In [58]:
result_reduce12 = qa_chain_reduce({"question": query12, "chat_history": chat_history})

In [59]:
result_reduce12 = qa_chain_reduce({"question": query12, "chat_history": chat_history})

In [60]:
result_reduce12["answer"]

'No, the paper does not contain any maps, figures, tables, or diagrams.'

# ***ConversationalRetrievalChain Similarity Search w/ ConversationBufferMemory ***

In [61]:
vector_store2 = FAISS.from_texts(docs, embeddings).as_retriever(search_type = "similarity", search_kwargs= {"k": 3})

In [62]:
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

compressor = LLMChainExtractor.from_llm(llm)

compression_retriever2 = ContextualCompressionRetriever(base_compressor = compressor,base_retriever=vector_store2)


In [63]:
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. Please use a maximum of 4 sentences. If you do not know the answer reply with 'I am sorry'.
1. SPECIES NAME: The scientific name (species name) of any plant, animal, fungus, alga or bacterium consists of two Latinized words. The first word is the name of the genus to which the organism belongs. The second word is the specific epithet or specific term of the species. Together, the genus plus the specific epithet make up the species name. The species name and scientific name are synonyms.
2. HABITAT: A species habitat can be seen as the physical manifestation of its ecological niche.
3. LOC: Name of any geographic location, like cities, countries, continents, districts etc.

Examples:

1. Sentence: Strongylocentrotus fransiscanus and S. purpuratus were obtained from the subtidal and intertidal regions, respectively, of Monterey Bay.
"Output: {{'SPECIES NAME': ['Strongylocentrotus fransiscanus', 'S. purpuratus'], 'HABITAT': ['subtidal', 'intertidal'], 'LOC': ['Monterey Bay']}}

2. Sentence: Cucumaria curata and C. pseudocurata live and feed in the hydrodynamically stressful environment of exposed intertidal areas.
Output: {{'SPECIES NAME': ['Cucumaria curata', 'C. pseudocurata'], 'HABITAT': ['exposed intertidal'], 'LOC': ['None']}}\n"

Chat History:
{chat_history}
Follow Up Input: {question}
"""

In [64]:
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [66]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(llm=llm, input_key='question', output_key='answer',  return_messages=True)

In [67]:
#Formats the chat history string
def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Human:{human}\nAI:{ai}")
    return "\n".join(res)

In [108]:
qa_chain_2 = ConversationalRetrievalChain.from_llm(ChatOpenAI(), 
                                                 compression_retriever2,
                                                 memory = memory,
                                                 return_source_documents = True,
                                                 condense_question_prompt = CUSTOM_QUESTION_PROMPT,
                                                 get_chat_history = get_chat_history)
                                                

In [69]:
query = "What is this paper about?"
query1a = "What were the significant findings on the relationship mentioned?"
query2 = "Summarize the paper concisely with reference to materials and methods."
query3 = "Write a one sentence summary of the purpose of the paper"
query4 = "Terms that may be used to identify an observation include “in the field”, “this study”, “observed”, “taken”, “collected”, “sampled”, “collection”, “seen”, “harvested”, “found”, etc. Does the paper include one or more observations?"
query5 = "Does this paper contain observational or experimental research conducted in the natural environment or with organisms collected in nature?"
query6 = "What are the scientific names of the collected species mentioned in this paper?"
query6a = "Were there other species collected?"
query7 = "Does the paper mention where the species were observed or collected, and if so, what locations are given?"
query7a = "Please list the species collected based on their scientific names along with location of collection."
query8 = "Can you give a more specific location?"
query9 = "Are any coordinate locations given in latitude / longitude, and if so, what are they?"
query10 = "In what habitat were the species found?"
query10a = "Can you give a more specific location of the habitat such as formal name of said location?"
query11 = "Does the paper mention a year, date and/or time that species were collected or observed, and if so, what was mentioned?"
query12 = "Are there any maps, figures, tables or diagrams in the paper?"

In [70]:
chat_history = []

In [71]:
result1b = qa_chain_2({"question": query, "chat_history": chat_history})

In [72]:
result1b["answer"]

'This paper is about the morphometric analysis of Hemigrapsus nudus and Hemigrapsus orogenensis, specifically focusing on the relationship between propus size and the use of snails as prey in sympatric populations. The paper explores the patterns and variations in propal height and width in male and female crabs of both species and examines the feeding preferences of the crabs for different types of snails. It also discusses the lack of difference in chela size between the two species and suggests that there is no character displacement in this trait.'

In [103]:
result1b["source_documents"]

[Document(page_content='Mean Consumption Rate  \n(Snails Consumed/Hour) \nIndividual H. nudus', metadata={}),
 Document(page_content='A few interesting patterns emerged in the morphometric analysis of Hemigrapsus nudus and Hemigrapsus orogenensis. The variance in propal height or propal width explained by carapace width was surprisingly high in female H. nudus with R2 values of 0.976 found for propal height and 0.927 for propal width. Male H. oregenensis had the greatest variation in chela size parameters, demonstrated by R2 of 0.693 and 0.534 for propal height and propal width respectively. The R2 values for propal width regressed against carapace width were consistently lower than propal height regressed against carapace width across species and sexes (Table 1). In general the ability of carapace width to explain the variance in chela size parameters (i.', metadata={}),
 Document(page_content='The relationship between propus size and use of snails as prey in sympatric populations \no

In [104]:
result1b["chat_history"]

[]

In [75]:
result2b = qa_chain_2({"question": query2, "chat_history": chat_history})

In [76]:
result2b["answer"]

'The paper examined the relationship between carapace width and chela size in two species of shore crabs, Hemigrapsus nudus and Hemigrapsus oregonensis. The study focused on both females and males of the two species. The authors used a two-way ANOVA with factors sex and species to analyze the data. They also performed an arcsin transformation due to non-normality of certain variables. The results showed that carapace width was a good predictor of propal height and propal width in both female and male crabs. There was no significant difference in chela size between the two species. However, a significant sex*species interaction was found. The study suggests that trade-offs between claw size and factors like feeding efficiency or resource allocation may explain the variation in chela size.'

In [102]:
result2b["source_documents"]

[Document(page_content='apace width were consistently \nlower than propal height regressed against carapace width across species and sexes (Table 1). In \ngeneral the ability of carapace width to explain the variance in chela size parameters (i.e.: R2 \nvalues) was greater in females of both crab species (Table 1, Figures 1 and 2). Males may have \nmore variation in claw size due to trade-offs between claw size and factors like feeding \nefficiency or resource allocation. Trade-offs relating to chela size are found in fiddler crabs, \nwhich exhibit extreme sexual dimorphism. Fiddler crabs have a trade-off between the intrasexual \ncompetition and intersexual signaling functions of the dimorphic claw (Swanson et al. 2013). \nCharifson 8 \n \nThis is not to say that the particular trade-offs in fiddler crabs case are analogous to that of \nHemigrapsus, since both claws in the two shore crabs in this study are used in feeding.  \nSexual dimorphism in chela size, which is readily apparent 

In [105]:
result2b["chat_history"]

[]

In [79]:
result3b = qa_chain_2({"question": query3, "chat_history": chat_history})

In [80]:
result3b["answer"]

'The purpose of the paper was to investigate the relationship between propus size and the use of snails as prey in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis, and to determine if there was any character displacement in chela size between the two species.'

In [81]:
result4b = qa_chain_2({"question": query4, "chat_history": chat_history})

In [82]:
result4b["answer"]

'Yes, the paper includes several observations. It mentions the collection of una vincta and Littorina scutulata from different locations, the absence of detectable differences in chela size between H. nudus and H. oregonensis, and the feeding preference of both crab species for Lacuna vincta over Littorina scutulata. It also states that there were no differences in snail consumption rates between male and female H. nudus of similar propal heights.'

In [83]:
result5b = qa_chain_2({"question": query5, "chat_history": chat_history})

In [84]:
result5b["answer"]

"Based on the given context, it appears that the research described in the paper is observational in nature. The crabs' feeding preferences and snail consumption rates were observed in sympatric populations of Hemigrapsus nudus and Hemigrapsus oregonensis. Snails were collected from different locations in the natural environment, suggesting that the research involved organisms collected in nature."

In [85]:
result6b = qa_chain_2({"question": query6, "chat_history": chat_history})

In [86]:
result6b["answer"]

'The scientific names of the collected species mentioned in this paper are:\n\n- Lacuna vincta\n- Littorina scutulata\n- Hemigrapsus nudus\n- Hemigrapsus oregonensis'

In [87]:
result7b = qa_chain_2({"question": query7, "chat_history": chat_history})

In [88]:
result7b["answer"]

'Yes, the paper mentions the locations where the species were collected. Lacuna vincta was collected at False Bay and Dead Man Cove, while Littorina scutulata was collected from the rocky intertidal zone of Fourth of July Beach.'

In [89]:
result7c = qa_chain_2({"question": query7a, "chat_history": chat_history})

In [106]:
result7c["answer"]

'Based on the given information, the species collected and their respective locations are:\n\n1. Lacuna vincta - Collected at False Bay and Dead Man Cove.\n2. Littorina scutulata - Collected from the rocky intertidal zone of Fourth of July Beach.\n3. Hemigrapsus nudus - No information provided about the location of collection.\n4. Hemigrapsus oregonensis - No information provided about the location of collection.'

In [90]:
result8b = qa_chain_2({"question": query8, "chat_history": chat_history})

In [91]:
result8b["answer"]

'Yes, the specific locations mentioned in the context are False Bay, Dead Man Cove, and Fourth of July Beach.'

In [92]:
result9b = qa_chain_2({"question": query9, "chat_history": chat_history})

In [93]:
result9b["answer"]

"I'm sorry, but I don't have access to any specific coordinate locations in latitude/longitude at the moment."

In [94]:
result10b = qa_chain_2({"question": query10, "chat_history": chat_history})

In [95]:
result10b["answer"]

'The species una vincta and Littorina scutulata were found in the rocky intertidal zone at False Bay, Dead Man Cove, and Fourth of July Beach on San Juan Island, Washington. The shore crabs Hemigrapsus oregonensis and Hemigrapsus nudus were collected in the intertidal zone adjacent to the Friday Harbor Laboratories dock.'

In [96]:
result10c = qa_chain_2({"question": query10a, "chat_history": chat_history})

In [97]:
result10c["answer"]

'The specific location mentioned in the context is San Juan Island, Washington. However, the context does not provide any formal names of the habitats or locations where the specimens were collected.'

In [98]:
result11b = qa_chain_2({"question": query11, "chat_history": chat_history})

In [99]:
result11b["answer"]

'Yes, the paper mentions the year as "Summer 2014". However, it does not mention a specific date or time when the species were collected or observed.'

In [100]:
result12b = qa_chain_2({"question": query12, "chat_history": chat_history})

In [101]:
result12b["answer"]

'Yes, there are figures and tables in the paper. Specifically, there are Figure 1, Table 1A, Figure 2, Table 1B, Figure 3, and Figure 4.'