In [1]:
from pydantic import BaseModel, Field
from typing import List

class Hypothesis(BaseModel):
    claim: str = Field(..., description="The claim of the hypothesis being tested.")
    priors: List[str] = Field(..., description="A list of scientifically backed priors that validate the hypothesis.")

# I think about this like: this set of priors make this claim a valid hypothesis
# Could later add something like "evidence" to support the priors or other things like that
# Could even experiment with making the priors a list of Fact() instances.

In [83]:
MODEL = "gpt-4o-mini"
SYS_MSG = """
You are a helpful research assistant.
    Users will ask a question regarding whether or not the paper presents evidence forward or against a given argument,
    You will output ONLY 2 things:
    - argument: the argument presented by the paper regarding the user's query, like: "Yes this paper validates this idea by discussing...." etc..
    - evidence: a list of DIRECT QUOTES from the paper containing information that support the argument above.
    """

In [12]:
from langchain_community.document_loaders import PyPDFLoader

def load_pdf_to_str(file_path: str) -> str:
    loader = PyPDFLoader(file_path)
    docs = loader.load()
    return [page.page_content for page in docs]

file_path = "./assets-resources/sources/improv-visual-working-mem.pdf"
pdf_pages = load_pdf_to_str(file_path)
pdf_pages

['RESEA RCH ARTICL E\nImprovements tovisual working memory\nperformance with practice and feedback\nKirsten C.S.Adam1,2*,Edward K.Vogel1,2,3\n1Department ofPsychology, Univers ityofChicago, Chicago, Illinois, United States ofAmerica, 2Institute for\nMind andBiology, University ofChicago, Chicago, Illinois, United States ofAmerica, 3Grossman Institute for\nNeurosc ience, Quantitativ eBiology, andHuman Behavio r,University ofChicag o,Chicago, Illinois, United\nStates ofAmerica\n*kadam1@ uchicago.e du\nAbstract\nVisual working memory capacity isestimated tobearound 3–4items, butonsome trials par-\nticipants failtocorrectly report even asingle item from thememory array. Such failures of\nworking memory performance aresurprisingly common, andparticipants have poor self-\nawareness ofthem. Previous work hasshown thatbehavioral feedback canreduce thefre-\nquency ofworking memory failures, butthebenefits offeedback disappeared immediately\nafter itwas taken away. Here, wetested whether extende

In [98]:
from pydantic import BaseModel, Field, field_validator
from typing import List, Literal


class Evidence(BaseModel):
    positions: List[Literal['yes', 'no', 'neutral']] = Field(description="A List with 1-3 ternary scores presenting the positions of the paper regarding the query/hypothesis in question as yes|no|neutral.")
    position_descriptions: List[str] = Field(description="List with 1-3 statements presenting the position for, against or neutral regarding the query/hypothesis n question.")
    evidence: List[str] = Field(description="List of DIRECT QUOTES serve as evidence to support each position.")

    @field_validator('positions', 'position_descriptions', 'evidence')
    def validate_list_size(cls, values):
        if len(set(map(len, values))) != 1:
            print("Uncertainty in results, retrying...")
        return values


In [99]:
SYS_MSG_EVIDENCE = """
You are a helpful research assistant. Given a research question or hypothesis you
will inspect the contents of papers or articles for evidence for or against 
the respective hypothesis in question. Your output will be 2 fields:
- positions: List of ternary scores (yes|no|neutral) presenting the positions of the paper regarding the query/hypothesis in question.
- positions_descriptions: List of a short one sentence statements summarizing the position from the text regarding the user's query or hypothesis, like: "Yes this paper validates this idea by pointing out...." etc..
- evidence: a list with DIRECT QUOTES from the paper containing information that supports and validates each position. It should be one quote to validate each position.

All 3 fields should be lists with the same size.
"""

In [100]:
MODEL = "gpt-4o-2024-08-06"

In [101]:
from openai import OpenAI

client = OpenAI()

def inspect_evidence(prompt_question):
    response = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[{"role": "system", "content": SYS_MSG_EVIDENCE},
                  {"role": "user", "content": prompt_question}],
        response_format=Evidence,
    )
    
    return response.choices[0].message.parsed

In [102]:
# It's actually the entire first page but bare with me.
abstract = pdf_pages[0]

In [114]:
# source: https://www.semanticscholar.org/paper/Improvements-to-visual-working-memory-performance-Adam-Vogel/e18bbb815cf36aa75ef335787ecd9084d418765e
hypothesis = "Working memory training in humans might lead to long-term far transfer improvements in some cognitive abilities."

prompt = f"""
Does this abstract: 
{abstract}\n\n
presents evidence for the following statement:
{hypothesis}
"""

output = inspect_evidence(prompt)

output

Uncertainty in results, retrying...
Uncertainty in results, retrying...
Uncertainty in results, retrying...


Evidence(positions=['no', 'neutral'], position_descriptions=['No, this paper does not support the idea as feedback benefits did not persist over time.', 'Neutral, as the study does not find evidence supporting long-term far transfer improvements.'], evidence=['Practicing with feedback improved working memory performance relative to a no-feedback group for some practice sessions. However, the feedback benefits did not persist across all training sessions and did not transfer to a final test session without the feedback.', 'We found only stimulus-specific transfer of visual working memory practice benefits.'])

In [115]:
for p,pd,e in zip(output.positions, output.position_descriptions, output.evidence):
    print(f"Position: {p}\nPosition Description: {pd}\nEvidence: {e}\n\n")
    print("****")

Position: no
Position Description: No, this paper does not support the idea as feedback benefits did not persist over time.
Evidence: Practicing with feedback improved working memory performance relative to a no-feedback group for some practice sessions. However, the feedback benefits did not persist across all training sessions and did not transfer to a final test session without the feedback.


****
Position: neutral
Position Description: Neutral, as the study does not find evidence supporting long-term far transfer improvements.
Evidence: We found only stimulus-specific transfer of visual working memory practice benefits.


****


In [116]:
def inspect_evidence_for_hypothesis(hypothesis: str, source_content: str):
    """Inspects the evidence for, against or neutral regarding a research hypothesis."""
    
    prompt = f"""
    Does this abstract: 
    {source_content}\n\n
    presents evidence for the following statement:
    {hypothesis}
    """

    output = inspect_evidence(prompt)
    
    return output

def display_output_evidence(output):
    for p,pd,e in zip(output.positions, output.position_descriptions, output.evidence):
        print(f"Position: {p}\nPosition Description: {pd}\nEvidence: {e}\n\n")
        print("****")

In [121]:
# source for this abstract: https://pubmed.ncbi.nlm.nih.gov/35107614/
hypothesis_fluid_intelligence = "Working memory training might lead to long lasting improvements in fluid intelligence."
abstract_with_argument_for_improvement_in_fluid_intelligence = """
Abstract
Process-based working memory (WM) training in typically developing children usually leads to short- and long-term improvements on untrained WM tasks. However, results are mixed regarding far transfer to academic and cognitive abilities. Moreover, there is a lack of studies jointly evaluating the different types of transfer, using an adequate design and considering motivational factors. In addition, evidence is needed about how pre-training performance is related to individual differences in training-induced transfer. Therefore, this study aimed to implement and evaluate the efficacy of a computerized process-based WM training in typically developing school-age children. Near and far transfer effects were evaluated both immediately after training and after 6 months, as well as individual differences in training-induced transfer. The sample was composed of 89 typically developing children aged 9-10 years (M = 9.52, SD = 0.30), who were randomized to a WM training group or an active control group. They were evaluated at pre-training, post-training, and follow-up phases with measures of visuospatial and verbal WM, reading comprehension, math computation, and fluid intelligence. Results showed that the training group significantly improved performance in verbal WM and fluid intelligence compared to the active control group, immediately after training and after 6 months. Trained children with lower initial performance in verbal WM or fluid intelligence showed greater transfer gains. No group differences were found in motivational factors. Findings of this study suggest that process-based WM training may promote transfer to cognitive abilities and lead to compensation effects of individual differences in typically developing school-age children.
"""

output = inspect_evidence_for_hypothesis(hypothesis_fluid_intelligence, abstract_with_argument_for_improvement_in_fluid_intelligence)
display_output_evidence(output)

Position: yes
Position Description: Yes, this paper supports the idea that working memory training leads to long-lasting improvements in fluid intelligence.
Evidence: Results showed that the training group significantly improved performance in verbal WM and fluid intelligence compared to the active control group, immediately after training and after 6 months.


****


In [122]:
len(output.positions)

1

In [123]:
len(output.position_descriptions)

1

In [124]:
len(output.evidence)

1