In [None]:
import pandas as pd
# Load the paper table that the later cells read from and add columns to.
# NOTE(review): the file name spells "neruips" (not "neurips"); presumably this
# matches the file on disk — confirm before renaming anything.
df = pd.read_csv("df_neruips_21_22_final.csv")

In [None]:
# Section columns that are merged into one labelled text per paper.
# Their order here is the order in which the sections appear in the output.
cols_to_concat = [
    "neurips_Abstract",
    "neurips_Introduction",
    "neurips_Related_Work",
    "neurips_Methodology",
    "neurips_Dataset",
    "neurips_Conclusion",
    "neurips_Experiment_and_Results",
    "neurips_Extra",
]

def concat_with_labels(row):
    """Merge the non-empty section texts of one row into a single labelled string.

    For every column in ``cols_to_concat`` whose value is a non-blank string,
    emit ``"<Label>: <text>"`` where the label is the column name without the
    ``neurips_`` prefix and with underscores turned into spaces. Sections are
    separated by a blank line. Missing or non-string values are skipped.

    Args:
        row: Mapping-like row (supports ``.get``), e.g. a DataFrame row.

    Returns:
        The labelled sections joined by ``"\\n\\n"``; ``""`` if none qualify.
    """
    labelled_sections = []
    for column in cols_to_concat:
        text = row.get(column)
        # Non-text cells (NaN, None, numbers) contribute nothing.
        if not isinstance(text, str):
            continue
        text = text.strip()
        if not text:
            continue
        heading = column.replace("neurips_", "").replace("_", " ")
        labelled_sections.append(f"{heading}: {text}")
    return "\n\n".join(labelled_sections)

# Row-wise apply: one labelled full-text string per paper.
df["response_string_neurips"] = df.apply(concat_with_labels, axis="columns")

In [None]:
import os
import time
from openai import OpenAI

# Set up OpenAI API
# The key must not be written into the notebook. Reuse OPENAI_API_KEY when the
# environment already provides it (the previous code overwrote it with an empty
# string, which makes every request fail authentication) and otherwise ask for
# it interactively without echoing it into the cell output.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


# Define the OpenAI streaming function for GPT-4o-mini
def run_critic_openai(prompt: str):
    """Send ``prompt`` as one user message to gpt-4o-mini and return the reply text.

    The completion is requested through the module-level ``client`` with
    ``stream=True`` and ``temperature=0``; the streamed text deltas are
    concatenated in arrival order.

    Args:
        prompt: Complete prompt text (instructions plus paper content).

    Returns:
        The full reply with leading/trailing whitespace removed.

    Raises:
        Whatever the OpenAI client raises for request or stream failures;
        nothing is caught here.
    """
    stream = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "user", "content": prompt}
        ],
        stream=True,
        temperature=0
    )
    pieces = []
    for chunk in stream:
        # A stream chunk may arrive with an empty `choices` list; indexing [0]
        # on it would raise IndexError and discard the text received so far.
        if not chunk.choices:
            continue
        # `delta.content` is None on chunks that carry no text.
        pieces.append(chunk.choices[0].delta.content or "")
    return "".join(pieces).strip()

import time

# Wall-clock timestamp taken when this cell runs; `start` is not read again in
# the cells shown here.
start = time.time()

In [None]:
# Instruction prefix for the "extractor" agent: list only the limitations the
# authors state explicitly. The paper text is concatenated directly after the
# trailing blank line of each prompt.
extractor_prompt = '''You are an expert in scientific literature analysis. Your task is to carefully read the provided scientific article and
extract all explicitly stated limitations as mentioned by the authors. Focus on sections such as the Discussion, Conclusion.
List each limitation verbatim, including direct quotes where possible, and provide
a brief context (e.g., what aspect of the study the limitation pertains to). Ensure accuracy and avoid inferring or adding
limitations not explicitly stated. If no limitations are mentioned, state this clearly. Output your findings in a structured
format with bullet points.\n\n'''
# Instruction prefix for the "analyzer" agent: infer limitations the authors did
# not state. The continuation lines are indented inside the triple-quoted
# string, so that leading whitespace is part of the prompt that is sent.
analyzer_prompt = '''You are a critical scientific reviewer with expertise in research methodology and analysis. Your task is to analyze the
        provided scientific article and identify potential limitations that are not explicitly stated by the authors. Focus on aspects
        such as study design, sample size, data collection methods, statistical analysis, scope of findings, and underlying assumptions.
        For each inferred limitation, provide a clear explanation of why it is a limitation and how it impacts the study’s validity,
        reliability, or generalizability. Ensure your inferences are grounded in the article’s content and avoid speculative assumptions.
        Output your findings in a structured format with bullet points, including a brief justification for each limitation.\n\n'''

# Instruction prefix for the "reviewer" agent: limitations an open peer review
# would raise.
# NOTE(review): the text invites searching the web or X posts, but
# run_critic_openai sends a plain chat message without any tools — confirm the
# wording is intended.
reviewer_prompt = '''You are an expert in open peer review with a focus on transparent and critical evaluation of scientific research. Your task
        is to review the provided scientific article from the perspective of an external peer reviewer. Identify potential limitations
        that might be raised in an open review process, considering common critiques such as reproducibility, transparency,
        generalizability, or ethical considerations. If possible, leverage insights from similar studies or common methodological
        issues in the field (search the web or X posts if needed for context). For each limitation, explain why it would be a
        concern in an open review and how it aligns with peer review standards. Output your findings in a structured format with
        bullet points, ensuring each limitation is relevant to the article’s content.:\n\n'''


In [None]:
# lim gen
# Run the three single-paper agents (extractor, analyzer, reviewer) on every row
# and store each reply in its own column.
# The calls go through run_critic_openai, the LLM helper defined earlier in this
# notebook; the previously referenced azure_run_critic is not defined anywhere
# in it and raised NameError on the first row.
for i in range(len(df)): # len(df)
    print(f"\nProcessing row {i}")
    paper_text = df['response_string_neurips'][i]

    # Each agent gets its own instruction prefix followed by the same paper text.
    extractor_agent = run_critic_openai(extractor_prompt + paper_text)
    analyzer_agent = run_critic_openai(analyzer_prompt + paper_text)
    reviewer_agent = run_critic_openai(reviewer_prompt + paper_text)

    df.at[i, "extractor_agent"]  = extractor_agent
    df.at[i, "analyzer_agent"]  = analyzer_agent
    df.at[i, "reviewer_agent"]  = reviewer_agent


Keep retrieved chunks with a relevance score of 8 or more

In [None]:
import ast
import pandas as pd

# Decode the stringified Python lists stored in the CSV into real lists.
# Only string cells are handed to ast.literal_eval, so re-running this cell on
# already-decoded lists leaves them untouched instead of raising ValueError.
for _col in ('retrieved_text_llm_asses', 'top20_docs'):
    df[_col] = (
        df[_col]
          .fillna('[]')               # turn NaN → "[]"
          .apply(lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell)
    )

# take chunk where relevance score 8 or more

def pick_high_relevance(row, threshold=8,
                        docs_col='top20_docs',
                        asses_col='retrieved_text_llm_asses'):
    """Return the docs whose same-position assessment scores at least ``threshold``.

    ``row[asses_col]`` is scanned by position; for every entry that is a dict
    with a numeric ``relevance_score`` >= ``threshold``, the item at the same
    position in ``row[docs_col]`` is selected. Positions beyond the end of the
    docs list are ignored.

    Scores written as numeric strings (e.g. ``"9"``) are accepted; entries whose
    score is missing, ``None`` or not numeric are treated as not relevant
    instead of raising ``TypeError`` in the comparison.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) holding both columns.
        threshold: Minimum relevance score to keep a document.
        docs_col: Column holding the list of documents.
        asses_col: Column holding the list of assessment dicts.

    Returns:
        List of selected documents, in their original order.
    """
    docs  = row[docs_col]
    asses = row[asses_col]

    def _score(assessment):
        # Non-dict entries (parse leftovers) carry no score.
        if not isinstance(assessment, dict):
            return None
        try:
            return float(assessment.get('relevance_score', 0))
        except (TypeError, ValueError):
            return None

    selected = []
    for idx, assessment in enumerate(asses):
        score = _score(assessment)
        # guard against assessments without a matching document
        if score is not None and score >= threshold and idx < len(docs):
            selected.append(docs[idx])
    return selected

# Per paper, keep the entries of top20_docs that passed the relevance filter.
# pick_high_relevance is defined a second time in a later cell with different
# columns, so this line needs the definition from the cell above to be live.
df['relevance_8_cited_in'] = df.apply(pick_high_relevance, axis="columns")


In [None]:
import ast
import pandas as pd

# Decode the stringified Python lists stored in the CSV into real lists.
# Only string cells are handed to ast.literal_eval, so re-running this cell on
# already-decoded lists leaves them untouched instead of raising ValueError.
for _col in ('cited_by_top_20_texts', 'retrieved_text_llm_asses_cited_by'):
    df[_col] = (
        df[_col]
          .fillna('[]')               # turn NaN → "[]"
          .apply(lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell)
    )

import json
import re

def _load_fenced_json(text):
    """Parse the JSON payload of one LLM reply; return None when nothing parses."""
    stripped = text.strip()

    # Candidate 1: the reply is the payload itself, optionally wrapped in a
    # leading ```json / ``` fence and a trailing ``` fence.
    unfenced = re.sub(r'^```(?:json)?\s*', '', stripped, flags=re.IGNORECASE)
    unfenced = re.sub(r'```$', '', unfenced.strip())
    candidates = [unfenced]

    # Candidate 2: a fenced block embedded in surrounding prose.
    embedded = re.search(r'```(?:json)?\s*(.*?)```', stripped,
                         flags=re.DOTALL | re.IGNORECASE)
    if embedded:
        candidates.append(embedded.group(1))

    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None


def extract_chunk_dicts(cell):
    """
    Flatten the LLM assessment replies of one cell into a list of chunk dicts.

    cell is expected to be a list of strings, each string containing a
    ```json ... ``` block holding a JSON array of chunk-dicts. Bare JSON,
    plain ``` fences and a fenced block surrounded by prose are accepted too.

    Items that are already dicts are kept as they are; other non-string items,
    strings that do not parse, and payloads that are not JSON arrays are
    skipped. A cell that is not a list/tuple (e.g. NaN or None) yields [].

    Returns a flat list of all dicts, in input order.
    """
    out = []
    if not isinstance(cell, (list, tuple)):
        return out
    for s in cell:
        if isinstance(s, dict):
            out.append(s)
            continue
        if not isinstance(s, str):
            continue

        data = _load_fenced_json(s)

        # only a JSON array contributes; keep its dict members
        if isinstance(data, list):
            out.extend(d for d in data if isinstance(d, dict))
    return out

# Flatten every cell of the cited-by assessments into a list of chunk dicts.
df['retrieved_text_llm_asses_cited_by_upd'] = (
    df['retrieved_text_llm_asses_cited_by']
      .apply(extract_chunk_dicts)
)

# take chunk where relevance score 8 or more

def pick_high_relevance(row, threshold=8,
                        docs_col='cited_by_top_20_texts',
                        asses_col='retrieved_text_llm_asses_cited_by_upd'):
    """Return the docs whose same-position assessment scores at least ``threshold``.

    ``row[asses_col]`` is scanned by position; for every entry that is a dict
    with a numeric ``relevance_score`` >= ``threshold``, the item at the same
    position in ``row[docs_col]`` is selected. Positions beyond the end of the
    docs list are ignored.

    Scores written as numeric strings (e.g. ``"9"``) are accepted; entries whose
    score is missing, ``None`` or not numeric are treated as not relevant
    instead of raising ``TypeError`` in the comparison.

    NOTE(review): the default assessments column is produced by
    extract_chunk_dicts, which flattens replies and skips unparsable ones, so
    position i of the assessments may not describe position i of the docs —
    verify the alignment against the data.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) holding both columns.
        threshold: Minimum relevance score to keep a document.
        docs_col: Column holding the list of documents.
        asses_col: Column holding the list of assessment dicts.

    Returns:
        List of selected documents, in their original order.
    """
    docs  = row[docs_col]
    asses = row[asses_col]

    def _score(assessment):
        # Non-dict entries (parse leftovers) carry no score.
        if not isinstance(assessment, dict):
            return None
        try:
            return float(assessment.get('relevance_score', 0))
        except (TypeError, ValueError):
            return None

    selected = []
    for idx, assessment in enumerate(asses):
        score = _score(assessment)
        # guard against assessments without a matching document
        if score is not None and score >= threshold and idx < len(docs):
            selected.append(docs[idx])
    return selected

# Per paper, keep the entries of cited_by_top_20_texts that passed the
# relevance filter (uses the pick_high_relevance defined in this cell).
df['relevance_8_cited_by'] = df.apply(pick_high_relevance, axis="columns")


citation agent

Input: the paper text plus the cited-in / cited-by chunks with a relevance score of 8 or more

In [None]:
import re
import pandas as pd
import tiktoken

# Tokenization setup
encoding   = tiktoken.encoding_for_model("gpt-4o-mini")
# Prompt budget in tokens. The model's 128,000-token context window is shared
# by the prompt, the chat-message framing and the reply, so truncating prompts
# to the full window leaves no room for the answer (and a prompt of exactly
# that size is rejected once framing tokens are added). Keep part of the
# window free; tune reply_reserve_tokens if longer answers are needed.
context_window_tokens = 128000
reply_reserve_tokens  = 4096
max_tokens = context_window_tokens - reply_reserve_tokens

def truncate_to_max_tokens(text: str, max_length: int) -> str:
    """Cut ``text`` to at most ``max_length`` tokens of the module-level ``encoding``.

    Text that already fits is returned unchanged (no encode/decode round trip);
    otherwise the first ``max_length`` tokens are decoded back to a string.
    """
    token_ids = encoding.encode(text)
    if len(token_ids) <= max_length:
        return text
    return encoding.decode(token_ids[:max_length])

# Make sure the output column exists
df['citation_agent_in_by_8'] = ''


def _chunk_texts(chunks):
    """Return one string per retrieved chunk: the 'text' field of dict items, else str(item)."""
    if not isinstance(chunks, (list, tuple)):
        # Missing cell (e.g. NaN/None): no context from this side.
        return []
    return [
        str(itm['text']) if isinstance(itm, dict) and 'text' in itm else str(itm)
        for itm in chunks
    ]


# Closing instruction of the prompt. It is appended after truncation so an
# over-long paper/context can no longer push it out of the prompt.
closing_instruction = "\n\nPlease generate limitations based on this information."
closing_tokens = len(encoding.encode(closing_instruction))

# Process each row
# Only the first 5 rows are handled here; min() keeps the loop valid for frames
# with fewer than 5 rows.
for i in range(min(5, len(df))): # len(df)
    print(f"Processing row {i}...")
    row = df.iloc[i]

    # 1) + 2) Collect the texts of the high-relevance chunks from the papers this
    #         article cites (cited_in) and from the papers citing it (cited_by)
    cited_in_block = "\n".join(_chunk_texts(row.get('relevance_8_cited_in', [])))
    cited_by_block = "\n".join(_chunk_texts(row.get('relevance_8_cited_by', [])))

    # 3) Build the combined prompt section
    combined_cited_input = (
        "Referenced papers:\n" + cited_in_block +
        "\n\nPapers who cited this paper:\n" + cited_by_block
    )

    input_paper = df['response_string_neurips'][i]
    # 4) Everything except the closing instruction; this is the part that may
    #    be truncated.
    prompt_body = (
        "You are an assistant tasked to generate limitations or shortcomings "
        "in a scientific article. Below is the input paper:\n"
        f"{input_paper}\n\n"
        " Below is the relevant text from both the papers "
        "that this article cites and those that cite it.\n\n"
        f"{combined_cited_input}"
    )

    # 5) Truncate and call LLM
    # For prompts that fit, body + closing instruction is identical to the
    # previous single-string prompt.
    truncated = truncate_to_max_tokens(prompt_body, max_tokens - closing_tokens) + closing_instruction
    try:
        # run_critic_openai is the LLM helper defined earlier in this notebook;
        # the previously used azure_run_critic is not defined anywhere in it, and
        # the resulting NameError was swallowed below, marking every row "ERROR".
        llm_summary = run_critic_openai(truncated)
    except Exception as e:
        print(f"Error at row {i}: {e}")
        llm_summary = "ERROR"

    df.at[i, "citation_agent_in_by_8"] = llm_summary


### master agent

In [None]:
# System-style instructions for the master (coordinator) agent. master_agent
# appends the four agent outputs right after the trailing "**Input**: " marker.
# The leading newline and the 4-space indentation are inside the string and are
# therefore sent to the model as written.
# NOTE(review): the "Tool Use" section asks the model to use extraction, citation
# and web/X search tools, but the LLM helper visible in this notebook sends a
# plain chat message without tools — confirm this wording is intended.
COORDINATOR_PROMPT = '''
    You are a **Master Coordinator**, an expert in scientific communication and synthesis. Your task is to integrate limitations provided by four agents:
    1. **Extractor** (explicit limitations from the article),
    2. **Analyzer** (inferred limitations from critical analysis),
    3. **Reviewer** (limitations from an open review perspective),
    4. **Citation** (limitations based on cited papers).

    **Goals**:
    1. Combine all limitations into a cohesive, non-redundant list.
    2. Ensure each limitation is clearly stated, scientifically valid, and aligned with the article’s content.
    3. Prioritize author-stated limitations, supplementing with inferred, peer-review, or citation-based limitations if they add value.
    4. Resolve discrepancies between agents’ outputs by cross-referencing the article and cited papers, using tools to verify content.
    5. Format the final list in a clear, concise, and professional manner, suitable for a scientific review or report, with citations for external sources.

    **Workflow** (inspired by SYS_PROMPT_SWEBENCH):
    1. **Plan**: Outline how to synthesize limitations, identify potential redundancies, and resolve discrepancies.
    2. **Analyze**: Combine limitations, prioritizing author-stated ones, and verify alignment with the article.
    3. **Reflect**: Check for completeness, scientific rigor, and clarity; resolve discrepancies using article content or tools.
    4. **Continue**: Iterate until the list is comprehensive, non-redundant, and professionally formatted.

    **Output Format**:
    - Numbered list of final limitations.
    - For each: Clear statement, brief justification, and source in brackets (e.g., [Author-stated], [Inferred], [Peer-review-derived], [Cited-papers]).
    - Include citations for external sources (e.g., web/X posts, cited papers) in the format [Source Name](ID).
    **Tool Use**:
    - Use text extraction tools to verify article content.
    - Use citation lookup tools to cross-reference cited papers.
    - Use web/X search tools to resolve discrepancies involving external context.

    **Input**: '''


In [None]:
import pandas as pd

# taking the self-feedback if it exists otherwise actual one
def master_agent(extractor_text, analyzer_text, reviewer_text, citation_text):
    """
    Merge the outputs of the four specialized agents into one final list.

    The four texts are appended, each under its own bold heading, to
    COORDINATOR_PROMPT, followed by a closing instruction; the resulting prompt
    is sent to the LLM through run_critic_openai.

    Args:
        extractor_text: Output of the extractor agent.
        analyzer_text: Output of the analyzer agent.
        reviewer_text: Output of the reviewer agent.
        citation_text: Output of the citation agent.

    Returns:
        The LLM reply with the coordinated limitations.
    """
    coord_prompt = (
        COORDINATOR_PROMPT
        + f"**Extractor Agent**:\n{extractor_text}\n\n"
        + f"**Analyzer Agent**:\n{analyzer_text}\n\n"
        + f"**Reviewer Agent**:\n{reviewer_text}\n\n"
        + f"**Citation Agent**:\n{citation_text}\n\n"
        + "Produce a single, numbered list of final limitations, noting each source in brackets."
    )
    # run_critic_openai is the LLM helper defined earlier in this notebook; the
    # previously used azure_run_critic is not defined anywhere in it (NameError).
    return run_critic_openai(coord_prompt)

In [None]:
# Example: Create a new column to store the output
results = []

df['master_agent_ext_analy_rev'] = ''

# The citation-agent cell fills only the first 5 rows, so the same rows are
# coordinated here. min() keeps the loop valid for frames with fewer than 5
# rows (the df.at reads below sit outside the try and would raise KeyError).
for i in range(min(5, len(df))): # len(df)
    print("i is",i)
    extractor_text = df.at[i, 'extractor_agent']
    analyzer_text  = df.at[i, 'analyzer_agent']
    reviewer_text  = df.at[i, 'reviewer_agent']
    citation_text  = df.at[i, 'citation_agent_in_by_8']

    try:
        result = master_agent(extractor_text, analyzer_text, reviewer_text, citation_text)
    except Exception as e:
        # Best effort: log the failure and mark the row instead of aborting the run.
        print(f"Error at row {i}: {e}")
        result = "ERROR"

    # Keep the reply both in the DataFrame and in the `results` list.
    df.at[i,'master_agent_ext_analy_rev'] = result
    results.append(result)

# Add results back to df
