In [None]:
import pandas as pd
# Working table for the whole notebook: later cells add columns to `df`
# and write it back to this same CSV path.
df = pd.read_csv("df_neurips_limitation_and_OR_with_cited_data.csv")

In [None]:
# Prompt prefix for the graph-based limitation agent.
# NOTE(review): the "[Insert JSON data here]" placeholder below is never
# substituted by the visible code; the batch loop concatenates the row's JSON
# onto the END of this string, i.e. after the output-format instructions.
Graph = '''
You are an expert in scientific research analysis. I have processed a scientific article using Stanford CoreNLP, resulting in a
JSON output containing structured data such as tokenized text, part-of-speech tags, named entity recognition, dependency parses, and optionally sentiment or coreference information. Using this JSON data, identify and generate a concise list of potential limitations of the study described in the article. Consider aspects such as:

Methodological limitations (e.g., study design, data collection, or analysis methods).
Sample size or population constraints (e.g., small sample, lack of diversity, or selection bias).
Generalizability issues (e.g., limited scope of findings or context-specific results).
Data quality or measurement issues (e.g., reliance on self-reported data or missing variables).
Any other relevant limitations inferred from the text or study description.
Please provide the limitations in a clear, bullet-point format, ensuring each limitation is specific, evidence-based, and derived from the content or structure of the provided JSON data. If certain information is ambiguous or missing in the JSON, note any assumptions made. Here is the JSON data for analysis:

[Insert JSON data here]

Output the limitations in the following format:

Limitation 1: [Description of the limitation, with reference to specific elements in the JSON if applicable].
Limitation 2: [Description of the limitation, with reference to specific elements in the JSON if applicable].

'''

### LLM agents (limitation generation)

In [None]:
# ChatOpenAI is instantiated below but was never imported in this notebook,
# so a fresh kernel failed with NameError. The import uses the same legacy
# `langchain.*` namespace as the other LangChain imports in this file.
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage


# Default system instruction for the limitation-generation agents.
# It is only sent when a caller passes it to run_critic(system_prompt=...).
system_prompt = '''You are a helpful, respectful, and honest assistant for generating limitations or shortcomings of a research paper.
 Generate limitations or shortcomings for the following passages from the scientific paper.'''

# Shared chat model used by run_critic; temperature=0 requests the least
# random sampling the model offers.
chat_llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# ─── new unified helper ───────────────────────────────────────────────────────
def run_critic(prompt: str,*,system_prompt: str | None = None) -> str:
    """
    Wraps ChatOpenAI to (optionally) send a system prompt + your user prompt,
    and returns the assistant's reply as a stripped string.
    """
    messages: list[SystemMessage | HumanMessage] = []

    if system_prompt:
        messages.append(SystemMessage(content=system_prompt))

    messages.append(HumanMessage(content=prompt))

    # this will wait for the full response (no streaming)
    response = chat_llm.invoke(messages)
    return response.content.strip()

In [None]:
# Assessment
import re
import json

def llm_assessment(agent_texts: dict,
                   agent_prompts: dict,
                   metrics=None):
    """Ask the judge model to assess every agent's output in one request.

    Args:
        agent_texts: Maps agent name -> text produced by that agent.
        agent_prompts: Only its keys are used; they select which agents are
            included and in what order.
        metrics: Optional list of metric names. It is defaulted here but is
            not inserted into the prompt by this function.

    Returns:
        The judge model's raw reply as returned by ``run_critic``.

    Raises:
        KeyError: if a key of ``agent_prompts`` is missing from ``agent_texts``.
    """
    if metrics is None:
        metrics = ['Depth','Originality','Actionability','Topic_Coverage']

    # One "**<name> Agent**" section per agent, appended after the judge prompt.
    judge_input = JUDGE_PROMPT
    for name in agent_prompts:
        judge_input += f"**{name} Agent**:\n{agent_texts[name]}\n\n"

    return run_critic(judge_input)

### coreNLP (graph)

In [None]:
!pip -q install stanza
!pip -q install networkx
!pip -q install matplotlib

In [None]:
!pip -q install --upgrade stanza

In [None]:
import stanza

# This downloads and installs the CoreNLP package; it does not start a server
# (the server is started later, when a CoreNLPClient is opened).
# It might take a few minutes the first time you run it.
stanza.install_corenlp()



In [None]:
import time
import os
import json
import re
import pandas as pd

from stanza.server import CoreNLPClient
import stanza
import networkx as nx
import matplotlib.pyplot as plt

def create_graph_from_text(text, client):
    """Build a dependency graph from the FIRST sentence of `text`.

    The text is annotated through the given, already running CoreNLP client.
    Only the first sentence of the annotation is used; any further sentences
    are ignored.

    Args:
        text: Raw text to annotate.
        client: Object exposing ``annotate(text)`` (an open CoreNLPClient).

    Returns:
        A ``networkx.DiGraph`` with one edge governor -> dependent per
        enhanced++ dependency, carrying the relation name in the ``label``
        edge attribute. Nodes are the strings ``"word (POS)"``, so tokens
        sharing both word and POS tag map to the same node. The graph is
        empty when the annotation contains no sentence.
    """
    annotation = client.annotate(text)
    graph = nx.DiGraph()
    if not annotation.sentence:
        return graph

    first_sentence = annotation.sentence[0]

    def node_label(token_number):
        # Edge endpoints are token numbers starting at 1; the token list is 0-based.
        tok = first_sentence.token[token_number - 1]
        return f"{tok.word} ({tok.pos})"

    for edge in first_sentence.enhancedPlusPlusDependencies.edge:
        head = node_label(edge.source)
        child = node_label(edge.target)
        graph.add_edge(head, child, label=edge.dep)
    return graph

def visualize_graph(graph):
    """Render `graph` with matplotlib, labelling edges with their 'label' attribute.

    Does nothing (and opens no figure) when the graph has no nodes.
    """
    if graph.nodes():
        # Spring layout gives the node coordinates shared by both drawing calls.
        layout = nx.spring_layout(graph, k=1.5, iterations=50)
        plt.figure(figsize=(16, 10))

        node_and_edge_style = dict(
            with_labels=True,
            node_size=3000,
            node_color='lightgreen',
            font_size=10,
            font_weight='bold',
            width=2,
            edge_color='grey',
            arrowsize=20,
        )
        nx.draw(graph, layout, **node_and_edge_style)

        # Overlay the dependency relation names on the edges.
        nx.draw_networkx_edge_labels(
            graph,
            layout,
            edge_labels=nx.get_edge_attributes(graph, 'label'),
            font_color='darkred',
            font_size=9,
        )
        plt.title("Dependency Graph from CoreNLP")
        plt.show()

if __name__ == "__main__":
    # 1) Download & install CoreNLP (only needs to succeed once per machine).
    stanza.install_corenlp()

    # Seconds to pause after each processed row (same value as before).
    # NOTE(review): 40 s per row against a locally started server looks like a
    # leftover throttle — confirm whether it is still needed.
    ROW_PAUSE_SECONDS = 40

    # 2) Spin up a single server and reuse it for every row.
    #    It is bound to `corenlp_client` rather than `client` because a later
    #    cell binds the module-level name `client` to the Azure OpenAI client
    #    that azure_run_critic reads; sharing the name breaks whichever cell
    #    ran first when cells are re-executed.
    with CoreNLPClient(
            annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'depparse'],
            memory='4G',
            be_quiet=True
        ) as corenlp_client:

        df['Graph_Agent_json'] = None

        # Remember the first row's graph so it can be plotted afterwards
        # without sending the same text to the server a second time.
        first_idx = df.index[0] if len(df) else None
        first_graph = None

        # 3) Loop through the texts, reusing the same client
        for idx, text in df['Input_Query_rewrite'].items():
            # Missing CSV cells arrive as float NaN, which is truthy, so
            # `text or ""` would hand NaN to the annotator. Anything that is
            # not a string is treated as empty text instead.
            clean_text = text if isinstance(text, str) else ""
            graph = create_graph_from_text(clean_text, corenlp_client)
            if graph.nodes():
                node_link = nx.node_link_data(graph)
                df.at[idx, 'Graph_Agent_json'] = json.dumps(node_link, indent=2)
            else:
                df.at[idx, 'Graph_Agent_json'] = None

            if first_graph is None and idx == first_idx:
                first_graph = graph

            print(f"Processed row {idx}")
            time.sleep(ROW_PAUSE_SECONDS)

        # 4) (Optionally) visualize the first one, if it produced a graph
        if first_graph is not None and first_graph.nodes():
            visualize_graph(first_graph)

    print("Finished generating JSON for all rows.")


### Limitation generation (graph)

In [None]:
import ast
import tiktoken
import os
import base64
import re
import json
import time
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from langchain.text_splitter import CharacterTextSplitter
from rank_bm25 import BM25Okapi
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever
from langchain.embeddings import HuggingFaceEmbeddings
from openai import AzureOpenAI
from openai import AzureOpenAI, RateLimitError
# ─── token helpers ─────────────────────────────────────────────────────────────

# Token budget constant; it is not referenced by the code visible in this notebook.
MAX_CONTEXT_TOKENS = 127_000
# Default model whose tokenizer count_tokens uses.
MODEL_NAME = "gpt-4o-mini"

def count_tokens(text: str, model: str = MODEL_NAME) -> int:
    """Return how many tokens `text` occupies under `model`'s tiktoken encoding.

    ``disallowed_special=()`` is passed to ``encode`` so that no special-token
    check is applied to the input text.
    """
    encoder = tiktoken.encoding_for_model(model)
    token_ids = encoder.encode(text, disallowed_special=())
    return len(token_ids)

# Azure OpenAI connection settings, read from the environment.
# An unset variable falls back to the default given here (empty string for
# the endpoint and the key, "gpt-4o-mini" for the deployment name).
subscription_key = os.environ.get("AZURE_OPENAI_API_KEY", "")
deployment = os.environ.get("DEPLOYMENT_NAME", "gpt-4o-mini")
endpoint = os.environ.get("ENDPOINT_URL", "")

# Module-level Azure client used by azure_run_critic.
client = AzureOpenAI(
    api_version="2025-01-01-preview",
    api_key=subscription_key,
    azure_endpoint=endpoint,
)

def azure_run_critic(prompt: str, retries: int = 1, wait_seconds: float = 60):
    """Send `prompt` as a single user message to the Azure deployment.

    Args:
        prompt: Full text of the user message.
        retries: How many extra attempts to make after a rate-limit error.
            Negative values are treated as 0, so the request is always sent
            at least once.
        wait_seconds: Pause before each retry (default 60, the previous
            hard-coded value).

    Returns:
        The reply text with surrounding whitespace stripped, or ``""`` when
        the response carries no text (``message.content`` is ``None``), so
        callers always receive a string.

    Raises:
        RateLimitError: when the final attempt is still rate limited.
        Any other exception from the client propagates unchanged.
    """
    attempts = max(retries, 0) + 1
    for attempt in range(attempts):
        try:
            resp = client.chat.completions.create(
                model=deployment,
                messages=[{"role": "user", "content": prompt}],
                temperature=0,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                stream=False
            )
        except RateLimitError:
            # Out of attempts: surface the error to the caller.
            if attempt == attempts - 1:
                raise
            time.sleep(wait_seconds)
            continue

        # content may be None; calling .strip() on it raised AttributeError.
        content = resp.choices[0].message.content
        return content.strip() if content else ""

# Batch loop: ask the graph agent for limitations, one row at a time.
all_generated_summary = []
start_time = time.time()

# Make sure the result column exists and can hold strings. A column reloaded
# from CSV that is entirely empty would otherwise be float-typed.
if "graph_agent" in df.columns:
    df["graph_agent"] = df["graph_agent"].astype(object)
else:
    df["graph_agent"] = None

for i in df.index:
    print(f"\nProcessing row {i}")

    graph_json = df.at[i, 'Graph_Agent_json']
    # Rows for which no graph was produced hold None (NaN after a CSV round
    # trip). `Graph + None` raised TypeError and aborted the whole batch, so
    # such rows are skipped and left without a result.
    if not isinstance(graph_json, str) or not graph_json.strip():
        print(f"Row {i}: no graph JSON, skipping")
        df.at[i, "graph_agent"] = None
        continue

    graph_input = Graph + graph_json
    df.at[i, "graph_agent"] = azure_run_critic(graph_input)

df.to_csv("df_neurips_limitation_and_OR_with_cited_data.csv",index=False)

In [None]:
# Coordinator prompt: merges the extractor, analyzer, reviewer, graph and
# citation agents' limitations. The text previously announced "four agents"
# while listing five; the count now matches the list and the sections that
# the coordinator loop appends.
COORDINATOR_PROMPT = '''
    You are a **Master Coordinator**, an expert in scientific communication and synthesis. Your task is to integrate limitations provided
    by five agents:
    1. **Extractor** (explicit limitations from the article),
    2. **Analyzer** (inferred limitations from critical analysis),
    3. **Reviewer** (limitations from an open review perspective),
    4. **Graph** (derived limitations from graph),
    5. **Citation** (limitations based on cited papers).


    **Goals**:
    1. Combine all limitations into a cohesive, non-redundant list.
    2. Ensure each limitation is clearly stated, scientifically valid, and aligned with the article’s content.
    3. Prioritize limitations explicitly mentioned by the authors, supplementing them with inferred or peer-review-based limitations
    only if they add value.
    4. Format the final list in a clear, concise, and professional manner, suitable for a scientific review or report, with citations for
    external sources.

    **Workflow** (inspired by SYS_PROMPT_SWEBENCH):
    1. **Plan**: Outline how to synthesize limitations, identify potential redundancies, and resolve discrepancies.
    2. **Analyze**: Combine limitations, prioritizing author-stated ones, and verify alignment with the article.
    3. **Reflect**: Check for completeness, scientific rigor, and clarity; resolve discrepancies using article content or tools.
    4. **Continue**: Iterate until the list is comprehensive, non-redundant, and professionally formatted.

    **Output Format**:
    - Numbered list of final limitations.
    - For each: Clear statement, brief justification, and source in brackets (e.g., [Author-stated], [Inferred], [Peer-review-derived],
    [Cited-papers], [Graph]).
    - Include citations for external sources (e.g., web/X posts, cited papers) in the format [Source Name](ID).
    **Tool Use**:
    - Use text extraction tools to verify article content.
    - Use citation lookup tools to cross-reference cited papers.
    - Use web/X search tools to resolve discrepancies involving external context.

 '''


In [None]:
import os
import base64
import re
import json
import time
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from langchain.text_splitter import CharacterTextSplitter
from rank_bm25 import BM25Okapi
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever
from langchain.embeddings import HuggingFaceEmbeddings
from openai import AzureOpenAI
from openai import AzureOpenAI, RateLimitError

# Every row starts with an empty coordinator result.
df['master_agent_with_graph'] = ''

# NOTE(review): `df1` is not defined in any cell visible in this notebook; it
# presumably holds the extractor/analyzer/reviewer/citation outputs, row-aligned
# with `df` — confirm where it is loaded before running on a fresh kernel.
for i in range(len(df)): # len(df)
    print("i is",i)

    # (label, text) pairs, in the order the sections appear in the prompt.
    agent_outputs = (
        ("Extractor", df1['extractor_agent'][i]),
        ("Analyzer", df1['analyzer_agent'][i]),
        ("Reviewer", df1['reviewer_agent'][i]),
        ("Graph", df['graph_agent'][i]),
        ("Citation", df1['citation_agent'][i]),
    )
    sections = "".join(
        f"**{label} Agent**:\n{text}\n\n" for label, text in agent_outputs
    )
    coord_prompt = (
        COORDINATOR_PROMPT
        + sections
        + "Produce a single, numbered list of final limitations, noting each source in brackets."
    )

    # No system prompt is passed: the whole instruction lives in the user message.
    df.at[i, 'master_agent_with_graph'] = run_critic(coord_prompt)

df.to_csv("df_neurips_limitation_and_OR_with_cited_data.csv",index=False)