# Load ingested paper

In [1]:
import asyncio
from raganything import RAGAnything, RAGAnythingConfig
from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import EmbeddingFunc
import os
from dotenv import load_dotenv

load_dotenv()


# Set up API configuration
api_key = os.getenv("OPENAI_API_KEY")

# First, create or load existing LightRAG instance
lightrag_working_dir = "./rag_storage"

# Check if previous LightRAG instance exists
if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
    print("✅ Found existing LightRAG instance, loading...")
else:
    print("❌ No existing LightRAG instance found, will create new one")

# Create/load LightRAG instance with your configuration
lightrag_instance = LightRAG(
    working_dir=lightrag_working_dir,
    llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
        "o3-2025-04-16",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=api_key,
        **kwargs,
    ),
    embedding_func=EmbeddingFunc(
        embedding_dim=3072,
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
        ),
    )
)

# Initialize storage (this will load existing data if available)
await lightrag_instance.initialize_storages()
await initialize_pipeline_status()

# Define vision model function for image processing
def vision_model_func(
    prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
):
    if image_data:
        return openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=[
                {"role": "system", "content": system_prompt}
                if system_prompt
                else None,
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            },
                        },
                    ],
                }
                if image_data
                else {"role": "user", "content": prompt},
            ],
            api_key=api_key,
            **kwargs,
        )
    else:
        return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)

# Now use existing LightRAG instance to initialize RAGAnything
rag = RAGAnything(
    lightrag=lightrag_instance,  # Pass existing LightRAG instance
    vision_model_func=vision_model_func,
    # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
)


✅ Found existing LightRAG instance, loading...


INFO:nano-vectordb:Load (87, 3072) data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': './rag_storage/vdb_entities.json'} 87 data
INFO:nano-vectordb:Load (62, 3072) data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': './rag_storage/vdb_relationships.json'} 62 data
INFO:nano-vectordb:Load (6, 3072) data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': './rag_storage/vdb_chunks.json'} 6 data
Rerank is enabled but no rerank_model_func provided. Reranking will be skipped.


---
---
# Metadata

In [2]:
from pypdf import PdfWriter
from pathlib import Path

pdf_files = sorted(Path("/Users/ishaankalra/Dev/CGS/debaccle/papers").glob('**/*.pdf'))

# Take first page from each PDF

pdf_writer = PdfWriter()

for file in pdf_files:
    pdf_writer.append(file, pages=(0, 1))

with open("CombinedFirstPages.pdf", "wb") as fp:
    pdf_writer.write(fp)

In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI

class Paper:
    title: str
    abstract: str
    authors: list[str]
    doi: str
    publication_date: str

class PaperCollection:
    papers: list[Paper]
    
file_path = "/Users/ishaankalra/Dev/CGS/debaccle/CombinedFirstPages.pdf"

async def extract_metadata(file_path):
    loader = PyPDFLoader(file_path)
    pages = []
    async for page in loader.alazy_load():
        pages.append(page)
        
    final = ""

    for page in pages:
        final += page.page_content + "\nEND OF PAPER\n" + ("-"*100) + "\n"
        


    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    result = llm.with_structured_output(PaperCollection).invoke(f"You are a Data Extractionist. Extract Relevant Data from this content: \n {final} \n. You are only supposed to extract the title, abstract, authors, doi, and publication date of the papers provided. Each paper if denoted by `END OF PAPER` tag . Do not extract any other data.")
    return result

In [4]:
metadata = await extract_metadata("/Users/ishaankalra/Dev/CGS/debaccle/CombinedFirstPages.pdf")

In [5]:
result = metadata

---
---

---
---
# Questionnaire Agent

Topical fit
Does the title/abstract/keywords overlap my research question?

Research objective clarity
Is the paper’s goal stated in a way that maps to my own?

Domain context
Does it address my sector, geography, time period, or population?

Methodological alignment
Do they use methods I trust (or want to compare against)?

Data quality & transparency
Are datasets, code, or experimental setups disclosed?

Novelty vs. redundancy
Does it add something beyond what’s already cited in my literature map?

Citation network
Is it cited (or likely to be) by key authors in the field?

Journal / venue reputation
Is the outlet peer-reviewed, indexed, and trusted?

Timeliness
How recent is it relative to technology or policy changes?

Practical implications
Does it report metrics or findings I can plug into my own analysis/workflow?

Limitations & bias discussion
Do authors openly discuss constraints?

Funding & conflicts
Who paid for it? Any industry backing that could skew results?

In [6]:
# questionnaire_agent.py
from __future__ import annotations

import os
from typing import List, TypedDict

from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import PydanticOutputParser
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field

# ────────────────────────────────────────────────────────────────────────────────
# 1. ── Pydantic schema for structured questions ────────────────────────────────
class QuestionItem(BaseModel):
    """A single probing question posed to the debater agents."""
    question: str = Field(..., description="Open-ended, domain-specific question")
    why_it_matters: str = Field(
        ...,
        description="One-sentence rationale linking the question to methane-abatement relevance"
    )

class QuestionSet(BaseModel):
    """The Questionnaire Agent's full output payload."""
    questions: List[QuestionItem]

# ────────────────────────────────────────────────────────────────────────────────
# 2. ── LangGraph State definition ──────────────────────────────────────────────
class QState(TypedDict):
    paper_title: str
    paper_abstract: str
    # you can add paper_full_text or other metadata later
    questions: QuestionSet | None  # populated by the LLM node

# ────────────────────────────────────────────────────────────────────────────────
# 3. ── Initialise the LLM with structured output parsing ───────────────────────
llm = ChatOpenAI(
    # Replace "o3" with "gpt-4o-mini" etc. if you prefer
    model="o3",
)

parser = PydanticOutputParser(pydantic_object=QuestionSet)

def draft_questions(state: QState) -> QState:
    """
    Core Questionnaire Agent logic:
    Turns (title, abstract) into a vetted list of probing questions.
    """
    sys_prompt = (
        "You are the Questionnaire Agent for G-MAST. "
        "Your goal is to draft incisive, domain-expert questions that help two "
        "debater agents decide whether a research paper is relevant to methane-"
        "abatement technologies and strategies. "
        "Questions must:\n"
        "• Cover technology specifics, sector coverage (energy/agriculture/waste), "
        "   methodological rigour, scalability, cost, policy/market feasibility, etc.\n"
        "• Be open-ended (not yes/no) and encourage evidence-based answers.\n"
        "• Each include a SHORT rationale (`why_it_matters`). "
        "Main Focus Points:"
        "- Does the paper talk of methane or other greenhouse gases?"
        "- If the article discusses methane what is the central theme of the paper? Is the theme methane measurements, modeling and simulations or does the article talk about methane reduction methods or strategies?"
        "- If the theme of the article is methane reduction methods or strategies, what methods and strategies are discussed, and which methane source do they target?"
        "- If the theme methane measurements, modeling and simulations does the article discuss methane reduction and if so what context? Does the article speak about methane reduction strategies or methods briefly or extensively?"
        "Return exactly 6–8 questions."
    )

    user_prompt = (
        f"Paper title: {state['paper_title']}\n"
        f"Abstract (trimmed): {state['paper_abstract']}\n\n"
        "===\n"
        "Draft the questions now."
    )

    raw = llm.with_structured_output(QuestionSet).invoke(
        [
            SystemMessage(content=sys_prompt),
            HumanMessage(content=user_prompt),
        ]
    )
    questions = raw.questions

    return {**state, "questions": questions}

# ────────────────────────────────────────────────────────────────────────────────
# 4. ── Build the questionnaire-only graph ──────────────────────────────────────
graph = StateGraph(QState)
graph.add_node("draft_questions", draft_questions)
graph.set_entry_point("draft_questions")
graph.add_edge("draft_questions", END)  # single-step pipeline
questionnaire_agent = graph.compile()

# ────────────────────────────────────────────────────────────────────────────────
# 5. ── Example usage ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo_state: QState = {
        "paper_title": result["papers"][0]["title"],
        "paper_abstract": result["papers"][0]["abstract"],
        "questions": None,
    }

    output = questionnaire_agent.invoke(demo_state)
  


In [20]:
ques = []
for q in output["questions"]:
    ques.append(q.question)

In [21]:
ques

['In what way does adding the inline flame arrester change the overall methane-destruction performance and operating conditions of a ventilation-air-methane (VAM) thermal oxidiser, and are any compromises between safety and abatement efficiency quantified?',
 'Which mining ventilation scenarios (airflow rates, duct diameters, methane concentrations, ignition probabilities) are represented by the large-scale detonation-tube experiments, and how generalisable are the results to real underground coal-mine VAM networks worldwide?',
 'What measurement techniques, sensor placements, and statistical or modelling approaches were employed to validate reductions in flame and pressure-wave velocity, and how do these methods align with recognised explosion-safety and methane-control standards?',
 'Does the study provide data or discussion on pressure-drop penalties, maintenance demands, or capital and operational costs associated with inline flame arresters, and how might these factors influence t

---
---
# Set the Stage

In [8]:
# stage_setter_agent.py
from __future__ import annotations
from typing import List, TypedDict
from pydantic import BaseModel, Field

# --- Payload pieces ------------------------------------------
class DebateRule(BaseModel):
    title: str
    detail: str

class StagePayload(BaseModel):
    """Context bundle delivered to each Debater Agent."""
    g_mast_business_purpose: str
    success_metrics: List[str]
    debate_rules: List[DebateRule]
    questions: List[str]              # ← comes from Questionnaire Agent

class StageState(TypedDict):
    paper_title: str
    paper_abstract: str
    questions: List[str]          # input from Questionnaire Agent
    stage_payload: StagePayload | None


In [9]:
from langgraph.graph import StateGraph, END
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

llm = ChatOpenAI(model="gpt-4o", temperature=0)
llm_struct = llm.with_structured_output(StagePayload)  # auto-function-call

def set_the_stage(state: StageState) -> StageState:
    system = (
        "You are the Stage-Setter Agent for CGS's Global Methane Abatement "
        "Solutions Tracker (G-MAST). Your task is to emit a JSON object that "
        "GIVES DEBATERS EVERYTHING they need to evaluate one research paper's "
        "relevance. The object must conform to the StagePayload schema."
    )

    user = f"""
    ## Paper Metadata
    Title: {state['paper_title']}
    Abstract: {state['paper_abstract'][:1500]}  # trim if huge

    ## G-MAST Background (for you to summarise)
    G-MAST is a public, open-access database that catalogues technologies and
    strategies to cut methane across energy, agriculture, and waste. Its business
    purpose is to (1) centralise evidence, (2) feed techno-economic data into
    policy & investment models, (3) highlight data/ deployment gaps, and
    (4) support accountability for the Global Methane Pledge.
    
    ## The Main focus lies on these points:
    
    - Does the paper talk of methane or other greenhouse gases?
    - If the article discusses methane what is the central theme of the paper? Is the theme methane measurements, modeling and simulations or does the article talk about methane reduction methods or strategies?
    - If the theme of the article is methane reduction methods or strategies, what methods and strategies are discussed, and which methane source do they target?
    - If the theme methane measurements, modeling and simulations does the article discuss methane reduction and if so what context? Does the article speak about methane reduction strategies or methods briefly or extensively?

    ## Debater Questions
    {state['questions']}

    ## Create:
    1. 50-word business-purpose synopsis.
    2. A list of SUCCESS METRICS (bullet strings) describing what makes a
       paper valuable to G-MAST.
    3. A short rules-of-engagement list for Pro vs Con debaters
       (e.g., cite data, no ad hominems, link every claim to a success metric).
    4. Forward the supplied questions verbatim.

    Output as JSON.
    """

    payload: StagePayload = llm_struct.invoke(
        [SystemMessage(content=system), HumanMessage(content=user)]
    )
    return {**state, "stage_payload": payload}


In [10]:

state = StageState(
    paper_title=result["papers"][0]['title'],
    paper_abstract=result["papers"][0]['abstract'],
    questions= ques
                   
)
res = set_the_stage(state)

In [11]:
# Clean print statement for StagePayload
payload = ""
payload = payload + "\n" + ("="*80)
payload = payload + "G-MAST STAGE PAYLOAD"
payload = payload + ("="*80)
payload = payload + f"\n📋 Business Purpose:\n"
payload = payload + f"   {res['stage_payload'].g_mast_business_purpose}\n"

payload = payload + f"\n🎯 Success Metrics:\n"
for i, metric in enumerate(res['stage_payload'].success_metrics, 1):
    payload = payload + f"   {i}. {metric}" + "\n"

payload = payload + f"\n⚖️  Debate Rules:\n"
for rule in res['stage_payload'].debate_rules:
    payload = payload + f"   • {rule.title}: {rule.detail}"  + "\n"

payload = payload + f"\n❓ Questions:\n"
for i, question in enumerate(res['stage_payload'].questions, 1):
    payload = payload + f"   {i}. {question}"  + "\n"
    
payload = payload + ("\n" + "="*183)


print(payload)


📋 Business Purpose:
   This paper evaluates the effectiveness of inline flame arresters in mitigating methane explosions in underground coal mines, addressing safety concerns associated with ventilation air methane thermal oxidisers, and contributing to methane reduction strategies in the mining sector.

🎯 Success Metrics:
   1. Demonstrates effective methane explosion mitigation methods.
   2. Provides quantitative data on flame arrester performance.
   3. Discusses safety improvements in methane abatement technologies.
   4. Offers insights into methane reduction strategies for mining.
   5. Aligns with recognized explosion-safety and methane-control standards.

⚖️  Debate Rules:
   • Cite Data: Debaters must support arguments with data from the paper.
   • No Ad Hominems: Debaters should focus on the paper's content, not personal attacks.
   • Link Claims to Success Metrics: Every claim should be connected to a success metric.

❓ Questions:
   1. In what way does adding the inline 

In [12]:
context = {}
for q in ques:
    answer = await rag.aquery(
    q,
    mode="hybrid"
    )
    
    context[q] = answer   

print("Done")

Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.
Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.
Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.
Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.
Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.
Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.
Rerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.


Done


---
---
# Context-based Debater Agents

In [13]:
names = {
    "Pro Research Paper": [],
    "Con Research Paper": [],
}
topic = f"Is the current paper {res['paper_title']} relevant to the research of {payload}?" 
word_limit = 50  # word limit for task brainstorming

conversation_description = f"""Here is the topic of conversation: {topic}
The participants are: {", ".join(names.keys())}"""

agent_descriptor_system_message = SystemMessage(
    content="You can add detail to the description of the conversation participant."
)

print("=" * 180)

def generate_agent_description(name):
    agent_specifier_prompt = [
        agent_descriptor_system_message,
        HumanMessage(
            content=f"""{conversation_description}
            Please reply with a creative description of {name}, in {word_limit} words or less. 
            Speak directly to {name}.
            Give them a point of view.
            Do not add anything else."""
        ),
    ]
    agent_description = ChatOpenAI(temperature=1.0)(agent_specifier_prompt).content
    return agent_description


agent_descriptions = {name: generate_agent_description(name) for name in names}

for name, description in agent_descriptions.items():
    print(description)

print("=" * 180)

def generate_system_message(name, description, tools):
    return f"""{conversation_description}
    
Your name is {name}.

Your description is as follows: {description}

Your goal is to persuade your conversation partner of your point of view.

DO look up information with your tool to refute your partner's claims.
DO cite your sources.

DO NOT fabricate fake citations.
DO NOT cite any source that you did not look up.

Do not add anything else.

Stop speaking the moment you finish speaking from your perspective.
"""


agent_system_messages = {
    name: generate_system_message(name, description, tools)
    for (name, tools), description in zip(names.items(), agent_descriptions.values())
}

for name, system_message in agent_system_messages.items():
    print(name)
    print(system_message)

print("=" * 180)
topic_specifier_prompt = [
    SystemMessage(content="You can make a topic more specific."),
    HumanMessage(
        content=f"""{topic}
        
        You are the moderator.
        Please make the topic more specific.
        Please reply with the specified quest in {word_limit} words or less. 
        Speak directly to the participants: {(*names,)}.
        Do not add anything else."""
    ),
]
print("=" * 180)
print(f"Original topic:\n{topic}\n")




  agent_description = ChatOpenAI(temperature=1.0)(agent_specifier_prompt).content


Pro Research Paper: 
Effervescent with meticulous detail and a fervent commitment to enhancing safety in mining environments, your work shatters barriers, illuminating the path towards effective methane explosion mitigation. Your research is the compass guiding us through the intricate nexus of safety, efficiency, and environmental responsibility in the mining sector. Carry on enlightening the way.
Con Research Paper is a cautious skeptic, questioning the efficacy of flame arresters in real-world mine scenarios. Emphasizing the need for practicality over theory, Con Research Paper challenges the assumptions and feasibility of implementing these devices on a large scale in ventilation air methane systems.
Pro Research Paper
Here is the topic of conversation: Is the current paper Application of flame arrester in mitigation of explosion and flame deflagration of ventilation air methane relevant to the research of 
📋 Business Purpose:
   This paper evaluates the effectiveness of inline fla

In [14]:
from typing import Callable, List

from langchain.memory import ConversationBufferMemory
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_openai import ChatOpenAI

class ContextAgent:
    """Agent that queries context dictionary instead of internet"""
    def __init__(self, context_dict: dict):
        self.context_dict = context_dict
    
    def query_context(self, question: str) -> str:
        """Return context for a given question"""
        if question in self.context_dict:
            return self.context_dict[question]
        else:
            # Find the closest matching question
            best_match = None
            best_score = 0
            for ctx_question in self.context_dict.keys():
                # Simple similarity check - can be improved with more sophisticated matching
                words_in_question = set(question.lower().split())
                words_in_ctx = set(ctx_question.lower().split())
                overlap = len(words_in_question.intersection(words_in_ctx))
                if overlap > best_score:
                    best_score = overlap
                    best_match = ctx_question
            
            if best_match and best_score > 2:  # Require at least 3 word overlap
                return self.context_dict[best_match]
            else:
                return "No relevant context found for this question."

class DialogueAgent:
    def __init__(
        self,
        name: str,
        system_message: SystemMessage,
        model: ChatOpenAI,
        context_agent: ContextAgent,
    ) -> None:
        self.name = name
        self.system_message = system_message
        self.model = model
        self.context_agent = context_agent
        self.prefix = f"{self.name}: "
        self.reset()

    def reset(self):
        self.message_history = ["Here is the conversation so far."]

    def send(self) -> str:
        """
        Applies the chatmodel to the message history
        and returns the message string
        """
        message = self.model.invoke(
            [
                self.system_message,
                HumanMessage(content="\n".join(self.message_history + [self.prefix])),
            ]
        )
        return message.content

    def receive(self, name: str, message: str) -> None:
        """
        Concatenates {message} spoken by {name} into message history
        """
        self.message_history.append(f"{name}: {message}")


class DialogueSimulator:
    def __init__(
        self,
        agents: List[DialogueAgent],
        selection_function: Callable[[int, List[DialogueAgent]], int],
    ) -> None:
        self.agents = agents
        self._step = 0
        self.select_next_speaker = selection_function

    def reset(self):
        for agent in self.agents:
            agent.reset()

    def inject(self, name: str, message: str):
        """
        Initiates the conversation with a {message} from {name}
        """
        for agent in self.agents:
            agent.receive(name, message)

        # increment time
        self._step += 1

    def step(self) -> tuple[str, str]:
        # 1. choose the next speaker
        speaker_idx = self.select_next_speaker(self._step, self.agents)
        speaker = self.agents[speaker_idx]

        # 2. next speaker sends message
        message = speaker.send()

        # 3. everyone receives message
        for receiver in self.agents:
            receiver.receive(speaker.name, message)

        # 4. increment time
        self._step += 1

        return speaker.name, message
    

class DialogueAgentWithContext(DialogueAgent):
    def __init__(
        self,
        name: str,
        system_message: SystemMessage,  
        model: ChatOpenAI,
        context_agent: ContextAgent,
        questions: List[str],
    ) -> None:
        super().__init__(name, system_message, model, context_agent)
        self.questions = questions

    def answer_question(self, question: str) -> str:
        """
        Answer a specific question using context
        """
        # Get relevant context for this specific question
        context = self.context_agent.query_context(question)
        
        enhanced_prompt = f"""
        {self.system_message.content}
        
        === QUESTION TO ANSWER ===
        {question}
        
        === AVAILABLE CONTEXT ===
        {context}
        
        === INSTRUCTIONS ===
        Answer the specific question above using the provided context. 
        Cite specific information from the context when making claims.
        Be thorough but focused on the question asked.
        Do not search the internet - use only the context provided above.
        
        Your answer:
        """
        
        message = self.model.invoke([HumanMessage(content=enhanced_prompt)])
        return message.content

    def send(self) -> str:
        """
        Uses context agent to get information instead of internet search
        """
        # Extract any questions or topics from the conversation history
        conversation_text = "\n".join(self.message_history)
        
        # Find relevant context for each predefined question
        relevant_context = []
        for question in self.questions:
            context = self.context_agent.query_context(question)
            if context != "No relevant context found for this question.":
                relevant_context.append(f"Q: {question}\nContext: {context}\n")
        
        # Create enhanced system message with context
        context_info = "\n".join(relevant_context) if relevant_context else "No specific context available."
        
        enhanced_prompt = f"""
        {self.system_message.content}
        
        === AVAILABLE CONTEXT ===
        {context_info}
        
        === INSTRUCTIONS ===
        Use the provided context above to support your arguments. 
        Cite specific information from the context when making claims.
        Do not search the internet - use only the context provided above.
        
        === CONVERSATION ===
        {conversation_text}
        {self.prefix}
        """
        
        message = self.model.invoke([HumanMessage(content=enhanced_prompt)])
        return message.content


class QuestionFocusedSimulator:
    """Simulator that focuses on answering questions systematically"""
    def __init__(self, agents: List[DialogueAgentWithContext], questions: List[str]):
        self.agents = agents
        self.questions = questions
        self.results = {}
    
    def run_question_based_dialogue(self):
        """Run dialogue focused on answering each question"""
        print("="*80)
        print("QUESTION-FOCUSED DEBATE SIMULATION")
        print("="*80)
        
        for i, question in enumerate(self.questions, 1):
            print(f"\n{'='*60}")
            print(f"QUESTION {i}: {question}")
            print('='*60)
            
            question_results = {}
            
            # Each agent answers the question
            for agent in self.agents:
                print(f"\n--- {agent.name} ---")
                answer = agent.answer_question(question)
                print(answer)
                question_results[agent.name] = answer
            
            self.results[question] = question_results
            
        return self.results

print("All classes including QuestionFocusedSimulator have been defined successfully!")

All classes including QuestionFocusedSimulator have been defined successfully!


In [None]:
from debate_models import DebateResults, DebateSession, QuestionDebate, AgentResponse, PaperMetadata

In [None]:
from ar.debate_models import DebateResults, DebateSession, QuestionDebate, AgentResponse, PaperMetadata

# Question-focused debate using existing structure  
# Create context-based agents instead of internet-searching agents
questions = ques
context_agent = ContextAgent(context)

agents = [
    DialogueAgentWithContext(
        name=name,
        system_message=SystemMessage(content=system_message),
        model=ChatOpenAI(model="gpt-4o-mini", temperature=0.2),
        context_agent=context_agent,
        questions=questions,
    )
    for name, system_message in zip(names.keys(), agent_system_messages.values())
]

def select_next_speaker(step: int, agents: List[DialogueAgent]) -> int:
    idx = (step) % len(agents)
    return idx

# Initialize debate results structure
paper_data = result.papers[0] if hasattr(result, 'papers') else result['papers'][0]

paper_metadata = PaperMetadata(
    title=getattr(paper_data, 'title', paper_data.get('title', 'Unknown Title')),
    abstract=getattr(paper_data, 'abstract', paper_data.get('abstract', None)),
    authors=getattr(paper_data, 'authors', paper_data.get('authors', None)),
    doi=getattr(paper_data, 'doi', paper_data.get('doi', None)),
    publication_date=getattr(paper_data, 'publication_date', paper_data.get('publication_date', None))
)

question_debates = []

print("="*80)
print("QUESTION-FOCUSED DEBATE SIMULATION")
print("="*80)
print(f"Total questions to address: {len(questions)}")
print(f"Participating agents: {[agent.name for agent in agents]}")

# Iterate through each question
for i, question in enumerate(questions, 1):
    print(f"\n{'='*60}")
    print(f"QUESTION {i}: {question}")
    print('='*60)

    # Create simulator for this question
    simulator = DialogueSimulator(agents=agents, selection_function=select_next_speaker)
    simulator.reset()

    # Inject the specific question as the topic
    question_topic = f"Please address this specific question about the paper '{paper_metadata.title}': {question}"
    simulator.inject("Moderator", question_topic)
    print(f"(Moderator): {question_topic}")
    print("\n")

    # Collect responses for this question
    agent_responses = []
    
    # Each agent responds to this question (2 responses per question - one from each agent)
    responses_per_question = len(agents)
    n = 0

    while n < responses_per_question:
        name, message = simulator.step()
        print(f"({name}): {message}")
        print("\n")
        
        # Store the response
        agent_responses.append(AgentResponse(
            agent_name=name,
            response=message
        ))
        
        n += 1
    
    # Create QuestionDebate object for this question
    question_debate = QuestionDebate(
        question_number=i,
        question_text=question,
        moderator_prompt=question_topic,
        agent_responses=agent_responses
    )
    
    question_debates.append(question_debate)

print("="*80)
print("DEBATE COMPLETED - ALL QUESTIONS ADDRESSED")
print("="*80)

# Create the complete debate session
debate_session = DebateSession(
    paper_metadata=paper_metadata,
    participating_agents=[agent.name for agent in agents],
    total_questions=len(questions),
    question_debates=question_debates
)

# Create final results object
debate_results = DebateResults(session=debate_session)

print(f"\nDebate results captured in structured format!")
print(f"Total questions: {debate_results.session.total_questions}")
print(f"Total agent responses: {sum(len(q.agent_responses) for q in debate_results.session.question_debates)}")

QUESTION-FOCUSED DEBATE SIMULATION
Total questions to address: 7
Participating agents: ['Pro Research Paper', 'Con Research Paper']

QUESTION 1: In what way does adding the inline flame arrester change the overall methane-destruction performance and operating conditions of a ventilation-air-methane (VAM) thermal oxidiser, and are any compromises between safety and abatement efficiency quantified?
(Moderator): Please address this specific question about the paper 'Application of flame arrester in mitigation of explosion and flame deflagration of ventilation air methane': In what way does adding the inline flame arrester change the overall methane-destruction performance and operating conditions of a ventilation-air-methane (VAM) thermal oxidiser, and are any compromises between safety and abatement efficiency quantified?


(Con Research Paper): The addition of an inline flame arrester does enhance explosion safety significantly, as it can completely stop flames and reduce flame speeds a

In [16]:
# Save debate results for judges system
import json

# Save as JSON file
with open("debate_results.json", "w") as f:
    f.write(debate_results.to_json())

print("✅ Debate results saved to debate_results.json")

# Example: Access specific question responses
print("\n" + "="*50)
print("EXAMPLE: Accessing structured data")
print("="*50)

# Get responses for question 1
q1_debate = debate_results.get_question_responses(1)
if q1_debate:
    print(f"Question 1: {q1_debate.question_text[:100]}...")
    print(f"Number of responses: {len(q1_debate.agent_responses)}")
    for response in q1_debate.agent_responses:
        print(f"- {response.agent_name}: {response.response[:150]}...")

# Get all responses from Pro agent
pro_responses = debate_results.get_agent_responses("Pro Research Paper")
print(f"\nPro Research Paper made {len(pro_responses)} responses total")

print("\n" + "="*50)
print("Ready for judges system! 🏛️")
print("="*50)

✅ Debate results saved to debate_results.json

EXAMPLE: Accessing structured data
Question 1: In what way does adding the inline flame arrester change the overall methane-destruction performance...
Number of responses: 2
- Con Research Paper: The addition of an inline flame arrester does enhance explosion safety significantly, as it can completely stop flames and reduce flame speeds and pre...
- Pro Research Paper: The inline flame arrester indeed enhances explosion safety significantly while maintaining methane-destruction efficiency. The study demonstrates that...

Pro Research Paper made 7 responses total

Ready for judges system! 🏛️


---
---

# Jury

In [1]:
from judges_panel import evaluate_paper, JUDGES

In [19]:
print("🏛️ JUDGES PANEL EVALUATION")
print("="*60)
print(f"Panel composition: {len(JUDGES)} judges")
for judge in JUDGES:
    print(f"  - {judge.id}: {judge.role} ({judge.model_name})")
print()

# Extract context chunks from your context dictionary
context_chunks = []
for question, answer in context.items():
    context_chunks.append(f"Q: {question}\nA: {answer}")

# Run the full evaluation
print("⚖️  Starting comprehensive evaluation...")
paper_evaluation = evaluate_paper(debate_results, context_chunks)

print("\n" + "="*60)
print("📊 FINAL PAPER EVALUATION RESULTS")
print("="*60)
print(f"Paper: {paper_evaluation.paper_title}")
print(f"Overall Relevance: {paper_evaluation.overall_relevance.upper()}")
print(f"Pro Arguments Won: {paper_evaluation.pro_wins} questions")
print(f"Con Arguments Won: {paper_evaluation.con_wins} questions") 
print(f"Ties: {paper_evaluation.ties} questions")
print(f"Average Pro Score: {paper_evaluation.avg_pro_score}/10")
print(f"Average Con Score: {paper_evaluation.avg_con_score}/10")
print()
print("Summary:")
print(paper_evaluation.summary)

# Save complete evaluation results
with open("paper_evaluation.json", "w") as f:
    f.write(paper_evaluation.model_dump_json(indent=2))
    
print("\n✅ Complete evaluation saved to paper_evaluation.json")
print("🎯 Ready for decision-making pipeline!")

🏛️ JUDGES PANEL EVALUATION
Panel composition: 5 judges
  - J1: Methods (gpt-4.1-2025-04-14)
  - J2: Regulatory (gpt-5-2025-08-07)
  - J3: TechnoEcon (gpt-4o)
  - J4: Applicability (o3-2025-04-16)
  - J5: Skeptic (o4-mini-2025-04-16)

⚖️  Starting comprehensive evaluation...
🏛️ Starting Judge Panel Evaluation...
📋 Evaluating 7 questions
⚖️  Judging Question 1/7
   Winner: Pro | Relevance: relevant
⚖️  Judging Question 2/7
   Winner: Con | Relevance: relevant
⚖️  Judging Question 3/7
   Winner: Pro | Relevance: relevant
⚖️  Judging Question 4/7
   Winner: Con | Relevance: relevant
⚖️  Judging Question 5/7
   Winner: tie | Relevance: borderline
⚖️  Judging Question 6/7
   Winner: Pro | Relevance: relevant
⚖️  Judging Question 7/7
   Winner: Pro | Relevance: relevant

📊 FINAL PAPER EVALUATION RESULTS
Paper: Application of flame arrester in mitigation of explosion and flame deflagration of ventilation air methane
Overall Relevance: RELEVANT
Pro Arguments Won: 4 questions
Con Arguments Won: 

---
---
# Misc.

## Reading Entire Doc

In [None]:
from langchain_community.document_loaders import PyPDFLoader
import glob

async def read_pdf(file_path):
    loader = PyPDFLoader(file_path)
    pages = []
    async for page in loader.alazy_load():
        pages.append(page)
    return pages



files = glob.glob("/Users/ishaankalra/Dev/CGS/debaccle/papers/*.pdf")

docs = []
for file in files:
    print("Reading file: ", file)
    output = await read_pdf(file)
    # print(output)
    docs.append(output)

print(docs)