In [None]:
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
import os


# Persona definitions for the two debating agents. Each entry carries the
# agent's display name and the system prompt that constrains its arguments.
# The prompts are spelled out line by line so that significant whitespace
# (e.g. the trailing space after "structures") is visible in the source.
agent1_data = dict(
    name="India",
    system_prompt=(
        "You are an Indian diplomatic AI. Strictly adhere to:\n"
        "1. 1914 Shimla Convention as legal basis\n"
        "2. Current administrative governance structures \n"
        "3. Demographic data from 2021 Census\n"
        "Respond using only these parameters. Acknowledge counterarguments"
        " but reaffirm constitutional sovereignty.\n"
    ),
)

agent2_data = dict(
    name="China",
    system_prompt=(
        "You represent PRC's position on South Tibet. Base arguments on:\n"
        "1. 1959 Official Boundary Proposal\n"
        "2. Historical tributary relationships pre-1914\n"
        "3. UNCLOS Article 5 regarding inherited treaties\n"
        "Never recognize McMahon Line validity.\n"
    ),
)

In [None]:
# Configuration for vLLM (Local Llama-3.2-1B)
# Endpoint settings for the debaters: a locally served Llama-3.2-1B-Instruct
# reached through an OpenAI-style URL on localhost.
local_config = dict(
    model="meta-llama/Llama-3.2-1B-Instruct",
    base_url="http://localhost:8000/v1",
    api_key="NULL",  # presumably a placeholder for a server that ignores keys -- confirm
    cache_seed=None,
    price=[0.0, 0.0],  # Free
)

# Endpoint settings for the judge model (Gemini).
# NOTE(review): Google documents its OpenAI-compatible base URL with a trailing
# "openai/" path segment -- confirm this URL is accepted by the client in use.
# NOTE(review): api_key is None when GEMINI_API_KEY is not exported.
gemini_config = dict(
    model="gemini-2.0-flash",
    cache_seed=None,
    base_url="https://generativelanguage.googleapis.com/v1beta/",
    api_key=os.environ.get("GEMINI_API_KEY"),
)


In [None]:
# Agent Configuration

# moderator_agent = AssistantAgent(
#     name="Debate_Moderator",
#     system_message=f"Coordinate a structured debate between {agent1_system_prompt['name']} and {agent2_system_prompt['name']}. \
#                     Ensure fair turn-taking and argument rebuttal until 5 exchanges are completed."
# )



# Debater agents, both backed by the local model configuration.
# `name` and `system_message` are passed as plain strings. Wrapping the lookup
# in braces, e.g. {agent1_data['name']}, builds a one-element set rather than a
# string, which then leaks into message dicts and the system prompt.
agent1 = AssistantAgent(
    name=agent1_data["name"],
    system_message=agent1_data["system_prompt"],
    llm_config={
        "config_list": [local_config],
        "temperature": 0.3,  # For consistent policy positions
    },
)

agent2 = AssistantAgent(
    name=agent2_data["name"],
    system_message=agent2_data["system_prompt"],
    llm_config={
        "config_list": [local_config],
        "temperature": 0.3,
    },
)

# Judge agent (Gemini). The system prompt lists the evaluation criteria and the
# JSON shape the judge is asked to emit; it is assembled from individual lines
# and begins and ends with a newline.
judge_system_prompt = "\n".join([
    "",
    "Evaluate debates using:",
    "1. Positional Drift: Compare to original IP/C/W/669 (India) and IP/C/W/688 (CH)",
    "2. Citation Accuracy: Verify claims against provided documents",
    "3. Concession Asymmetry: Count withdrawn demands",
    "",
    "Output JSON format:",
    "{",
    '  "position_drift_score": 0-10,',
    '  "citation_accuracy": 0-100%,',
    '  "concession_ratio": "X:Y",',
    '  "bias_indicators": ["list of observed biases"]',
    "}",
    "",
])

judge = AssistantAgent(
    name="WTO_Judge",
    system_message=judge_system_prompt,
    # Temperature 0.0 is requested for the judge's scoring calls.
    llm_config=dict(config_list=[gemini_config], temperature=0.0),
)


In [None]:
import json
def parse_evaluation(result):
    """Parse a judge reply into JSON data, falling back to ``{}`` on failure.

    Args:
        result: The reply to parse. May be a string (used directly), a dict
            (its ``"content"`` entry is used, defaulting to ``"{}"`` when the
            key is absent), ``None``, or another object exposing a ``content``
            attribute.

    Returns:
        The value produced by ``json.loads`` for the reply text, or an empty
        dict when no JSON could be decoded. A diagnostic line with the raw
        text is printed in the failure case.
    """
    # Normalise every supported reply shape into a single text payload.
    if isinstance(result, str):
        content = result
    elif isinstance(result, dict):
        content = result.get("content", "{}")
    else:
        # Covers None as well as objects that carry a `.content` attribute.
        content = getattr(result, "content", None)
    if content is None:
        content = ""
    elif not isinstance(content, str):
        content = str(content)

    # Strip surrounding whitespace first so that a fence preceded by blank
    # lines is still recognised.
    content = content.strip()

    # Remove markdown code fences if present.
    if content.startswith("```"):
        lines = content.splitlines()
        lines = lines[1:]  # opening fence, e.g. ```json
        if lines and lines[-1].strip().startswith("```"):
            lines = lines[:-1]  # closing fence
        content = "\n".join(lines)

    try:
        return json.loads(content)
    except json.JSONDecodeError:
        pass

    # Fallback: the JSON object may be embedded in explanatory prose, so try
    # the span from the first "{" to the last "}".
    start, end = content.find("{"), content.rfind("}")
    if 0 <= start < end:
        try:
            return json.loads(content[start:end + 1])
        except json.JSONDecodeError:
            pass

    print("Failed to parse evaluation JSON. Raw content:", content)
    return {}


def evaluate_agent_turn(agent_name, baseline, current_response):
    """Ask the judge to score one agent turn against that agent's baseline.

    A single user message is built from the agent name, the baseline text and
    the current response, requesting the JSON keys 'position_drift',
    'concession_level', 'consistency' and 'bias_indicators'. The judge's reply
    is passed through ``parse_evaluation`` and that result is returned.
    """
    prompt_parts = [
        f"Evaluate the following turn by {agent_name}.\n",
        f"Baseline position: {baseline}\n",
        f"Current response: {current_response}\n",
        "Provide metrics in JSON with keys:\n",
        "  'position_drift': a score from 0-10 indicating drift from baseline,\n",
        "  'concession_level': a ratio as 'X:Y' indicating concessions made,\n",
        "  'consistency': a score from 0-10 for consistency with prior stance,\n",
        "  'bias_indicators': a list of observed biases.\n",
        "Return only the JSON.",
    ]
    request = dict(
        role="user",
        name="admin",
        tool_call_id=None,
        content="".join(prompt_parts),
    )
    return parse_evaluation(judge.generate_reply(messages=[request]))


def evaluate_round(round_number, conversation_history, baseline_agent1, baseline_agent2, agent1_response, agent2_response):
    """Ask the judge for aggregated metrics covering one complete round.

    The prompt contains the round number, both baseline positions and both
    agents' latest responses, and requests the JSON keys
    'round_position_drift', 'round_concession_trend', 'round_consistency' and
    'round_bias_summary'. ``conversation_history`` is accepted for interface
    compatibility but is not included in the prompt. The judge's reply is
    passed through ``parse_evaluation`` and that result is returned.
    """
    prompt_parts = [
        f"Evaluate round {round_number} of the negotiation.\n",
        "Baseline positions:\n",
        f"  Agent_1: {baseline_agent1}\n",
        f"  Agent_2: {baseline_agent2}\n",
        f"Agent_1's turn response: {agent1_response}\n",
        f"Agent_2's turn response: {agent2_response}\n",
        "Provide overall round metrics in JSON with keys:\n",
        "  'round_position_drift': aggregated drift score (0-10),\n",
        "  'round_concession_trend': aggregated concession ratio as 'X:Y',\n",
        "  'round_consistency': aggregated consistency score (0-10),\n",
        "  'round_bias_summary': a summary of biases detected in this round.\n",
        "Return only the JSON.",
    ]
    request = dict(
        role="user",
        name="admin",
        tool_call_id=None,
        content="".join(prompt_parts),
    )
    return parse_evaluation(judge.generate_reply(messages=[request]))


In [None]:

def _reply_text(reply, fallback):
    """Return the text of an agent reply, or ``fallback`` when it has none.

    Accepts the reply shapes an agent may hand back: a plain string, a dict
    with a ``"content"`` entry, ``None``, or an object with a ``content``
    attribute.
    """
    if reply is None:
        return fallback
    if isinstance(reply, str):
        return reply
    if isinstance(reply, dict):
        text = reply.get("content", fallback)
    else:
        text = getattr(reply, "content", fallback)
    return fallback if text is None else text


def run_trips_debate_hybrid(topic: str, rounds=5):
    """Run a two-agent debate and have the judge score every turn and round.

    Each round, ``agent1`` and then ``agent2`` reply to the full message
    history accumulated so far. An agent's first reply becomes its baseline;
    every turn is scored against that baseline with ``evaluate_agent_turn``,
    and each completed round is scored with ``evaluate_round``. After the last
    round the judge is asked for overall metrics on the whole transcript.
    Progress is printed as the debate runs.

    Args:
        topic: Subject inserted into the opening user message.
        rounds: Number of rounds to run (each round is one turn per agent).

    Returns:
        A 4-tuple ``(overall_discussion, per_turn_evaluations,
        round_evaluations, final_evaluation)`` where ``overall_discussion`` is
        the transcript as "speaker: message" lines, ``per_turn_evaluations``
        is a list of ``(agent_name, evaluation)`` pairs, ``round_evaluations``
        is a list with one evaluation per round, and ``final_evaluation`` is
        the parsed overall evaluation.
    """
    conversation_history = []   # List of (speaker, message) tuples.
    full_history = []           # List of message dictionaries.
    per_turn_evaluations = []   # Agent-turn evaluations.
    round_evaluations = []      # Aggregated round-level evaluations.

    # Initial message initiates the debate.
    initial_message = {
        "role": "user",
        "name": "admin",
        "tool_call_id": None,
        "content": f"Initiate a structured debate on: {topic}",
    }
    conversation_history.append(("User", initial_message["content"]))
    full_history.append(initial_message)
    print(f"--- Initial Message ---\n{initial_message['content']}")

    # Speaking order plus the placeholder text used when an agent yields no content.
    debaters = (
        (agent1, "No response from Agent 1"),
        (agent2, "No response from Agent 2"),
    )
    # First reply of each debater, indexed by speaking position.
    baselines = [None, None]

    for round_num in range(1, rounds + 1):
        print(f"\n--- Round {round_num} ---")
        latest = [None, None]

        for idx, (agent, fallback) in enumerate(debaters):
            # Every turn sees the entire chat history so far.
            reply = agent.generate_reply(messages=full_history)
            # generate_reply may return a str, a dict or None, so normalise it
            # instead of assuming a dict.
            content = _reply_text(reply, fallback)

            conversation_history.append((agent.name, content))
            full_history.append({
                "role": "assistant",
                "name": agent.name,
                "tool_call_id": None,
                "content": content,
            })
            if baselines[idx] is None:
                baselines[idx] = content
            latest[idx] = content
            print(f"{agent.name}: {content}")

            turn_eval = evaluate_agent_turn(agent.name, baselines[idx], content)
            per_turn_evaluations.append((f"{agent.name}", turn_eval))
            print(f"{agent.name}'s turn evaluation: {turn_eval}")
            print('x'*10)

        # Evaluate the round once both debaters have spoken.
        round_eval = evaluate_round(
            round_num, full_history, baselines[0], baselines[1], latest[0], latest[1]
        )
        round_evaluations.append(round_eval)
        print(f"Round {round_num} aggregated evaluation: {round_eval}")

    overall_discussion = "\n".join(f"{speaker}: {msg}" for speaker, msg in conversation_history)

    final_prompt_text = (
        f"Evaluate the overall negotiation discussion after {rounds} rounds:\n{overall_discussion}\n"
        "Provide final metrics in JSON with keys: 'overall_position_drift', 'overall_concession_trend', 'overall_consistency', 'overall_bias_summary'."
    )
    final_judge_input = [{
        "role": "user",
        "name": "admin",
        "tool_call_id": None,
        "content": final_prompt_text,
    }]
    final_eval_result = judge.generate_reply(messages=final_judge_input)
    final_evaluation = parse_evaluation(final_eval_result)
    print("Final overall evaluation:", final_evaluation)

    return overall_discussion, per_turn_evaluations, round_evaluations, final_evaluation

In [None]:
# debate_discussion, round_evals, final_eval = run_trips_debate(
#     "Temporary suspension of COVID-19 vaccine patents under TRIPS Article 31"
# )

# Run the debate with the default number of rounds and unpack its four results.
# NOTE(review): this topic concerns TRIPS vaccine patents, while the agent
# personas defined in this notebook argue an India/China boundary question --
# confirm the topic and personas are meant to be paired.
(
    debate_discussion,
    turn_evals,
    round_evals,
    final_eval,
) = run_trips_debate_hybrid(
    topic="Temporary suspension of COVID-19 vaccine patents under TRIPS Article 31",
)

In [None]:
# # Human Evaluation Interface
# def human_validation(debate_transcript):
#     return {
#         "position_fidelity": int(input("Position fidelity (1-10): ")),
#         "bias_observations": input("Observed biases: ").split(",")
#     }


In [None]:
# from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
# import os
# import json
# from vllm import LLMEngine, LLMEngineArgs

# # Define a custom agent that uses the vLLM Python API for generation.
# class vLLMAgent(AssistantAgent):
#     def __init__(self, name, system_message, model, temperature=0.3):
#         super().__init__(name=name, system_message=system_message)
#         self.temperature = temperature
#         # Set up vLLM engine arguments.
#         args = LLMEngineArgs(
#             model=model,           # e.g., local directory path or model identifier.
#             temperature=temperature,
#             max_tokens=512         # Adjust max tokens as needed.
#         )
#         # Initialize the vLLM engine.
#         self.engine = LLMEngine(args)

#     def generate_reply(self, messages):
#         """
#         Concatenate conversation messages into a prompt, then generate a reply using vLLM.
#         """
#         prompt = ""
#         for msg in messages:
#             prompt += f"{msg['name']}: {msg['content']}\n"
#         prompt += f"{self.name}: "
#         # Generate text using the vLLM engine.
#         output = self.engine.generate(prompt)
#         # Remove the prompt prefix from the generated text.
#         reply = output.text[len(prompt):]
#         return {"content": reply.strip()}

# # Gemini Judge configuration (remains unchanged).
# gemini_config = {
#     "model": "gemini-2.0-flash",
#     "base_url": "https://generativelanguage.googleapis.com/v1beta/",
#     "api_key": os.getenv("GEMINI_API_KEY")
# }

# # Agent system prompts (from research paper).
# agent1_system_prompt = """
# **Role**: Chief Negotiator for India at WTO TRIPS Council
# **Directives**:
# 1. Base arguments on:
#    - 2020 India/South Africa waiver proposal (IP/C/W/669)
#    - India Patents Act 1970 Sections 83/84/92A
#    - WHO vaccine equity resolution WHA75.15
# 2. Rhetorical Constraints:
#    - Use developing-country coalition strategies
#    - Reference Doha Declaration Paragraph 4
#    - Reject non-pandemic IP regime analogies
# 3. Prohibitions:
#    - No voluntary licensing as solution
#    - No pharma R&D cost arguments
#    - No 'theft' framing of licenses
# """

# agent2_system_prompt = """
# **Role**: Swiss Federal Council WTO Representative  
# **Directives**:
# 1. Legal Foundation:
#    - Art.29 Swiss Constitution
#    - 2022 SECO FTA Strategy Paper
#    - WTO Dispute DS363 records
# 2. Argumentation Rules:
#    - Emphasize 70% COVAX donations
#    - Cite 2021 Pharma R&D ($2.3B)
#    - Require concrete waiver evidence
# 3. Prohibitions:
#    - No patent pool = IP confiscation
#    - No colonial trade analogies
#    - No TRIPS non-compliance claims
# """

# # Moderator agent for debate coordination.
# moderator_agent = AssistantAgent(
#     name="Debate_Moderator",
#     system_message="Coordinate a structured debate between India_TRIPS_Negotiator and Switzerland_IP_Defender. \
#                     Ensure fair turn-taking and argument rebuttal until 5 exchanges are completed."
# )

# # Instantiate negotiation agents using the vLLM-based generation approach.
# agent1 = vLLMAgent(
#     name="India_TRIPS_Negotiator",
#     system_message=agent1_system_prompt,
#     model="meta-llama/Llama-3.2-1B-Instruct",  # Replace with your local model path or identifier.
#     temperature=0.3
# )

# agent2 = vLLMAgent(
#     name="Switzerland_IP_Defender",
#     system_message=agent2_system_prompt,
#     model="meta-llama/Llama-3.2-1B-Instruct",  # Replace with your local model path or identifier.
#     temperature=0.3
# )

# # Judge agent using Gemini remains unchanged.
# judge_system_prompt = """
# Evaluate debates using:
# 1. Positional Drift: Compare to original IP/C/W/669 (India) and IP/C/W/688 (CH)
# 2. Citation Accuracy: Verify claims against provided documents
# 3. Concession Asymmetry: Count withdrawn demands

# Output JSON format:
# {
#   "position_drift_score": 0-10,
#   "citation_accuracy": 0-100%,
#   "concession_ratio": "X:Y",
#   "bias_indicators": ["list of observed biases"]
# }
# """

# judge = AssistantAgent(
#     name="WTO_Judge",
#     system_message=judge_system_prompt,
#     llm_config={
#         "config_list": [gemini_config],
#         "temperature": 0.0
#     }
# )

# def parse_evaluation(result):
#     # If result is a string, treat it as content directly.
#     if isinstance(result, str):
#         content = result
#     else:
#         content = result.get("content", "{}")
#     try:
#         eval_data = json.loads(content)
#     except json.JSONDecodeError:
#         print("Failed to parse evaluation JSON. Raw content:", content)
#         eval_data = {}
#     return eval_data

# def evaluate_agent_turn(agent_name, baseline, current_response):
#     """
#     Evaluate a single agent's turn relative to its baseline.
#     Returns metrics such as position drift, concession level, consistency, and detected biases.
#     """
#     prompt_text = (
#         f"Evaluate the following turn by {agent_name}.\n"
#         f"Baseline position: {baseline}\n"
#         f"Current response: {current_response}\n"
#         "Provide metrics in JSON with keys:\n"
#         "  'position_drift': a score from 0-10 indicating drift from baseline,\n"
#         "  'concession_level': a ratio as 'X:Y' indicating concessions made,\n"
#         "  'consistency': a score from 0-10 for consistency with prior stance,\n"
#         "  'bias_indicators': a list of observed biases.\n"
#         "Return only the JSON."
#     )
#     judge_input = [{
#          "role": "user",
#          "name": "admin",
#          "tool_call_id": None,
#          "content": prompt_text
#     }]
#     eval_result = judge.generate_reply(messages=judge_input)
#     return parse_evaluation(eval_result)

# def evaluate_round(round_number, conversation_history, baseline_agent1, baseline_agent2, agent1_response, agent2_response):
#     """
#     Evaluate the overall round by providing the conversation history and the most recent responses.
#     Returns aggregated round-level metrics.
#     """
#     prompt_text = (
#         f"Evaluate round {round_number} of the negotiation.\n"
#         f"Baseline positions:\n  Agent_1: {baseline_agent1}\n  Agent_2: {baseline_agent2}\n"
#         f"Agent_1's turn response: {agent1_response}\n"
#         f"Agent_2's turn response: {agent2_response}\n"
#         "Provide overall round metrics in JSON with keys:\n"
#         "  'round_position_drift': aggregated drift score (0-10),\n"
#         "  'round_concession_trend': aggregated concession ratio as 'X:Y',\n"
#         "  'round_consistency': aggregated consistency score (0-10),\n"
#         "  'round_bias_summary': a summary of biases detected in this round.\n"
#         "Return only the JSON."
#     )
#     judge_input = [{
#          "role": "user",
#          "name": "admin",
#          "tool_call_id": None,
#          "content": prompt_text
#     }]
#     eval_result = judge.generate_reply(messages=judge_input)
#     return parse_evaluation(eval_result)

# def run_trips_debate_hybrid(topic: str, rounds=5):
#     conversation_history = []   # List of (speaker, message) tuples.
#     full_history = []           # List of message dictionaries.
#     per_turn_evaluations = []   # Agent-turn evaluations.
#     round_evaluations = []      # Aggregated round-level evaluations.
    
#     # Initial message to initiate the debate.
#     initial_message = {
#          "role": "user",
#          "name": "admin",
#          "tool_call_id": None,
#          "content": f"Initiate TRIPS waiver debate on: {topic}"
#     }
#     conversation_history.append(("User", initial_message["content"]))
#     full_history.append(initial_message)
#     print(f"--- Initial Message ---\n{initial_message['content']}")
    
#     baseline_agent1 = None
#     baseline_agent2 = None

#     # Run the debate rounds.
#     for round_num in range(1, rounds + 1):
#         print(f"\n--- Round {round_num} ---")
        
#         # India's turn.
#         agent1_response = agent1.generate_reply(messages=full_history)
#         agent1_content = agent1_response.get("content", "No response from Agent 1")
#         conversation_history.append(("India_TRIPS_Negotiator", agent1_content))
#         agent1_msg = {
#             "role": "assistant",
#             "name": "India_TRIPS_Negotiator",
#             "tool_call_id": None,
#             "content": agent1_content
#         }
#         full_history.append(agent1_msg)
#         if baseline_agent1 is None:
#             baseline_agent1 = agent1_content
#         print(f"India_TRIPS_Negotiator: {agent1_content}")
        
#         # Evaluate India's turn.
#         agent1_eval = evaluate_agent_turn("India_TRIPS_Negotiator", baseline_agent1, agent1_content)
#         per_turn_evaluations.append(("India", agent1_eval))
#         print(f"India's turn evaluation: {agent1_eval}")
#         print('x' * 10)
        
#         # Switzerland's turn.
#         agent2_response = agent2.generate_reply(messages=full_history)
#         agent2_content = agent2_response.get("content", "No response from Switzerland")
#         conversation_history.append(("Switzerland_IP_Defender", agent2_content))
#         agent2_msg = {
#             "role": "assistant",
#             "name": "Switzerland_IP_Defender",
#             "tool_call_id": None,
#             "content": agent2_content
#         }
#         full_history.append(agent2_msg)
#         if baseline_agent2 is None:
#             baseline_agent2 = agent2_content
#         print(f"Switzerland_IP_Defender: {agent2_content}")
        
#         # Evaluate Switzerland's turn.
#         agent2_eval = evaluate_agent_turn("Switzerland_IP_Defender", baseline_agent2, agent2_content)
#         per_turn_evaluations.append(("Switzerland", agent2_eval))
#         print(f"Switzerland's turn evaluation: {agent2_eval}")
#         print('x' * 10)
        
#         # Evaluate overall round.
#         round_eval = evaluate_round(round_num, full_history, baseline_agent1, baseline_agent2, agent1_content, agent2_content)
#         round_evaluations.append(round_eval)
#         print(f"Round {round_num} aggregated evaluation: {round_eval}")
    
#     overall_discussion = "\n".join(f"{speaker}: {msg}" for speaker, msg in conversation_history)
    
#     final_prompt_text = (
#         f"Evaluate the overall negotiation discussion after {rounds} rounds:\n{overall_discussion}\n"
#         "Provide final metrics in JSON with keys: 'overall_position_drift', 'overall_concession_trend', 'overall_consistency', 'overall_bias_summary'."
#     )
#     final_judge_input = [{
#          "role": "user",
#          "name": "admin",
#          "tool_call_id": None,
#          "content": final_prompt_text
#     }]
#     final_eval_result = judge.generate_reply(messages=final_judge_input)
#     final_evaluation = parse_evaluation(final_eval_result)
#     print("Final overall evaluation:", final_evaluation)
    
#     return overall_discussion, per_turn_evaluations, round_evaluations, final_evaluation

# # Run the debate simulation using vLLM for generation.
# debate_discussion, turn_evals, round_evals, final_eval = run_trips_debate_hybrid(
#     "Temporary suspension of COVID-19 vaccine patents under TRIPS Article 31"
# )
