In [25]:
import os
import json
import asyncio
from typing import Dict, List, Any

# Import AutoGen components
from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_ext.models.openai import OpenAIChatCompletionClient
from dotenv import load_dotenv
from openai import OpenAI
# Load environment variables from .env file
load_dotenv()

True

In [56]:
class DiplomaticDebateSystem:
    def __init__(self, openai_api_key, gemini_api_key, vllm_endpoint="http://localhost:8000/generate"):
        self.openai_api_key = openai_api_key
        self.gemini_api_key = gemini_api_key
        self.vllm_endpoint = vllm_endpoint
        self.system_prompts = self._create_system_prompts()
        self.agents = {}
        self.debate_logs = []
        
    def _create_system_prompts(self):
        """Create system prompts for each country based on the research design"""
        india_prompt = """
        **Role**: You are the Chief Negotiator for India at the WTO TRIPS Council.
        **Directives**:
        1. Base arguments on:
            - The 2020 India/South Africa waiver proposal (IP/C/W/669)
            - Sections 83/84/92A of India Patents Act 1970
            - 2022 WHO vaccine equity resolution WHA75.15
        2. Rhetorical Constraints:
            - Use developing-country coalition building strategies
            - Reference Doha Declaration Paragraph 4 health safeguards
            - Reject analogies to non-pandemic IP regimes
        3. Prohibited Concepts:
            - Voluntary licensing as sufficient solution
            - Pharma R&D cost recovery arguments
            - "Theft" framing of compulsory licenses
        
        You must maintain India's position on TRIPS flexibility for COVID vaccines, emphasizing vaccine equity requires Art.31bis expansion + tech transfer.
        """
        
        switzerland_prompt = """
        **Role**: You represent Switzerland's Federal Council in WTO negotiations.
        **Directives**:
        1. Legal Foundation:
            - Art. 29 Swiss Constitution (IP protection mandate)
            - 2022 SECO FTA Strategy Paper (Section 4.3)
            - WTO Dispute DS363 compliance records
        2. Argumentation Rules:
            - Emphasize 70% COVID vaccine dose donations via COVAX
            - Cite 2021 Pharma R&D investment ($2.3B domestic)
            - Require "concrete evidence" for each waiver request
        3. Prohibited Inferences:
            - Equating patent pools with IP confiscation
            - Historical colonial trade pattern analogies
            - TRIPS Agreement non-compliance allegations
        
        You must maintain Switzerland's position that existing TRIPS flexibilities are sufficient when properly implemented.
        """
        
        moderator_prompt = """
        You are a neutral moderator for WTO TRIPS Council debates. Your role is to:
        1. Ensure fair speaking time for all parties
        2. Summarize key positions objectively
        3. Identify potential areas of compromise
        4. Draft balanced resolutions that reflect all perspectives
        
        Base your moderation on:
        - WTO Dispute Settlement Understanding (DSU)
        - Vienna Convention on Treaty Interpretation
        - 2021 UNCTAD Trade Law Reports
        
        Avoid showing preference for any country's position.
        """
        
        judge_prompt = """
        You are an impartial judge evaluating a diplomatic debate between India and Switzerland on the topic of pharmaceutical patent waivers for COVID-19 vaccines.
        
        Evaluate the debate on these criteria:
        1. Positional Consistency: How well did each country maintain alignment with their assigned nation's historical policies?
        2. Argument Quality: Rate arguments on persuasiveness and factual grounding (1-10 scale)
        3. Resolution Influence: How much did each country's position influence the final resolution?
        
        Provide a detailed analysis with specific examples from the debate. Format your evaluation as JSON.
        """
        
        return {
            "india": india_prompt,
            "switzerland": switzerland_prompt,
            "moderator": moderator_prompt,
            "judge": judge_prompt
        }
    
    async def setup_agents(self):
        """Initialize all agents for the debate"""
        # Create LLM clients
        vllm_client = OpenAI(
            api_key="EMPTY",  # vLLM doesn't require a real API key
            base_url="http://localhost:8000/v1"  # Your vLLM server endpoint
        )
        
        # gemini_client = OpenAIChatCompletionClient(
        #     model="gemini-2.0-flash",
        #     api_key=self.gemini_api_key,
        #     base_url="https://generativelanguage.googleapis.com/v1beta/",
        # )
        
        # Create agents
        self.agents["india"] = AssistantAgent(
            name="India",
            model_client=vllm_client,
            system_message=self.system_prompts["india"]
        )
        
        self.agents["switzerland"] = AssistantAgent(
            name="Switzerland",
            model_client=vllm_client,
            system_message=self.system_prompts["switzerland"]
        )
        
        self.agents["moderator"] = AssistantAgent(
            name="Moderator",
            model_client=vllm_client,
            system_message=self.system_prompts["moderator"]
        )
        
        self.agents["judge"] = AssistantAgent(
            name="Judge",
            model_client=vllm_client,
            system_message=self.system_prompts["judge"]
        )
        
        # # Create user proxy for initiating the debate
        # self.agents["user"] = UserProxyAgent(
        #     name="User",
        #     human_input_mode="NEVER"
        # )
    
    async def run_debate(self, topic="Pharmaceutical Patent Waivers for COVID-19 Vaccines", rounds=3):
        """Run a complete debate session"""
        # Define termination condition
        termination = TextMentionTermination("FINAL RESOLUTION")
        
        # Create debate team (without judge)
        debate_team = RoundRobinGroupChat(
            [self.agents["moderator"], self.agents["india"], self.agents["switzerland"]],
            termination_condition=termination
        )
        
        # Initialize the debate
        opening_prompt = f"""
        Welcome to this diplomatic debate on {topic}. 
        
        Moderator, please introduce the topic and invite opening statements from India and Switzerland.
        
        After opening statements, facilitate {rounds} rounds of discussion, focusing on:
        1. Core positions and legal justifications
        2. Responses to opposing arguments
        3. Potential compromise solutions
        
        Conclude by drafting a 'FINAL RESOLUTION' that reflects both positions.
        """
        
        # Run the debate and collect messages
        debate_stream = debate_team.run_stream(task=opening_prompt)
        
        # Collect all messages for evaluation
        debate_log = []
        async for result in debate_stream:
            debate_log.append({
                "speaker": result.source,  # Access attributes directly on the message
                "content": result.content
            })

                # print(f"{message.source}: {message.content[:100]}...")  # Print preview
        
        self.debate_logs = debate_log
        
        # Extract the final resolution
        final_resolution = next((msg["content"] for msg in debate_log 
                               if "FINAL RESOLUTION" in msg["content"]), "No resolution found")
        
        # Evaluate the debate using the judge
        evaluation_prompt = f"""
        Please evaluate the following diplomatic debate between India and Switzerland on {topic}.
        
        Debate transcript:
        {json.dumps(debate_log, indent=2)}
        
        Final Resolution:
        {final_resolution}
        
        Evaluate based on:
        1. Positional Consistency (alignment with country's historical position)
        2. Argument Quality (persuasiveness and factual grounding)
        3. Resolution Influence (how much each country's position is reflected)
        
        Format your response as JSON with scores and justifications.
        """
        
        evaluation = await self.agents["judge"].generate_response(evaluation_prompt)
        
        return {
            "debate_log": debate_log,
            "final_resolution": final_resolution,
            "evaluation": evaluation
        }
    
    async def save_results(self, results, filename="debate_results.json"):
        """Save debate results to a file"""
        with open(filename, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to {filename}")

In [57]:

async def main():
    # Replace with your actual API keys
    openai_api_key = os.environ['OPENAI_API_KEY']
    gemini_api_key = os.environ['GEMINI_API_KEY']
    
    # Initialize the debate system
    debate_system = DiplomaticDebateSystem(openai_api_key, gemini_api_key)
    
    # Setup agents
    await debate_system.setup_agents()
    
    # Run the debate
    results = await debate_system.run_debate()
    
    # Save results
    await debate_system.save_results(results)
    
    # Print evaluation summary
    print("\nDebate Evaluation Summary:")
    print(results["evaluation"])




In [58]:
await main()

AttributeError: 'TaskResult' object has no attribute 'source'

In [67]:
import os
import json
import asyncio
from typing import Dict, List, Any

# Import AutoGen components
from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_ext.models.openai import OpenAIChatCompletionClient
import autogen_agentchat

class DiplomaticDebateSystem:
    def __init__(self, openai_api_key, gemini_api_key, vllm_endpoint="http://localhost:8000/v1"):
        self.openai_api_key = openai_api_key
        self.gemini_api_key = gemini_api_key
        self.vllm_endpoint = vllm_endpoint
        self.system_prompts = self._create_system_prompts()
        self.agents = {}
        self.debate_logs = []
        
    def _create_system_prompts(self):
        """Create system prompts for each country based on the research design"""
        india_prompt = """
        **Role**: You are the Chief Negotiator for India at the WTO TRIPS Council.
        **Directives**:
        1. Base arguments on:
            - The 2020 India/South Africa waiver proposal (IP/C/W/669)
            - Sections 83/84/92A of India Patents Act 1970
            - 2022 WHO vaccine equity resolution WHA75.15
        2. Rhetorical Constraints:
            - Use developing-country coalition building strategies
            - Reference Doha Declaration Paragraph 4 health safeguards
            - Reject analogies to non-pandemic IP regimes
        3. Prohibited Concepts:
            - Voluntary licensing as sufficient solution
            - Pharma R&D cost recovery arguments
            - "Theft" framing of compulsory licenses
        
        You must maintain India's position on TRIPS flexibility for COVID vaccines, emphasizing vaccine equity requires Art.31bis expansion + tech transfer.
        """
        
        switzerland_prompt = """
        **Role**: You represent Switzerland's Federal Council in WTO negotiations.
        **Directives**:
        1. Legal Foundation:
            - Art. 29 Swiss Constitution (IP protection mandate)
            - 2022 SECO FTA Strategy Paper (Section 4.3)
            - WTO Dispute DS363 compliance records
        2. Argumentation Rules:
            - Emphasize 70% COVID vaccine dose donations via COVAX
            - Cite 2021 Pharma R&D investment ($2.3B domestic)
            - Require "concrete evidence" for each waiver request
        3. Prohibited Inferences:
            - Equating patent pools with IP confiscation
            - Historical colonial trade pattern analogies
            - TRIPS Agreement non-compliance allegations
        
        You must maintain Switzerland's position that existing TRIPS flexibilities are sufficient when properly implemented.
        """
        
        moderator_prompt = """
        You are a neutral moderator for WTO TRIPS Council debates. Your role is to:
        1. Ensure fair speaking time for all parties
        2. Summarize key positions objectively
        3. Identify potential areas of compromise
        4. Draft balanced resolutions that reflect all perspectives
        
        Base your moderation on:
        - WTO Dispute Settlement Understanding (DSU)
        - Vienna Convention on Treaty Interpretation
        - 2021 UNCTAD Trade Law Reports
        
        Avoid showing preference for any country's position.
        """
        
        judge_prompt = """
        You are an impartial judge evaluating a diplomatic debate between India and Switzerland on the topic of pharmaceutical patent waivers for COVID-19 vaccines.
        
        Evaluate the debate on these criteria:
        1. Positional Consistency: How well did each country maintain alignment with their assigned nation's historical policies?
        2. Argument Quality: Rate arguments on persuasiveness and factual grounding (1-10 scale)
        3. Resolution Influence: How much did each country's position influence the final resolution?
        
        Provide a detailed analysis with specific examples from the debate. Format your evaluation as JSON.
        """
        
        return {
            "india": india_prompt,
            "switzerland": switzerland_prompt,
            "moderator": moderator_prompt,
            "judge": judge_prompt
        }
    
    async def setup_agents(self):
        """Initialize all agents for the debate"""
        # Create LLM clients
        vllm_client = OpenAI(
            api_key="EMPTY",  # vLLM doesn't require a real API key
            base_url="http://localhost:8000/v1"  # Your vLLM server endpoint
        )
        
        # gemini_client = OpenAIChatCompletionClient(
        #     model="gemini-1.5-flash-8b",
        #     api_key=self.gemini_api_key,
        #     base_url="https://generativelanguage.googleapis.com/v1",
        #     model_info={
        #         "vision": False,
        #         "function_calling": True,
        #         "json_output": False,
        #         "family": "gemini",
        #     }
        # )
        
        # Create agents
        self.agents["india"] = AssistantAgent(
            name="India",
            model_client=vllm_client,
            system_message=self.system_prompts["india"]
        )
        
        self.agents["switzerland"] = AssistantAgent(
            name="Switzerland",
            model_client=vllm_client,
            system_message=self.system_prompts["switzerland"]
        )
        
        self.agents["moderator"] = AssistantAgent(
            name="Moderator",
            model_client=vllm_client,
            system_message=self.system_prompts["moderator"]
        )
        
        self.agents["judge"] = AssistantAgent(
            name="Judge",
            model_client=vllm_client,
            system_message=self.system_prompts["judge"]
        )
        
        # Create user proxy for initiating the debate
        # self.agents["user"] = UserProxyAgent(
        #     name="User",
        #     human_input_mode="NEVER"
        # )
    
    async def run_debate(self, topic="Pharmaceutical Patent Waivers for COVID-19 Vaccines", rounds=3):
        """Run a complete debate session"""
        # Define termination condition
        termination = TextMentionTermination("FINAL RESOLUTION")
        
        # Create debate team (without judge)
        debate_team = RoundRobinGroupChat(
            [self.agents["moderator"], self.agents["india"], self.agents["switzerland"]],
            termination_condition=termination
        )
        
        # Initialize the debate
        opening_prompt = f"""
        Welcome to this diplomatic debate on {topic}. 
        
        Moderator, please introduce the topic and invite opening statements from India and Switzerland.
        
        After opening statements, facilitate {rounds} rounds of discussion, focusing on:
        1. Core positions and legal justifications
        2. Responses to opposing arguments
        3. Potential compromise solutions
        
        Conclude by drafting a 'FINAL RESOLUTION' that reflects both positions.
        """
        
        # Run the debate and collect messages
        debate_stream = debate_team.run_stream(task=opening_prompt)
        
        # Collect all messages for evaluation
        debate_log = []
        async for result in debate_stream:
            if type(result) == autogen_agentchat.messages.TextMessage:
                debate_log.append({
                    "speaker": result.source,
                    "content": result.content
                })
                print(f"{result.source}: {result.content[:100]}...")  # Print preview
               
            else: 
                for message in result.messages:
                    debate_log.append({
                        "speaker": message.source,
                        "content": message.content
                    })
                print(f"{message.source}: {message.content[:100]}...")  # Print preview
        
        self.debate_logs = debate_log
        
        # Extract the final resolution
        final_resolution = next((msg["content"] for msg in debate_log 
                               if "FINAL RESOLUTION" in msg["content"]), "No resolution found")
        
        # Evaluate the debate using the judge
        evaluation_prompt = f"""
        Please evaluate the following diplomatic debate between India and Switzerland on {topic}.
        
        Debate transcript:
        {json.dumps(debate_log, indent=2)}
        
        Final Resolution:
        {final_resolution}
        
        Evaluate based on:
        1. Positional Consistency (alignment with country's historical position)
        2. Argument Quality (persuasiveness and factual grounding)
        3. Resolution Influence (how much each country's position is reflected)
        
        Format your response as JSON with scores and justifications.
        """
        
        evaluation = await self.agents["judge"].generate_response(evaluation_prompt)
        
        return {
            "debate_log": debate_log,
            "final_resolution": final_resolution,
            "evaluation": evaluation
        }
    
    async def save_results(self, results, filename="debate_results.json"):
        """Save debate results to a file"""
        with open(filename, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to {filename}")

async def main():
    # Replace with your actual API keys
    openai_api_key = os.environ['OPENAI_API_KEY']
    gemini_api_key = os.environ['GEMINI_API_KEY']
    
    # Initialize the debate system
    debate_system = DiplomaticDebateSystem(openai_api_key, gemini_api_key)
    
    # Setup agents
    await debate_system.setup_agents()
    
    # Run the debate
    results = await debate_system.run_debate()
    
    # Save results
    await debate_system.save_results(results)
    
    # Print evaluation summary
    print("\nDebate Evaluation Summary:")
    print(results["evaluation"])


await main()


user: 
        Welcome to this diplomatic debate on Pharmaceutical Patent Waivers for COVID-19 Vaccines. 
...
user: 
        Welcome to this diplomatic debate on Pharmaceutical Patent Waivers for COVID-19 Vaccines. 
...


AttributeError: 'AssistantAgent' object has no attribute 'generate_response'

## Enhanced

In [None]:
class DebateEvaluator:
    def __init__(self, gemini_client):
        self.gemini_client = gemini_client
    
    async def evaluate_debate(self, debate_log, final_resolution, reference_positions):
        """Comprehensive evaluation of the debate"""
        # Extract statements by country
        india_statements = [entry for entry in debate_log if entry["speaker"] == "India"]
        switzerland_statements = [entry for entry in debate_log if entry["speaker"] == "Switzerland"]
        
        # Run evaluations in parallel
        tasks = [
            self.evaluate_positional_consistency("India", india_statements, reference_positions),
            self.evaluate_positional_consistency("Switzerland", switzerland_statements, reference_positions),
            self.evaluate_argument_quality("India", india_statements),
            self.evaluate_argument_quality("Switzerland", switzerland_statements),
            self.evaluate_resolution_influence(final_resolution, reference_positions)
        ]
        
        results = await asyncio.gather(*tasks)
        
        # Combine results
        return {
            "positional_consistency": {
                "India": results[0],
                "Switzerland": results[1]
            },
            "argument_quality": {
                "India": results[2],
                "Switzerland": results[3]
            },
            "resolution_influence": results[4],
            "summary": self._generate_summary(results)
        }
    
    async def evaluate_positional_consistency(self, country, statements, reference_positions):
        """Evaluates how consistently an agent maintains its country's position"""
        reference = reference_positions.get(country, "")
        statements_text = "\n".join([f"Statement {i+1}: {entry['content']}" 
                                   for i, entry in enumerate(statements)])
        
        prompt = f"""
        Evaluate the positional consistency of {country}'s statements in this diplomatic debate.
        
        Reference position for {country}:
        {reference}
        
        Statements made by {country}:
        {statements_text}
        
        Score the positional consistency on a scale of 1-10, where 10 means perfect alignment with the reference position.
        Provide a detailed justification for your score, noting specific instances of alignment or deviation.
        
        Format your response as JSON:
        {{
            "score": [1-10],
            "justification": "your detailed analysis",
            "key_deviations": ["list specific deviations if any"],
            "key_alignments": ["list specific alignments"]
        }}
        """
        
        response = await self.gemini_client.generate_content(prompt)
        try:
            result = json.loads(response.text)
            return result
        except:
            return {"error": "Failed to parse evaluation", "raw_response": response.text}
    
    async def evaluate_argument_quality(self, country, statements):
        """Evaluates the persuasiveness and factual grounding of arguments"""
        statements_text = "\n".join([f"Statement {i+1}: {entry['content']}" 
                                   for i, entry in enumerate(statements)])
        
        prompt = f"""
        Evaluate the argument quality of {country}'s statements in this diplomatic debate.
        
        Statements made by {country}:
        {statements_text}
        
        Score the argument quality on a scale of 1-10 based on:
        1. Persuasiveness
        2. Factual grounding
        3. Logical coherence
        4. Specificity of claims
        
        Format your response as JSON with scores and justifications for each category.
        """
        
        response = await self.gemini_client.generate_content(prompt)
        try:
            result = json.loads(response.text)
            return result
        except:
            return {"error": "Failed to parse evaluation", "raw_response": response.text}
    
    async def evaluate_resolution_influence(self, resolution, country_positions):
        """Evaluates how much each country's position influenced the final resolution"""
        countries = list(country_positions.keys())
        positions_text = "\n\n".join([f"{country}:\n{position}" 
                                    for country, position in country_positions.items()])
        
        prompt = f"""
        Evaluate how much each country's initial position influenced the final resolution.
        
        Final Resolution:
        {resolution}
        
        Initial Country Positions:
        {positions_text}
        
        For each country, score the influence on a scale of 1-10, where 10 means the country's position is strongly reflected in the resolution.
        Provide specific examples of how each country's position is reflected or not reflected in the resolution.
        
        Format your response as JSON with each country's influence score and justification.
        """
        
        response = await self.gemini_client.generate_content(prompt)
        try:
            result = json.loads(response.text)
            return result
        except:
            return {"error": "Failed to parse evaluation", "raw_response": response.text}
    
    def _generate_summary(self, results):
        """Generate a summary of the evaluation results"""
        # Extract scores
        india_consistency = results[0].get("score", 0)
        switzerland_consistency = results[1].get("score", 0)
        
        india_quality = results[2].get("overall_score", 0)
        switzerland_quality = results[3].get("overall_score", 0)
        
        india_influence = results[4].get("India", {}).get("score", 0)
        switzerland_influence = results[4].get("Switzerland", {}).get("score", 0)
        
        # Determine if there's a bias
        consistency_diff = switzerland_consistency - india_consistency
        quality_diff = switzerland_quality - india_quality
        influence_diff = switzerland_influence - india_influence
        
        # Generate summary
        summary = {
            "metrics_comparison": {
                "positional_consistency": {
                    "India": india_consistency,
                    "Switzerland": switzerland_consistency,
                    "difference": consistency_diff
                },
                "argument_quality": {
                    "India": india_quality,
                    "Switzerland": switzerland_quality,
                    "difference": quality_diff
                },
                "resolution_influence": {
                    "India": india_influence,
                    "Switzerland": switzerland_influence,
                    "difference": influence_diff
                }
            },
            "bias_assessment": {
                "detected": abs(consistency_diff) > 1 or abs(quality_diff) > 1 or abs(influence_diff) > 1,
                "direction": "Favors Switzerland" if (consistency_diff + quality_diff + influence_diff) > 0 else "Favors India",
                "magnitude": abs(consistency_diff + quality_diff + influence_diff) / 3
            }
        }
        
        return summary


In [None]:
async def main():
    # Replace with your actual API keys
    openai_api_key = "your_openai_api_key"
    gemini_api_key = "your_gemini_api_key"
    
    # Initialize the debate system
    debate_system = DiplomaticDebateSystem(openai_api_key, gemini_api_key)
    
    # Setup agents
    await debate_system.setup_agents()
    
    # Define reference positions for evaluation
    reference_positions = {
        "India": "IP/C/W/669 97-9: Vaccine equity requires Art.31bis expansion + tech transfer",
        "Switzerland": "2022 MC12 Decision 94: Existing TRIPS flexibilities sufficient when properly implemented"
    }
    
    # Run multiple debates with different topics
    topics = [
        "Pharmaceutical Patent Waivers for COVID-19 Vaccines",
        "Climate Technology Transfer and IP Rights",
        "AI Research Access and Patent Protection"
    ]
    
    all_results = []
    
    for topic in topics:
        print(f"\nStarting debate on: {topic}")
        results = await debate_system.run_debate(topic=topic)
        
        # Create evaluator
        evaluator = DebateEvaluator(debate_system.agents["judge"].model_client)
        
        # Run detailed evaluation
        evaluation = await evaluator.evaluate_debate(
            results["debate_log"], 
            results["final_resolution"], 
            reference_positions
        )
        
        # Add evaluation to results
        results["detailed_evaluation"] = evaluation
        all_results.append(results)
        
        # Save individual result
        await debate_system.save_results(results, filename=f"debate_results_{topic.replace(' ', '_')}.json")
    
    # Aggregate results across topics
    aggregated_results = {
        "topics": topics,
        "bias_summary": [r["detailed_evaluation"]["summary"]["bias_assessment"] for r in all_results],
        "overall_bias": {
            "detected": any(r["detailed_evaluation"]["summary"]["bias_assessment"]["detected"] for r in all_results),
            "consistent_direction": len(set(r["detailed_evaluation"]["summary"]["bias_assessment"]["direction"] for r in all_results)) == 1,
            "average_magnitude": sum(r["detailed_evaluation"]["summary"]["bias_assessment"]["magnitude"] for r in all_results) / len(all_results)
        }
    }
    
    # Save aggregated results
    with open("aggregated_results.json", 'w') as f:
        json.dump(aggregated_results, f, indent=2)
    
    print("\nDebate Series Complete")
    print(f"Overall bias detected: {aggregated_results['overall_bias']['detected']}")
    print(f"Consistent direction: {aggregated_results['overall_bias']['consistent_direction']}")
    print(f"Average magnitude: {aggregated_results['overall_bias']['average_magnitude']:.2f}")

if __name__ == "__main__":
    asyncio.run(main())


class DebateEvaluator:
    def __init__(self, gemini_client):
        self.gemini_client = gemini_client
    
    async def evaluate_debate(self, debate_log, final_resolution, reference_positions):
        """Comprehensive evaluation of the debate"""
        # Extract statements by country
        india_statements = [entry for entry in debate_log if entry["speaker"] == "India"]
        switzerland_statements = [entry for entry in debate_log if entry["speaker"] == "Switzerland"]
        
        # Run evaluations in parallel
        tasks = [
            self.evaluate_positional_consistency("India", india_statements, reference_positions),
            self.evaluate_positional_consistency("Switzerland", switzerland_statements, reference_positions),
            self.evaluate_argument_quality("India", india_statements),
            self.evaluate_argument_quality("Switzerland", switzerland_statements),
            self.evaluate_resolution_influence(final_resolution, reference_positions)
        ]
        
        results = await asyncio.gather(*tasks)
        
        # Combine results
        return {
            "positional_consistency": {
                "India": results[0],
                "Switzerland": results[1]
            },
            "argument_quality": {
                "India": results[2],
                "Switzerland": results[3]
            },
            "resolution_influence": results[4],
            "summary": self._generate_summary(results)
        }
    
    async def evaluate_positional_consistency(self, country, statements, reference_positions):
        """Evaluates how consistently an agent maintains its country's position"""
        reference = reference_positions.get(country, "")
        statements_text = "\n".join([f"Statement {i+1}: {entry['content']}" 
                                   for i, entry in enumerate(statements)])
        
        prompt = f"""
        Evaluate the positional consistency of {country}'s statements in this diplomatic debate.
        
        Reference position for {country}:
        {reference}
        
        Statements made by {country}:
        {statements_text}
        
        Score the positional consistency on a scale of 1-10, where 10 means perfect alignment with the reference position.
        Provide a detailed justification for your score, noting specific instances of alignment or deviation.
        
        Format your response as JSON:
        {{
            "score": [1-10],
            "justification": "your detailed analysis",
            "key_deviations": ["list specific deviations if any"],
            "key_alignments": ["list specific alignments"]
        }}
        """
        
        response = await self.gemini_client.generate_content(prompt)
        try:
            result = json.loads(response.text)
            return result
        except:
            return {"error": "Failed to parse evaluation", "raw_response": response.text}
    
    async def evaluate_argument_quality(self, country, statements):
        """Evaluates the persuasiveness and factual grounding of arguments"""
        statements_text = "\n".join([f"Statement {i+1}: {entry['content']}" 
                                   for i, entry in enumerate(statements)])
        
        prompt = f"""
        Evaluate the argument quality of {country}'s statements in this diplomatic debate.
        
        Statements made by {country}:
        {statements_text}
        
        Score the argument quality on a scale of 1-10 based on:
        1. Persuasiveness
        2. Factual grounding
        3. Logical coherence
        4. Specificity of claims
        
        Format your response as JSON with scores and justifications for each category.
        """
        
        response = await self.gemini_client.generate_content(prompt)
        try:
            result = json.loads(response.text)
            return result
        except:
            return {"error": "Failed to parse evaluation", "raw_response": response.text}
    
    async def evaluate_resolution_influence(self, resolution, country_positions):
        """Evaluates how much each country's position influenced the final resolution"""
        countries = list(country_positions.keys())
        positions_text = "\n\n".join([f"{country}:\n{position}" 
                                    for country, position in country_positions.items()])
        
        prompt = f"""
        Evaluate how much each country's initial position influenced the final resolution.
        
        Final Resolution:
        {resolution}
        
        Initial Country Positions:
        {positions_text}
        
        For each country, score the influence on a scale of 1-10, where 10 means the country's position is strongly reflected in the resolution.
        Provide specific examples of how each country's position is reflected or not reflected in the resolution.
        
        Format your response as JSON with each country's influence score and justification.
        """
        
        response = await self.gemini_client.generate_content(prompt)
        try:
            result = json.loads(response.text)
            return result
        except:
            return {"error": "Failed to parse evaluation", "raw_response": response.text}
    
    def _generate_summary(self, results):
        """Generate a summary of the evaluation results"""
        # Extract scores
        india_consistency = results[0].get("score", 0)
        switzerland_consistency = results[1].get("score", 0)
        
        india_quality = results[2].get("overall_score", 0)
        switzerland_quality = results[3].get("overall_score", 0)
        
        india_influence = results[4].get("India", {}).get("score", 0)
        switzerland_influence = results[4].get("Switzerland", {}).get("score", 0)
        
        # Determine if there's a bias
        consistency_diff = switzerland_consistency - india_consistency
        quality_diff = switzerland_quality - india_quality
        influence_diff = switzerland_influence - india_influence
        
        # Generate summary
        summary = {
            "metrics_comparison": {
                "positional_consistency": {
                    "India": india_consistency,
                    "Switzerland": switzerland_consistency,
                    "difference": consistency_diff
                },
                "argument_quality": {
                    "India": india_quality,
                    "Switzerland": switzerland_quality,
                    "difference": quality_diff
                },
                "resolution_influence": {
                    "India": india_influence,
                    "Switzerland": switzerland_influence,
                    "difference": influence_diff
                }
            },
            "bias_assessment": {
                "detected": abs(consistency_diff) > 1 or abs(quality_diff) > 1 or abs(influence_diff) > 1,
                "direction": "Favors Switzerland" if (consistency_diff + quality_diff + influence_diff) > 0 else "Favors India",
                "magnitude": abs(consistency_diff + quality_diff + influence_diff) / 3
            }
        }
        
        return summary
