In [1]:
import numpy as np
from env.reputation_environment import ReputationEnvironment
from env.evaluator import EnvironmentRecorder, NetworkEvaluator

In [2]:
from custom_policies import *

In [3]:
def run_experiment(num_runs, env_hyperparameters, strategy_probs, max_timesteps=1000):
    """Run ``num_runs`` independent simulations and print a report after each.

    Args:
        num_runs: Number of simulations to run back to back.
        env_hyperparameters: Keyword arguments forwarded unchanged to
            ``ReputationEnvironment``.
        strategy_probs: Mapping of strategy name to the probability that an
            agent is assigned that strategy. ``"honest"``, ``"malicious"`` and
            ``"diligent"`` select the matching policy; any other name falls
            back to ``random_policy``.
        max_timesteps: Safety cap. A run is aborted as soon as
            ``env.timestep`` exceeds this value (default 1000, the value that
            used to be hard-coded).

    Returns:
        None. Results are emitted through ``NetworkEvaluator.report()`` when
        the environment's ``render_mode`` is ``"network"`` and through
        ``EnvironmentRecorder.report()`` otherwise.
    """
    # The strategy distribution is the same for every run, so unpack it once.
    strategy_names = list(strategy_probs)
    strategy_weights = list(strategy_probs.values())

    for _ in range(num_runs):
        env = ReputationEnvironment(**env_hyperparameters)
        try:
            recorder = EnvironmentRecorder(env)
            env.reset()

            # Draw one strategy per agent; the assignment stays fixed for the run.
            agent_to_strategy = {
                agent: np.random.choice(strategy_names, p=strategy_weights)
                for agent in env.agents
            }
            recorder.agent_to_strategy = agent_to_strategy

            while len(env.agents) > 0:
                actions = {}
                for agent in env.agents:
                    strategy = agent_to_strategy[agent]
                    if strategy == "honest":
                        actions[agent] = simple_policy(agent, env)
                    elif strategy == "malicious":
                        actions[agent] = malicious_policy(agent, env)
                    elif strategy == "diligent":
                        actions[agent] = diligent_policy(agent, env)
                    else:
                        actions[agent] = random_policy(agent, env)
                env.step(actions)
                if env.timestep > max_timesteps:
                    break

            if env.render_mode == "network":
                evaluator = NetworkEvaluator(
                    {
                        "nodes": list(env.network_nodes.values()),
                        "links": env.network_links,
                        "steps": env.timestep,
                        "initial_reputation": env.initial_reputation,
                        "agent_strategy": agent_to_strategy,
                        "remaining_agents": env.agents,
                    }
                )
                evaluator.report()
            else:
                recorder.report()
        finally:
            # The original wrote `env.close` without parentheses, which never
            # ran the method; call it, and do so even if the run raised.
            env.close()

In [4]:
# Scenario: 500 authors, 10 conferences, every author assigned "diligent".
params1 = dict(
    n_authors=500,
    n_conferences=10,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=10,
)
strats = dict(diligent=1, honest=0, malicious=0)
run_experiment(5, params1, strats)


Analysis from network file
--------------------
simulation with 500 authors went on for 1001 steps.
442 agents remain.
number of papers submitted: 8315
number of papers accepted: 4547
average number of coauthors per author: 3.08
Agent strategies:
  - before: diligent: 1.0
  - after: diligent: 1.00
mean reputation increase: 47.028
mean effort put into submitted papers: 52.50
median effort put into submitted papers: 52.00
conference submissions, publications and acceptance rates: 
 - conference_0_0 (reputation: 450):   52/80   (0.65)
 - conference_1_0 (reputation: 500):   50/119  (0.42)
 - conference_2_0 (reputation: 400):   51/56   (0.91)
 - conference_3_0 (reputation: 400):   50/63   (0.79)
 - conference_4_0 (reputation: 500):   51/142  (0.36)
 - conference_5_0 (reputation: 400):   51/62   (0.82)
 - conference_6_0 (reputation: 450):   50/83   (0.60)
 - conference_7_0 (reputation: 400):   51/59   (0.86)
 - conference_8_0 (reputation: 500):   50/168  (0.30)
 - conference_9_0 (reputation

In [4]:
# Same all-"diligent" configuration as the cell above, executed again.
params1 = dict(
    n_authors=500,
    n_conferences=10,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=10,
)
strats = dict(diligent=1, honest=0, malicious=0)
run_experiment(5, params1, strats)


Analysis from network file
--------------------
simulation with 500 authors went on for 1001 steps.
424 agents remain.
number of papers submitted: 8032
number of papers accepted: 4166
average number of coauthors per author: 3.08
Agent strategies:
  - before: diligent: 1.0
  - after: diligent: 1.00
mean reputation increase: 43.568
mean effort put into submitted papers: 52.45
median effort put into submitted papers: 52.00
conference submissions, publications and acceptance rates: 
 - conference_0_0 (reputation: 400):   51/60   (0.85)
 - conference_1_0 (reputation: 450):   51/82   (0.62)
 - conference_2_0 (reputation: 450):   51/74   (0.69)
 - conference_3_0 (reputation: 450):   51/79   (0.65)
 - conference_4_0 (reputation: 500):   51/152  (0.34)
 - conference_5_0 (reputation: 400):   50/60   (0.83)
 - conference_6_0 (reputation: 400):   50/53   (0.94)
 - conference_7_0 (reputation: 500):   50/160  (0.31)
 - conference_8_0 (reputation: 500):   50/155  (0.32)
 - conference_9_0 (reputation

In [38]:
# Five per-run values for the params1 scenario.
# NOTE(review): presumably transcribed by hand from the "mean reputation
# increase" lines of earlier run reports -- confirm the source of these numbers.
reps1 = np.array([38.798, 46.278, 37.004, 44.386, 43.868])
print(reps1.mean())
# Coefficient of variation: standard deviation relative to the mean.
print(reps1.std() / reps1.mean())


42.06679999999999
0.08416275920864379


In [39]:
# Scenario: 500 authors, 10 conferences, every author assigned "honest".
params2 = dict(
    n_authors=500,
    n_conferences=10,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=10,
)
strats2 = dict(diligent=0, honest=1, malicious=0)
run_experiment(5, params2, strats2)

NameError: name 'np' is not defined

In [21]:
# Five per-run values for the params2 scenario.
# NOTE(review): presumably copied by hand from run reports -- confirm the source.
reps2 = [4.31, 5.566, 4.242, 6.732, 3.984]
# First line: mean; second line: coefficient of variation (std / mean).
print(np.mean(reps2), np.std(reps2) / np.mean(reps2), sep="\n")


4.966799999999999
0.20918870113700974


In [16]:
# Scenario: 500 authors, 10 conferences, every author assigned "malicious".
params3 = dict(
    n_authors=500,
    n_conferences=10,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=10,
)
strats3 = dict(diligent=0, honest=0, malicious=1)
run_experiment(5, params3, strats3)


Analysis from network file
--------------------
simulation with 500 authors went on for 495 steps.
0 agents remain.
number of papers submitted: 3912
number of papers accepted: 189
average number of coauthors per author: 3.89
Agent strategies:
  - before: malicious: 1.0
  - after: 
mean reputation increase: 2.062
mean effort put into submitted papers: 23.05
median effort put into submitted papers: 22.00
conference submissions, publications and acceptance rates: 
 - conference_0_0 (reputation: 450):   18/402  (0.04)
 - conference_1_0 (reputation: 450):   16/394  (0.04)
 - conference_2_0 (reputation: 450):   19/396  (0.05)
 - conference_3_0 (reputation: 500):   20/421  (0.05)
 - conference_4_0 (reputation: 500):   19/396  (0.05)
 - conference_5_0 (reputation: 500):   22/388  (0.06)
 - conference_6_0 (reputation: 400):   15/379  (0.04)
 - conference_7_0 (reputation: 450):   20/374  (0.05)
 - conference_8_0 (reputation: 500):   19/374  (0.05)
 - conference_9_0 (reputation: 500):   21/388  

In [20]:
# Five per-run values for the params3 scenario.
# NOTE(review): presumably copied by hand from run reports -- confirm the source.
reps3 = [1.92, 2.534, 2.504, 2.44, 2.662]
# First line: mean; second line: coefficient of variation (std / mean).
print(np.mean(reps3), np.std(reps3) / np.mean(reps3), sep="\n")


2.412
0.10630040126025835


In [23]:
# Scenario: all-"malicious" population with the co-author limit raised to 25.
# NOTE(review): this rebinds params3 / strats3, which an earlier cell defined
# with max_coauthors=10; whichever cell ran last determines their value.
params3 = dict(
    n_authors=500,
    n_conferences=10,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=25,
)
strats3 = dict(diligent=0, honest=0, malicious=1)
run_experiment(5, params3, strats3)


Analysis from network file
--------------------
simulation with 500 authors went on for 568 steps.
0 agents remain.
number of papers submitted: 4117
number of papers accepted: 209
average number of coauthors per author: 3.85
Agent strategies:
  - before: malicious: 1.0
  - after: 
mean reputation increase: 2.226
mean effort put into submitted papers: 23.04
median effort put into submitted papers: 22.00
conference submissions, publications and acceptance rates: 
 - conference_0_0 (reputation: 450):   26/396  (0.07)
 - conference_1_0 (reputation: 400):   27/419  (0.06)
 - conference_2_0 (reputation: 400):   13/430  (0.03)
 - conference_3_0 (reputation: 450):   12/419  (0.03)
 - conference_4_0 (reputation: 500):   22/410  (0.05)
 - conference_5_0 (reputation: 450):   20/385  (0.05)
 - conference_6_0 (reputation: 500):   17/421  (0.04)
 - conference_7_0 (reputation: 400):   27/426  (0.06)
 - conference_8_0 (reputation: 500):   27/411  (0.07)
 - conference_9_0 (reputation: 450):   18/400  

In [24]:
# Scenario: all-"honest" population with the co-author limit raised to 25.
params5 = dict(
    n_authors=500,
    n_conferences=10,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=25,
)
strats5 = dict(diligent=0, honest=1, malicious=0)
run_experiment(5, params5, strats5)


Analysis from network file
--------------------
simulation with 500 authors went on for 746 steps.
0 agents remain.
number of papers submitted: 3142
number of papers accepted: 638
average number of coauthors per author: 3.02
Agent strategies:
  - before: honest: 1.0
  - after: 
mean reputation increase: 6.27
mean effort put into submitted papers: 42.48
median effort put into submitted papers: 42.00
conference submissions, publications and acceptance rates: 
 - conference_0_0 (reputation: 400):   50/116  (0.43)
 - conference_1_0 (reputation: 450):   50/276  (0.18)
 - conference_2_0 (reputation: 500):   35/319  (0.11)
 - conference_3_0 (reputation: 400):   50/121  (0.41)
 - conference_4_0 (reputation: 500):   29/316  (0.09)
 - conference_5_0 (reputation: 400):   50/131  (0.38)
 - conference_6_0 (reputation: 400):   50/117  (0.43)
 - conference_7_0 (reputation: 500):   25/303  (0.08)
 - conference_8_0 (reputation: 400):   51/109  (0.47)
 - conference_9_0 (reputation: 450):   50/249  (0.2

In [25]:
# Scenario: all-"malicious" population with 25 conferences instead of 10.
params7 = dict(
    n_authors=500,
    n_conferences=25,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=10,
)
strats7 = dict(diligent=0, honest=0, malicious=1)
run_experiment(5, params7, strats7)


Analysis from network file
--------------------
simulation with 500 authors went on for 526 steps.
0 agents remain.
number of papers submitted: 4034
number of papers accepted: 220
average number of coauthors per author: 3.83
Agent strategies:
  - before: malicious: 1.0
  - after: 
mean reputation increase: 2.402
mean effort put into submitted papers: 22.92
median effort put into submitted papers: 22.00
conference submissions, publications and acceptance rates: 
 - conference_0_0 (reputation: 400):   12/140  (0.09)
 - conference_1_0 (reputation: 400):    9/170  (0.05)
 - conference_2_0 (reputation: 450):    9/153  (0.06)
 - conference_3_0 (reputation: 500):    7/160  (0.04)
 - conference_4_0 (reputation: 500):   12/159  (0.08)
 - conference_5_0 (reputation: 500):    4/140  (0.03)
 - conference_6_0 (reputation: 500):    5/160  (0.03)
 - conference_7_0 (reputation: 500):    9/150  (0.06)
 - conference_8_0 (reputation: 450):    9/155  (0.06)
 - conference_9_0 (reputation: 450):   12/165  

In [None]:
# Scenario: all-"honest" population with 25 conferences instead of 10.
params8 = dict(
    n_authors=500,
    n_conferences=25,
    render_mode="network",
    max_rewardless_steps=150,
    max_agent_steps=5000,
    max_coauthors=10,
)
strats8 = dict(diligent=0, honest=1, malicious=0)
run_experiment(5, params8, strats8)

In [1]:
"""
Example script showing how to use the agent policies with the peer group environment.
"""

import numpy as np
from env.peer_group_environment import PeerGroupEnvironment
from stats_tracker import SimulationStats
from agent_policies import get_policy_function, create_mixed_policy_population
import json
from pathlib import Path


def run_simulation_with_policies(
    n_agents: int = 20,
    max_steps: int = 100,
    policy_distribution: dict = None,
    output_file: str = "policy_simulation_results.jsonl"
):
    """
    Run one peer-group simulation in which each agent follows an assigned policy.

    Args:
        n_agents: Number of agents in the simulation.
        max_steps: Maximum number of simulation steps; also passed to the
            environment as ``max_timesteps``.
        policy_distribution: Mapping of policy name to share of the population,
            forwarded to ``create_mixed_policy_population``. ``None`` lets that
            helper choose its default.
        output_file: Path the results are written to as a single indented
            JSON document (the file is overwritten).

    Returns:
        dict with keys ``"final_stats"`` (``stats.to_dict()``),
        ``"agent_policies"`` (the per-agent policy assignment) and
        ``"policy_distribution"`` (the requested distribution, or an equal
        three-way split when none was given).
    """

    # Create environment
    env = PeerGroupEnvironment(
        n_agents=n_agents,
        peer_group_size=5,
        n_projects=6,
        max_projects_per_agent=3,
        max_timesteps=max_steps,
        max_rewardless_steps=50
    )

    # Create agent policy assignments
    agent_policies = create_mixed_policy_population(n_agents, policy_distribution)
    print(f"Agent policy distribution: {dict(zip(*np.unique(agent_policies, return_counts=True)))}")

    # Initialize stats tracker
    stats = SimulationStats()

    # Reset environment
    observations, infos = env.reset()

    # Simulation loop
    for step in range(max_steps):
        actions = {}

        # Generate actions for each agent based on their policy
        for agent in env.agents:
            agent_idx = env.agent_to_id[agent]
            policy_name = agent_policies[agent_idx]
            policy_func = get_policy_function(policy_name)

            # Get agent's observation and action mask
            obs = observations[agent]["observation"]
            action_mask = observations[agent]["action_mask"]

            # Generate action using the agent's policy
            actions[agent] = policy_func(obs, action_mask)

        # Step the environment
        observations, rewards, terminations, truncations, infos = env.step(actions)

        # Update stats
        stats.update(env, observations, rewards, terminations, truncations)

        # Print progress
        if step % 10 == 0:
            print(f"Step {step}: {stats.summary_line()}")

        # An agent is done when it is terminated OR truncated. Checking the
        # two dicts separately (as before) missed the mixed case where some
        # agents terminated and the rest were truncated.
        reported_agents = set(terminations) | set(truncations)
        everyone_done = all(
            terminations.get(agent, False) or truncations.get(agent, False)
            for agent in reported_agents
        )
        if everyone_done or not env.agents:
            print(f"Simulation ended at step {step}")
            break

    # Save results
    results = {
        "final_stats": stats.to_dict(),
        "agent_policies": agent_policies,
        "policy_distribution": policy_distribution or {"careerist": 1/3, "orthodox_scientist": 1/3, "mass_producer": 1/3}
    }

    with open(output_file, 'w') as f:
        # `default` converts NumPy scalars/arrays, which the json module
        # cannot serialise on its own, without altering the returned dict.
        json.dump(results, f, indent=2, default=_json_default)

    print(f"\nFinal Results:")
    print(f"Total Steps: {stats.total_steps}")
    print(f"Finished Projects: {stats.finished_projects_count}")
    print(f"Successful Projects: {stats.successful_projects_count}")
    # max(..., 1) avoids a ZeroDivisionError when no project finished.
    print(f"Success Rate: {stats.successful_projects_count / max(stats.finished_projects_count, 1):.3f}")
    print(f"Total Rewards: {stats.total_rewards_distributed:.2f}")

    return results


def _json_default(value):
    """Convert NumPy values to plain Python objects for ``json.dump``."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def compare_policy_performances():
    """Run one simulation per predefined policy mix and print a summary table.

    Each scenario is run with 20 agents for at most 100 steps and writes its
    results to ``results_<scenario name in snake case>.json``. Nothing is
    returned; the comparison is printed to stdout.
    """
    policy_names = ("careerist", "orthodox_scientist", "mass_producer")
    third = 1/3

    # (label, weights) pairs; weights follow the order of `policy_names`.
    scenarios = [
        ("All Careerist", (1.0, 0.0, 0.0)),
        ("All Orthodox", (0.0, 1.0, 0.0)),
        ("All Mass Producer", (0.0, 0.0, 1.0)),
        ("Balanced", (third, third, third)),
        ("Careerist Heavy", (0.6, 0.2, 0.2)),
        ("Orthodox Heavy", (0.2, 0.6, 0.2)),
    ]

    short_rule = "=" * 50
    wide_rule = "=" * 80
    final_stats_by_label = {}

    for label, weights in scenarios:
        print("\n" + short_rule)
        print("Testing: " + label)
        print(short_rule)

        file_stem = label.lower().replace(" ", "_")
        outcome = run_simulation_with_policies(
            n_agents=20,
            max_steps=100,
            policy_distribution=dict(zip(policy_names, weights)),
            output_file="results_" + file_stem + ".json",
        )
        final_stats_by_label[label] = outcome["final_stats"]

    # Summary table, one row per scenario.
    print("\n" + wide_rule)
    print("POLICY COMPARISON SUMMARY")
    print(wide_rule)

    for label, summary in final_stats_by_label.items():
        # Guard the denominator so a scenario with no finished projects reports 0.
        rate = summary["successful_projects"] / max(summary["finished_projects"], 1)
        row = (
            f"{label:20} | Success Rate: {rate:.3f} | "
            f"Finished: {summary['finished_projects']:3d} | "
            f"Rewards: {summary['total_rewards_distributed']:6.2f}"
        )
        print(row)


if __name__ == "__main__":
    # Step 1: one simulation using the function's default arguments.
    print("Running single simulation with balanced policies...")
    run_simulation_with_policies()

    # Step 2: run every predefined policy mix and print the comparison table.
    print(f"\n{'=' * 80}")
    print("Comparing different policy distributions...")
    compare_policy_performances()

Running single simulation with balanced policies...
Agent policy distribution: {'careerist': 8, 'mass_producer': 6, 'orthodox_scientist': 6}
Step 0: t=1 | finished=0 | success=0/0 (0.0%) | effort(total=0.00,/step=0.00,/fin_proj=0.00) | rewards(total=0.00,/step=0.00) | open=6 active=10
Step 10: t=11 | finished=0 | success=0/0 (0.0%) | effort(total=45.30,/step=4.12,/fin_proj=0.00) | rewards(total=0.00,/step=0.00) | open=6 active=37
Step 20: t=21 | finished=0 | success=0/0 (0.0%) | effort(total=103.30,/step=4.92,/fin_proj=0.00) | rewards(total=0.00,/step=0.00) | open=6 active=37
Step 30: t=31 | finished=0 | success=0/0 (0.0%) | effort(total=161.30,/step=5.20,/fin_proj=0.00) | rewards(total=0.00,/step=0.00) | open=6 active=37
Step 40: t=41 | finished=0 | success=0/0 (0.0%) | effort(total=219.30,/step=5.35,/fin_proj=0.00) | rewards(total=0.00,/step=0.00) | open=6 active=37
Simulation ended at step 49

Final Results:
Total Steps: 50
Finished Projects: 0
Successful Projects: 0
Success Rate: 0

KeyError: None