In [None]:
from overcooked_ai_py.agents.benchmarking import AgentEvaluator, LayoutGenerator
from overcooked_ai_py.agents.agent import Agent, AgentPair, StayAgent, GreedyAgent
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.planning.planners import MediumLevelActionManager, COUNTERS_MLG_PARAMS, MotionPlanner
from game import OvercookedGame, PlanningGame
import time
import json
import glob
import os

In [None]:
# Layout configuration and analysis
layout_name = "generation_cesar/layout_cesar_1"
layouts_dir = "./overcooked_ai_py/data/layouts/generation_cesar/"

print(f"🏗️ Évaluation du layout: {layout_name}")

# Load the MDP for the selected layout
mdp = OvercookedGridworld.from_layout_name(layout_name)
print(f"✅ MDP chargé: {mdp.width}x{mdp.height}")

# Count how often each terrain symbol appears in the grid, plus the number
# of player start positions
elements = {
    'tomato_dispensers': sum(row.count('T') for row in mdp.terrain_mtx),
    'onion_dispensers': sum(row.count('O') for row in mdp.terrain_mtx),
    'dish_dispensers': sum(row.count('D') for row in mdp.terrain_mtx),
    'pots': sum(row.count('P') for row in mdp.terrain_mtx),
    'serve_areas': sum(row.count('S') for row in mdp.terrain_mtx),
    'players': len(mdp.start_player_positions)
}

print(f"📊 Analyse du layout:")
for element, count in elements.items():
    print(f"   {element}: {count}")

# MLAM parameter configuration.
# Work on a copy: COUNTERS_MLG_PARAMS is a module-level dict imported from the
# planners module, so writing into it directly would leak this layout's
# counter settings into every later use of the defaults (the other cells of
# this notebook already copy it before editing).
counter_params = COUNTERS_MLG_PARAMS.copy()
if mdp.counter_goals:
    counter_params["counter_goals"] = mdp.counter_goals
    counter_params["counter_drop"] = mdp.counter_goals
    counter_params["counter_pickup"] = mdp.counter_goals

print(f"⚙️ Paramètres MLAM configurés")

In [None]:
# Method 1: evaluation with AgentEvaluator (recommended approach)
print("🎮 MÉTHODE 1: Évaluation avec AgentEvaluator")
print("="*50)

start_time = time.time()

# Build the evaluator with a longer horizon so recipes have time to complete
env_params = {"horizon": 400}  # longer horizon
agent_eval = AgentEvaluator.from_mdp(mdp, env_params, mlam_params=counter_params)

# Create the two GreedyAgent players
print("🤖 Création et configuration des GreedyAgent...")
greedyagent1 = GreedyAgent()
greedyagent2 = GreedyAgent()

# Attach the MDP to each agent and time that step on its own
# (the message printed below warns that it can be slow)
print("⚙️ Configuration MLAM (peut prendre du temps)...")
config_start = time.time()

greedyagent1.set_mdp(mdp)
greedyagent2.set_mdp(mdp)

config_time = time.time() - config_start
print(f"✅ Agents configurés en {config_time:.2f}s")

# Pair the agents
agent_pair = AgentPair(greedyagent1, greedyagent2)

# Play several evaluation games
num_games = 3
print(f"🚀 Lancement de {num_games} parties d'évaluation...")

eval_results = agent_eval.evaluate_agent_pair(
    agent_pair,
    num_games=num_games,
    native_eval=True,
    info=True
)

total_time = time.time() - start_time

print(f"\n📈 RÉSULTATS DE L'ÉVALUATION:")
print(f"   ⏱️ Temps total: {total_time:.2f}s")
print(f"   🎮 Parties jouées: {num_games}")

# Per-episode scores.
# NOTE(review): upstream overcooked_ai rollouts appear to store one total per
# episode under 'ep_returns', while 'ep_rewards' holds the per-timestep
# rewards — confirm against the installed version. 'ep_returns' is preferred
# when present; otherwise the original 'ep_rewards' key is used unchanged.
score_key = 'ep_returns' if 'ep_returns' in eval_results else 'ep_rewards'
if score_key in eval_results and len(eval_results[score_key]) > 0:
    rewards = eval_results[score_key]
    print(f"   📊 Scores: {rewards}")
    print(f"   📈 Score moyen: {sum(rewards)/len(rewards):.2f}")
    print(f"   🏆 Meilleur score: {max(rewards)}")

# Episode lengths (skipped when empty so the average cannot divide by zero)
if 'ep_lengths' in eval_results and len(eval_results['ep_lengths']) > 0:
    lengths = eval_results['ep_lengths']
    print(f"   🔢 Durées (steps): {lengths}")
    print(f"   ⚡ Durée moyenne: {sum(lengths)/len(lengths):.1f}")

# Show any other non-empty list/tuple metric whose first item is a plain number
for key, value in eval_results.items():
    if key not in ['ep_rewards', 'ep_returns', 'ep_lengths'] and isinstance(value, (list, tuple)) and len(value) > 0:
        if isinstance(value[0], (int, float)):
            print(f"   📋 {key}: {value}")

eval_results

KeyboardInterrupt: 

In [None]:
# Method 2: evaluation through the OvercookedGame infrastructure (as in app.py)
print("\n🎮 MÉTHODE 2: Évaluation avec OvercookedGame", "=" * 50, sep="\n")

# Configure the game module (required for OvercookedGame)
import game
game._configure(
    agent_dir="./static/assets/agents",  # directory holding the agents
    max_game_time=1000,
)

try:
    # Settings for one game played by two GreedyAgent players
    game_config = dict(
        layouts=[layout_name.rsplit('/', 1)[-1]],  # bare layout name only
        layouts_dir=layouts_dir,
        num_players=2,
        gameTime=60,  # 60 seconds
        playerZero='GreedyAgent',
        playerOne='GreedyAgent',
        mdp_params={},
        curr_trial_in_game=0,
        showPotential=False,
        randomized=False,
        planning_agent_id='GreedyAgent',
        config={'completion_link': None},
    )

    print("📦 Création du jeu avec PlanningGame...")
    game_instance = PlanningGame(**game_config)

    print("✅ Jeu créé avec succès!")
    print(f"   🏗️ Layout: {game_instance.curr_layout}")
    print(f"   ⚙️ MDP: {game_instance.mdp.width}x{game_instance.mdp.height}")
    print(f"   🤖 Agents: {len(game_instance.npc_policies)} IA")

    # List each NPC policy with the class that implements it
    for agent_id, agent in game_instance.npc_policies.items():
        print(f"     - {agent_id}: {type(agent).__name__}")

except Exception as exc:
    # Best-effort demo: report the failure and let the notebook continue
    print(f"❌ Erreur avec PlanningGame: {exc}")
    print("💡 Cette méthode nécessite une configuration plus complexe pour les nouveaux layouts")

In [None]:
# Method 3: evaluate several layouts in one run
print("\n🎮 MÉTHODE 3: Évaluation multiple layouts", "=" * 50, sep="\n")

def evaluate_multiple_layouts(layouts_directory, max_layouts=3, games_per_layout=2,
                              layout_prefix=None):
    """Evaluate several layouts with a pair of GreedyAgent players.

    Args:
        layouts_directory: Directory scanned for ``*.layout`` files.
        max_layouts: Maximum number of layouts evaluated; the first ones in
            alphabetical order of file path are kept.
        games_per_layout: Number of games played on each viable layout.
        layout_prefix: Prefix joined to each layout name (``prefix/name``)
            before it is passed to ``OvercookedGridworld.from_layout_name``.
            When ``None`` (default) the last path component of
            ``layouts_directory`` is used, so the layouts that are loaded are
            the ones that were listed.

    Returns:
        dict mapping each layout name to a result dict. Every entry has a
        ``'viable'`` flag. Viable entries also carry ``'elements'``,
        ``'eval_time'``, ``'games_played'`` and, when the evaluator reports
        them, score and length statistics. Non-viable or failed entries carry
        an ``'error'`` message. An empty dict is returned when no layout file
        is found.
    """

    # Find all layout files. glob returns them in arbitrary order, so sort to
    # make the `max_layouts` selection reproducible between runs.
    layout_files = sorted(glob.glob(os.path.join(layouts_directory, "*.layout")))
    # splitext removes only the trailing extension (a plain replace would also
    # strip ".layout" from the middle of a name).
    layout_names = [os.path.splitext(os.path.basename(f))[0] for f in layout_files]

    if not layout_names:
        print(f"❌ Aucun layout trouvé dans {layouts_directory}")
        return {}

    layout_names = layout_names[:max_layouts]
    print(f"✅ {len(layout_names)} layouts trouvés: {layout_names}")

    # Derive the prefix from the scanned directory instead of hard-coding one.
    # normpath drops a trailing separator so basename is not empty.
    if layout_prefix is None:
        layout_prefix = os.path.basename(os.path.normpath(layouts_directory))

    all_results = {}

    for i, layout_name in enumerate(layout_names, 1):
        print(f"\n🏗️ Layout {i}/{len(layout_names)}: {layout_name}")
        print("-" * 40)

        try:
            # Load the MDP
            full_layout_path = f"{layout_prefix}/{layout_name}" if layout_prefix else layout_name
            layout_mdp = OvercookedGridworld.from_layout_name(full_layout_path)

            # Count how often each terrain symbol appears in the grid
            layout_elements = {
                'tomato_dispensers': sum(row.count('T') for row in layout_mdp.terrain_mtx),
                'onion_dispensers': sum(row.count('O') for row in layout_mdp.terrain_mtx),
                'dish_dispensers': sum(row.count('D') for row in layout_mdp.terrain_mtx),
                'pots': sum(row.count('P') for row in layout_mdp.terrain_mtx),
                'serve_areas': sum(row.count('S') for row in layout_mdp.terrain_mtx)
            }

            # A layout is treated as viable only if every element type is present.
            # NOTE(review): this requires BOTH tomato and onion dispensers —
            # confirm that single-ingredient layouts should really be rejected.
            viable = all(count > 0 for count in layout_elements.values())

            print(f"   📊 Éléments: T={layout_elements['tomato_dispensers']}, "
                  f"O={layout_elements['onion_dispensers']}, "
                  f"D={layout_elements['dish_dispensers']}, "
                  f"P={layout_elements['pots']}, "
                  f"S={layout_elements['serve_areas']}")
            print(f"   {'✅' if viable else '❌'} Layout {'viable' if viable else 'non viable'}")

            if not viable:
                all_results[layout_name] = {
                    'viable': False,
                    'elements': layout_elements,
                    'error': 'Layout manque d\'éléments essentiels'
                }
                continue

            # Evaluation with AgentEvaluator
            start_eval = time.time()

            # MLAM parameters for this layout (copy so the shared defaults stay intact)
            layout_counter_params = COUNTERS_MLG_PARAMS.copy()
            if layout_mdp.counter_goals:
                layout_counter_params["counter_goals"] = layout_mdp.counter_goals
                layout_counter_params["counter_drop"] = layout_mdp.counter_goals
                layout_counter_params["counter_pickup"] = layout_mdp.counter_goals

            # Build the evaluator
            layout_env_params = {"horizon": 300}
            layout_eval = AgentEvaluator.from_mdp(layout_mdp, layout_env_params,
                                                  mlam_params=layout_counter_params)

            # Build the agents
            agent1 = GreedyAgent()
            agent2 = GreedyAgent()
            agent1.set_mdp(layout_mdp)
            agent2.set_mdp(layout_mdp)
            pair = AgentPair(agent1, agent2)

            # Evaluate
            print(f"   🚀 Évaluation {games_per_layout} parties...")
            results = layout_eval.evaluate_agent_pair(pair, num_games=games_per_layout, native_eval=True)

            eval_time = time.time() - start_eval

            # Collect the results
            layout_results = {
                'viable': True,
                'elements': layout_elements,
                'eval_time': eval_time,
                'games_played': games_per_layout
            }

            # NOTE(review): upstream overcooked_ai rollouts appear to store one
            # total per episode under 'ep_returns', while 'ep_rewards' holds the
            # per-timestep rewards — confirm. 'ep_returns' is preferred when
            # present; otherwise the original 'ep_rewards' key is used.
            score_key = 'ep_returns' if 'ep_returns' in results else 'ep_rewards'
            if score_key in results:
                rewards = results[score_key]
                layout_results.update({
                    'scores': rewards,
                    'avg_score': sum(rewards) / len(rewards),
                    'max_score': max(rewards),
                    'min_score': min(rewards)
                })
                print(f"   📈 Scores: {rewards} (moy: {layout_results['avg_score']:.1f})")

            if 'ep_lengths' in results:
                lengths = results['ep_lengths']
                layout_results.update({
                    'lengths': lengths,
                    'avg_length': sum(lengths) / len(lengths)
                })
                print(f"   ⏱️ Durées: {lengths} (moy: {layout_results['avg_length']:.1f})")

            print(f"   ✅ Évaluation terminée en {eval_time:.2f}s")

            all_results[layout_name] = layout_results

        except Exception as e:
            # Record the failure for this layout and carry on with the next one
            print(f"   ❌ Erreur: {e}")
            all_results[layout_name] = {
                'viable': False,
                'error': str(e)
            }

    return all_results

# Run the multi-layout evaluation
multi_results = evaluate_multiple_layouts(layouts_dir, max_layouts=3, games_per_layout=2)

# Summary of the results
print(f"\n🏆 RÉSUMÉ ÉVALUATION MULTIPLE")
print("=" * 50)

# Names of the layouts flagged as viable
viable_layouts = [name for name in multi_results if multi_results[name].get('viable', False)]
print(f"✅ Layouts viables: {len(viable_layouts)}/{len(multi_results)}")

if viable_layouts:
    # Rank viable layouts that have an average score, best first
    ranked = sorted(
        (
            (name, data['avg_score'])
            for name, data in multi_results.items()
            if data.get('viable') and 'avg_score' in data
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    print(f"\n🥇 CLASSEMENT PAR PERFORMANCE:")
    for i, (name, score) in enumerate(ranked, 1):
        # Medal for the top three, plain rank number after that
        medal = {1: "🥇", 2: "🥈", 3: "🥉"}.get(i, f"{i}.")
        print(f"   {medal} {name}: {score:.1f} points")

multi_results

In [None]:
# Method 4: detailed behavioural analysis of the GreedyAgent players
print("\n🧠 MÉTHODE 4: Analyse comportementale détaillée", "=" * 50, sep="\n")

def detailed_behavioral_analysis(layout_mdp, num_games=1, horizon=200):
    """Run GreedyAgent games on a layout and derive summary metrics.

    Args:
        layout_mdp: MDP of the layout to evaluate; its ``counter_goals``
            attribute is read to set the counter parameters.
        num_games: Number of games to play.
        horizon: Episode horizon passed to the evaluator. It is also the
            threshold for the completion and speed metrics.

    Returns:
        ``(analysis, results)``: ``analysis`` is a dict of metrics and
        ``results`` is the raw value returned by
        ``AgentEvaluator.evaluate_agent_pair``. ``analysis`` always holds
        ``'evaluation_time'``, ``'games_analyzed'`` and ``'horizon_used'``;
        score and length metrics are added only when the matching keys are
        present and non-empty in ``results``.
    """

    print(f"🔬 Analyse comportementale sur {num_games} partie(s)")

    # Configuration (copy so the shared default parameters stay intact)
    counter_params = COUNTERS_MLG_PARAMS.copy()
    if layout_mdp.counter_goals:
        counter_params["counter_goals"] = layout_mdp.counter_goals
        counter_params["counter_drop"] = layout_mdp.counter_goals
        counter_params["counter_pickup"] = layout_mdp.counter_goals

    env_params = {"horizon": horizon}
    evaluator = AgentEvaluator.from_mdp(layout_mdp, env_params, mlam_params=counter_params)

    # Agents
    agent1 = GreedyAgent()
    agent2 = GreedyAgent()
    agent1.set_mdp(layout_mdp)
    agent2.set_mdp(layout_mdp)

    # Timed evaluation
    start_time = time.time()
    results = evaluator.evaluate_agent_pair(
        AgentPair(agent1, agent2),
        num_games=num_games,
        native_eval=True
    )
    eval_time = time.time() - start_time

    print(f"⏱️ Temps d'évaluation: {eval_time:.2f}s")

    # Analysis of the results
    analysis = {
        'evaluation_time': eval_time,
        'games_analyzed': num_games,
        'horizon_used': horizon
    }

    # NOTE(review): upstream overcooked_ai rollouts appear to store one total
    # per episode under 'ep_returns', while 'ep_rewards' holds the per-timestep
    # rewards — confirm. 'ep_returns' is preferred when present; otherwise the
    # original 'ep_rewards' key is used.
    score_key = 'ep_returns' if 'ep_returns' in results else 'ep_rewards'
    # len() checks rather than truthiness: the sequences may be numpy arrays,
    # whose truth value is ambiguous.
    has_scores = score_key in results and len(results[score_key]) > 0
    has_lengths = 'ep_lengths' in results and len(results['ep_lengths']) > 0

    # Basic metrics
    if has_scores:
        rewards = results[score_key]
        # Compute mean and variance BEFORE building the dict: a dict literal is
        # evaluated in full before update() runs, so it cannot read keys that
        # the same update is about to insert.
        average_score = sum(rewards) / len(rewards)
        score_variance = sum((r - average_score) ** 2 for r in rewards) / len(rewards)
        analysis.update({
            'total_score': sum(rewards),
            'average_score': average_score,
            'score_variance': score_variance,
            # Higher mean and lower variance give higher consistency; the +1
            # keeps the ratio finite when the variance is zero.
            'consistency': average_score / (score_variance + 1)
        })
        print(f"📊 Score total: {analysis['total_score']}")
        print(f"📈 Score moyen: {analysis['average_score']:.2f}")
        print(f"📉 Variance: {analysis['score_variance']:.2f}")
        print(f"🎯 Consistance: {analysis['consistency']:.2f}")

    if has_lengths:
        lengths = results['ep_lengths']
        analysis.update({
            'average_length': sum(lengths) / len(lengths),
            # Share of games that ended strictly before the horizon
            'completion_rate': sum(1 for l in lengths if l < horizon) / len(lengths)
        })
        print(f"⏱️ Durée moyenne: {analysis['average_length']:.1f} steps")
        print(f"✅ Taux de complétion: {analysis['completion_rate']*100:.1f}%")

    # Trajectory information, when available
    if 'mdp_params' in results:
        print(f"🗺️ Paramètres MDP: {len(results['mdp_params'])} configurations")

    if 'metadatas' in results:
        metadatas = results['metadatas']
        if metadatas and len(metadatas) > 0:
            # Inspect the metadata for behavioural insights
            print(f"🔍 Métadonnées disponibles: {len(metadatas)} entrées")

            # Sample: the first 3 entries, and the first 5 items of each dict entry
            for i, metadata in enumerate(metadatas[:3]):
                if metadata and isinstance(metadata, dict):
                    print(f"   Partie {i+1}: {len(metadata)} événements")
                    for key, value in list(metadata.items())[:5]:
                        print(f"     {key}: {value}")

    # Advanced metrics
    if has_scores and has_lengths:
        rewards = results[score_key]
        lengths = results['ep_lengths']

        # Efficiency (score per step); zero-length episodes count as 0
        efficiency = [r/l if l > 0 else 0 for r, l in zip(rewards, lengths)]
        analysis['average_efficiency'] = sum(efficiency) / len(efficiency)
        print(f"⚡ Efficacité moyenne: {analysis['average_efficiency']:.3f} points/step")

        # Speed: share of games finished in under 70% of the horizon
        fast_games = sum(1 for l in lengths if l < horizon * 0.7)
        analysis['speed_performance'] = fast_games / len(lengths)
        print(f"🏃 Performance rapide: {analysis['speed_performance']*100:.1f}% des parties")

    return analysis, results

# Behavioural analysis of the main layout
behavioral_analysis, detailed_results = detailed_behavioral_analysis(mdp, num_games=2, horizon=300)

print(f"\n📋 RAPPORT D'ANALYSE COMPORTEMENTALE:", "=" * 50, sep="\n")

# Floats are shown with three decimals, everything else as-is
for metric, value in behavioral_analysis.items():
    print(f"   {metric}: {value:.3f}" if isinstance(value, float) else f"   {metric}: {value}")

behavioral_analysis