In [None]:
# ============================================
# IMPORTS
#pip install rlcard[torch]
#pip install pandas
#pip install numpy
# ============================================
import rlcard
from rlcard.agents import RandomAgent, CFRAgent
from rlcard.utils import set_seed
import numpy as np
import pandas as pd
import time
from collections import defaultdict

In [10]:
class RuleBasedAgent:
    """Aggressive heuristic agent: always prioritises attack cards (+2, +4, Skip, Reverse).

    Cards are recognised by keyword in the textual action names taken from
    ``state['raw_legal_actions']`` (expected to look like ``'r-draw_2'`` or
    ``'b-skip'`` -- TODO confirm against the installed RLCard version). The value
    returned by ``step`` is always one of the keys of ``state['legal_actions']``.
    When the textual names are unavailable the agent falls back to the text of the
    action key itself.
    """

    def __init__(self, num_actions):
        """Remember the size of the action space.

        Args:
            num_actions: Number of actions in the environment's action space.
        """
        # Presumably tells the environment that `step` returns encoded action ids
        # rather than raw action strings -- verify against the environment in use.
        self.use_raw = False
        self.num_actions = num_actions

    @staticmethod
    def _describe_actions(state):
        """Return ``(action, label)`` pairs for every legal action.

        ``label`` is a lowercase card name that is only used for keyword matching;
        ``action`` is the key from ``state['legal_actions']`` that must be returned.
        """
        actions = list(state['legal_actions'].keys())
        raw_actions = state.get('raw_legal_actions')
        if raw_actions is not None:
            # The raw list may repeat a card that is held twice while the key mapping
            # cannot, so drop repeats (keeping first-seen order) before pairing.
            # Assumes both collections list the actions in the same order -- TODO confirm.
            labels = list(dict.fromkeys(str(raw).lower() for raw in raw_actions))
            if len(labels) == len(actions):
                return list(zip(actions, labels))
        # No usable card names: fall back to the text of the action key itself.
        return [(action, str(action).lower()) for action in actions]

    @staticmethod
    def _hand_size(state):
        """Return the number of cards in hand.

        Uses ``state['raw_obs']['hand']`` when present; otherwise falls back to the
        sum over the leading part of the encoded observation.
        """
        raw_obs = state.get('raw_obs')
        if isinstance(raw_obs, dict) and 'hand' in raw_obs:
            return len(raw_obs['hand'])
        return np.sum(state['obs'][:60])

    def step(self, state):
        """Choose an action for the given state.

        Args:
            state: Mapping with at least ``'legal_actions'``; ``'raw_legal_actions'``
                and ``'raw_obs'`` are used when present, ``'obs'`` otherwise.

        Returns:
            One of the legal action keys, or ``None`` when there is no legal action.
        """
        labelled_actions = self._describe_actions(state)
        if not labelled_actions:
            return None

        hand_size = self._hand_size(state)

        # Sort the legal actions into card categories.
        draw_cards = []      # +2 and +4 (most damaging)
        skip_reverse = []    # Skip and Reverse
        numbers = []         # number cards (and anything unrecognised)
        wilds = []           # plain Wild cards

        for action, label in labelled_actions:
            if 'draw' in label:
                draw_cards.append(action)
            elif 'skip' in label or 'reverse' in label:
                skip_reverse.append(action)
            elif 'wild' in label:
                wilds.append(action)
            else:
                numbers.append(action)

        if hand_size <= 1:
            # Last card: play whatever is available, number cards first.
            priority = (numbers, draw_cards, skip_reverse, wilds)
        else:
            # Otherwise attack first: +2/+4 > Skip/Reverse > numbers > Wild.
            priority = (draw_cards, skip_reverse, numbers, wilds)

        # Every legal action landed in exactly one bucket, so one is non-empty.
        for bucket in priority:
            if bucket:
                return np.random.choice(bucket)

    def eval_step(self, state):
        """Evaluation-time move: same policy as ``step``, plus an empty info object."""
        return self.step(state), []

In [11]:
class ProbabilisticAgent:
    """Defensive stochastic agent: favours number cards and holds back special cards.

    Each legal action gets a weight that depends on its card category and on the
    number of cards in hand; the move is then sampled in proportion to the weights.
    Cards are recognised by keyword in the textual action names taken from
    ``state['raw_legal_actions']`` (expected to look like ``'r-wild'`` or
    ``'b-skip'`` -- TODO confirm against the installed RLCard version). When those
    names are unavailable the agent falls back to the text of the action key.
    """

    def __init__(self, num_actions):
        """Remember the size of the action space.

        Args:
            num_actions: Number of actions in the environment's action space.
        """
        # Presumably tells the environment that `step` returns encoded action ids
        # rather than raw action strings -- verify against the environment in use.
        self.use_raw = False
        self.num_actions = num_actions

    @staticmethod
    def _describe_actions(state):
        """Return ``(action, label)`` pairs for every legal action.

        ``label`` is a lowercase card name that is only used for keyword matching;
        ``action`` is the key from ``state['legal_actions']`` that must be returned.
        """
        actions = list(state['legal_actions'].keys())
        raw_actions = state.get('raw_legal_actions')
        if raw_actions is not None:
            # The raw list may repeat a card that is held twice while the key mapping
            # cannot, so drop repeats (keeping first-seen order) before pairing.
            # Assumes both collections list the actions in the same order -- TODO confirm.
            labels = list(dict.fromkeys(str(raw).lower() for raw in raw_actions))
            if len(labels) == len(actions):
                return list(zip(actions, labels))
        # No usable card names: fall back to the text of the action key itself.
        return [(action, str(action).lower()) for action in actions]

    @staticmethod
    def _hand_size(state):
        """Return the number of cards in hand.

        Uses ``state['raw_obs']['hand']`` when present; otherwise falls back to the
        sum over the leading part of the encoded observation.
        """
        raw_obs = state.get('raw_obs')
        if isinstance(raw_obs, dict) and 'hand' in raw_obs:
            return len(raw_obs['hand'])
        return np.sum(state['obs'][:60])

    @staticmethod
    def _weight(label, hand_size):
        """Return the unnormalised sampling weight of one action."""
        is_wild = 'wild' in label
        is_draw = 'draw' in label
        is_skip_or_reverse = 'skip' in label or 'reverse' in label

        weight = 1.0

        # Factor 1: number cards are strongly preferred.
        if not (is_wild or is_draw or is_skip_or_reverse):
            weight *= 5.0

        # Factor 2: special cards are saved for the end of the game. A card whose
        # name matches several keywords (e.g. a wild +4) gets every matching factor.
        if is_wild:
            weight *= 3.0 if hand_size <= 2 else 0.1
        if is_draw:
            weight *= 2.0 if hand_size <= 3 else 0.3
        if is_skip_or_reverse:
            weight *= 1.5 if hand_size <= 4 else 0.5

        # Factor 3: urgency with a large hand.
        # NOTE(review): this scales every action by the same amount, so it cancels
        # out when the weights are normalised and does not change the policy.
        if hand_size > 8:
            weight *= 1.5

        return weight

    def step(self, state):
        """Sample an action for the given state.

        Args:
            state: Mapping with at least ``'legal_actions'``; ``'raw_legal_actions'``
                and ``'raw_obs'`` are used when present, ``'obs'`` otherwise.

        Returns:
            One of the legal action keys, or ``None`` when there is no legal action.
        """
        labelled_actions = self._describe_actions(state)
        if not labelled_actions:
            return None

        hand_size = self._hand_size(state)
        actions = [action for action, _ in labelled_actions]
        weights = np.array(
            [self._weight(label, hand_size) for _, label in labelled_actions],
            dtype=float,
        )

        # Normalise the weights into a probability distribution.
        probabilities = weights / weights.sum()
        return np.random.choice(actions, p=probabilities)

    def eval_step(self, state):
        """Evaluation-time move: same policy as ``step``, plus an empty info object."""
        return self.step(state), []


In [12]:
# ============================================
# SISTEMA DE EVALUACIÓN
# ============================================
class UNOEvaluator:
    """Round-robin harness that plays agents against each other and tabulates results.

    The environment passed in must provide ``set_agents``, ``run`` and
    ``num_players``; agents are paired two at a time.
    """

    # Column layouts, kept here so that an empty result set still produces a frame
    # with the expected columns.
    _SUMMARY_COLUMNS = [
        'Agente',
        'Tasa_Victoria_%',
        'Partidas_Jugadas',
        'Victorias',
        'Derrotas',
        'Tiempo_Promedio_Jugada_ms',
        'Turnos_Promedio_Por_Juego',
        'Desv_Est_Tiempo_ms',
    ]
    _DETAILED_COLUMNS = [
        'Agente',
        'Partida',
        'Victoria',
        'Tiempo_Jugada_ms',
        'Turnos_Totales',
    ]

    def __init__(self, env):
        """Store the environment used for every game.

        Args:
            env: Environment exposing ``set_agents``, ``run`` and ``num_players``.
        """
        self.env = env
        self.metrics = defaultdict(lambda: defaultdict(list))

    @staticmethod
    def _count_turns(trajectories):
        """Count the moves played in one game.

        Each trajectory is expected to interleave state dicts with the actions
        taken (TODO confirm for the installed RLCard version), so only the
        non-dict entries are counted as moves.
        """
        return sum(
            1
            for trajectory in trajectories
            for entry in trajectory
            if not isinstance(entry, dict)
        )

    @staticmethod
    def _record_game(stats, won, avg_time_per_move, total_turns):
        """Append the outcome of one game to a single agent's statistics."""
        stats['total_games'] += 1
        stats['move_times'].append(avg_time_per_move)
        stats['turns_per_game'].append(total_turns)
        if won:
            stats['wins'] += 1
            stats['scores'].append(1)
        else:
            stats['losses'] += 1
            stats['scores'].append(0)

    def play_game(self, agents, agent_names):
        """Play a single game and collect its metrics.

        Args:
            agents: Agents to seat for this game, in seat order. When empty or
                ``None`` the agents already set on the environment are used.
            agent_names: Display names matching ``agents``.

        Returns:
            Dict with ``agent_names``, ``payoffs``, ``num_turns`` and
            ``cards_remaining`` (always zeros; remaining cards are not extracted).
        """
        if agents:
            self.env.set_agents(agents)

        trajectories, payoffs = self.env.run(is_training=False)

        return {
            'agent_names': agent_names,
            'payoffs': payoffs,
            'num_turns': self._count_turns(trajectories),
            # Remaining cards are deliberately not extracted; winners are
            # determined from the payoffs only.
            'cards_remaining': [0] * len(payoffs),
        }

    def evaluate_agents(self, agents_dict, num_games=1000, progress_every=50):
        """Evaluate several agents by playing every pair against each other.

        Args:
            agents_dict: Mapping of display name to agent.
            num_games: Total number of games, split evenly across all pairs.
            progress_every: Print a progress line every this many completed games;
                a falsy value disables progress output.

        Returns:
            Dict mapping each agent name to its ``wins``, ``losses``,
            ``total_games``, ``move_times``, ``turns_per_game`` and ``scores``.
            ``move_times`` holds the game-wide average time per move, so both
            agents of a game record the same value.
        """
        agent_names = list(agents_dict.keys())
        all_agents = list(agents_dict.values())

        print(f"Evaluando {len(agent_names)} agentes...")
        print(f"Nota: UNO soporta {self.env.num_players} jugadores por partida")

        results = {name: {
            'wins': 0,
            'losses': 0,
            'total_games': 0,
            'move_times': [],
            'turns_per_game': [],
            'scores': []
        } for name in agent_names}

        num_pairs = len(agent_names) * (len(agent_names) - 1) // 2
        if num_pairs == 0:
            # Fewer than two agents: there is nothing to pair up, and the
            # per-pair split below would divide by zero.
            return results

        games_per_pair = num_games // num_pairs
        print(f"Jugando {games_per_pair} partidas por cada pareja de agentes")

        game_count = 0
        for i in range(len(agent_names)):
            for j in range(i + 1, len(agent_names)):
                agent1_name = agent_names[i]
                agent2_name = agent_names[j]
                agent1 = all_agents[i]
                agent2 = all_agents[j]

                print(f"\n{agent1_name} vs {agent2_name} ({games_per_pair} partidas)...")

                for game_num in range(games_per_pair):
                    # Alternate seats so that neither agent of the pair always
                    # occupies the same seat (seat order may confer an advantage).
                    swapped = game_num % 2 == 1
                    seating = [agent2, agent1] if swapped else [agent1, agent2]

                    try:
                        self.env.set_agents(seating)

                        # perf_counter is monotonic and meant for interval timing.
                        start_time = time.perf_counter()
                        trajectories, payoffs = self.env.run(is_training=False)
                        elapsed = time.perf_counter() - start_time

                        total_turns = self._count_turns(trajectories)
                        if swapped:
                            payoff1, payoff2 = payoffs[1], payoffs[0]
                        else:
                            payoff1, payoff2 = payoffs[0], payoffs[1]
                    except Exception as e:
                        # Best effort: a failed game is reported and skipped, and
                        # nothing is recorded for it.
                        print(f"  Error en partida: {str(e)[:50]}")
                        continue

                    avg_time_per_move = elapsed / max(total_turns, 1)
                    self._record_game(
                        results[agent1_name], payoff1 > 0, avg_time_per_move, total_turns
                    )
                    self._record_game(
                        results[agent2_name], payoff2 > 0, avg_time_per_move, total_turns
                    )

                    game_count += 1
                    if progress_every and game_count % progress_every == 0:
                        print(f"  Completadas {game_count} partidas totales")

        return results

    def create_summary_dataframe(self, results):
        """Build a per-agent summary table sorted by win rate (best first).

        Agents without any recorded game are left out. An empty result set yields
        an empty frame that still has the summary columns.
        """
        summary_data = []

        for agent_name, metrics in results.items():
            if metrics['total_games'] > 0:
                move_times = metrics['move_times']
                turns = metrics['turns_per_game']
                summary_data.append({
                    'Agente': agent_name,
                    'Tasa_Victoria_%': (metrics['wins'] / metrics['total_games']) * 100,
                    'Partidas_Jugadas': metrics['total_games'],
                    'Victorias': metrics['wins'],
                    'Derrotas': metrics['losses'],
                    'Tiempo_Promedio_Jugada_ms': np.mean(move_times) * 1000 if move_times else 0,
                    'Turnos_Promedio_Por_Juego': np.mean(turns) if turns else 0,
                    'Desv_Est_Tiempo_ms': np.std(move_times) * 1000 if move_times else 0
                })

        # Passing the columns explicitly keeps sort_values valid for an empty table.
        df = pd.DataFrame(summary_data, columns=self._SUMMARY_COLUMNS)
        return df.sort_values('Tasa_Victoria_%', ascending=False)

    def create_detailed_dataframe(self, results):
        """Build a table with one row per agent per recorded game."""
        detailed_data = []

        for agent_name, metrics in results.items():
            move_times = metrics['move_times']
            turns = metrics['turns_per_game']
            for index, score in enumerate(metrics['scores']):
                detailed_data.append({
                    'Agente': agent_name,
                    'Partida': index + 1,
                    'Victoria': score,
                    'Tiempo_Jugada_ms': move_times[index] * 1000 if index < len(move_times) else 0,
                    'Turnos_Totales': turns[index] if index < len(turns) else 0
                })

        return pd.DataFrame(detailed_data, columns=self._DETAILED_COLUMNS)

In [16]:
def main(num_games=100000):
    """Run the agent tournament, print the results and export them to CSV.

    Writes ``uno_agents_summary.csv`` and ``uno_agents_detailed.csv`` to the
    current working directory.

    Args:
        num_games: Total number of games, split evenly across all agent pairs.
    """
    seed = 42
    set_seed(seed)
    # Seed the environment too: RLCard environments are expected to take their
    # seed through `config`, independently of the global generators seeded by
    # `set_seed` -- TODO confirm for the installed RLCard version.
    env = rlcard.make('uno', config={'seed': seed})

    print("Inicializando agentes...")

    # Four agents: random baseline, aggressive rules, defensive probabilistic, CFR.
    # NOTE(review): the CFR agent is neither trained nor loaded from a checkpoint
    # here, so presumably it plays its untrained policy -- confirm this is the
    # intended baseline.
    agents_dict = {
        'Random': RandomAgent(num_actions=env.num_actions),
        'Reglas': RuleBasedAgent(num_actions=env.num_actions),
        'Probabilistico': ProbabilisticAgent(num_actions=env.num_actions),
        'CFR': CFRAgent(env, model_path=None)
    }

    print(f"Total de agentes: {len(agents_dict)}")

    evaluator = UNOEvaluator(env)
    results = evaluator.evaluate_agents(agents_dict, num_games=num_games)

    print("\n" + "="*60)
    print("RESUMEN DE RESULTADOS")
    print("="*60)

    summary_df = evaluator.create_summary_dataframe(results)
    print(summary_df.to_string(index=False))

    detailed_df = evaluator.create_detailed_dataframe(results)

    summary_df.to_csv('uno_agents_summary.csv', index=False)
    detailed_df.to_csv('uno_agents_detailed.csv', index=False)

    print("\n✓ Archivos guardados:")
    print("  - uno_agents_summary.csv (resumen por agente)")
    print("  - uno_agents_detailed.csv (datos de cada partida)")

    print("\n" + "="*60)
    print("ANÁLISIS COMPARATIVO")
    print("="*60)

    for agent_name in agents_dict.keys():
        metrics = results[agent_name]
        total_games = metrics['total_games']
        move_times = metrics['move_times']
        # An agent may have no completed games (e.g. every game raised an error);
        # report zeros instead of dividing by zero or averaging an empty list.
        win_rate = (metrics['wins'] / total_games * 100) if total_games else 0.0
        mean_move_ms = np.mean(move_times) * 1000 if move_times else 0.0
        print(f"\n{agent_name}:")
        print(f"  Victorias: {metrics['wins']}/{total_games}")
        print(f"  Tasa victoria: {win_rate:.2f}%")
        print(f"  Tiempo: {mean_move_ms:.3f} ms")


if __name__ == "__main__":
    main()

Inicializando agentes...
Total de agentes: 4
Evaluando 4 agentes...
Nota: UNO soporta 2 jugadores por partida
Jugando 16666 partidas por cada pareja de agentes

Random vs Reglas (16666 partidas)...
  Completadas 50 partidas totales
  Completadas 100 partidas totales
  Completadas 150 partidas totales
  Completadas 200 partidas totales
  Completadas 250 partidas totales
  Completadas 300 partidas totales
  Completadas 350 partidas totales
  Completadas 400 partidas totales
  Completadas 450 partidas totales
  Completadas 500 partidas totales
  Completadas 550 partidas totales
  Completadas 600 partidas totales
  Completadas 650 partidas totales
  Completadas 700 partidas totales
  Completadas 750 partidas totales
  Completadas 800 partidas totales
  Completadas 850 partidas totales
  Completadas 900 partidas totales
  Completadas 950 partidas totales
  Completadas 1000 partidas totales
  Completadas 1050 partidas totales
  Completadas 1100 partidas totales
  Completadas 1150 partidas to