In [5]:
!nvidia-smi
!pip install rlcard tensorflow
!pip install rlcard[torch]

Mon Sep 30 22:12:38 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 561.09                 Driver Version: 561.09         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3070 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   40C    P8             14W /  115W |    1911MiB /   8192MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

  You can safely remove it manually.
  You can safely remove it manually.




In [9]:
import numpy as np

import rlcard
from rlcard.models.model import Model

class UNORuleAgentV2(object):
    ''' UNO rule agent, version 2.

    A hand-written baseline that reads the raw state and answers with raw
    action strings, as signalled by ``use_raw``.
    '''

    def __init__(self):
        # Tells the caller that this agent works on raw states and actions.
        self.use_raw = True

    def step(self, state):
        ''' Predict the action given the raw state, using a naive rule.

        The rule, in priority order:
            1. Return 'draw' when it is among the legal actions.
            2. If a wild-draw-4 is legal, play it and name the color that
               appears most often among the non-wild cards in hand.
            3. Otherwise pick at random among the legal non-wild actions
               (wild actions are only used when nothing else is legal).

        Args:
            state (dict): State holding 'raw_legal_actions' (list of action
                strings) and 'raw_obs' (dict with the 'hand' list)

        Returns:
            action (str): Predicted action
        '''
        legal_actions = state['raw_legal_actions']
        raw_obs = state['raw_obs']
        if 'draw' in legal_actions:
            return 'draw'

        hand = raw_obs['hand']

        # partition() never raises on an action without a dash, unlike
        # split('-')[1].
        wild_4_actions = [
            action for action in legal_actions
            if action.partition('-')[2] == 'wild_draw_4'
        ]
        if wild_4_actions:
            color_nums = self.count_colors(self.filter_wild(hand))
            if not color_nums:
                # No card left to base the color choice on: play the first
                # legal wild-draw-4 unchanged instead of failing in max().
                return wild_4_actions[0]
            # On a tie, max() keeps the color encountered first in the hand.
            return max(color_nums, key=color_nums.get) + '-wild_draw_4'

        # Without wild-4, choose randomly. np.random.choice returns a numpy
        # string scalar, so convert it to the plain str that is documented.
        return str(np.random.choice(self.filter_wild(legal_actions)))

    def eval_step(self, state):
        ''' Step for evaluation. Same decision as ``step``.

        Args:
            state (dict): Same state as accepted by ``step``

        Returns:
            (tuple): The predicted action and an empty list
        '''
        return self.step(state), []

    @staticmethod
    def filter_wild(hand):
        ''' Filter out the wild cards. If all are wild cards, nothing is filtered.

        Args:
            hand (list): A list of UNO card strings

        Returns:
            filtered_hand (list): The non-wild cards, or ``hand`` itself when
                every card is wild
        '''
        # Characters 2..5 read 'wild' for both 'x-wild' and 'x-wild_draw_4'.
        filtered_hand = [card for card in hand if card[2:6] != 'wild']
        return filtered_hand if filtered_hand else hand

    @staticmethod
    def count_colors(hand):
        ''' Count the number of cards of each color in hand.

        Args:
            hand (list): A list of UNO card strings

        Returns:
            color_nums (dict): Number of cards per color, keyed by the first
                character of each card string
        '''
        color_nums = {}
        for card in hand:
            color = card[0]
            color_nums[color] = color_nums.get(color, 0) + 1
        return color_nums

class UNORuleModelV2(Model):
    ''' Model wrapper that exposes UNO rule agents (version 2), one per seat
    '''

    def __init__(self):
        ''' Build the agent list

        An UNO environment is created only to read its number of players.
        '''
        num_players = rlcard.make('uno').num_players
        shared_agent = UNORuleAgentV2()
        # Every seat refers to the very same agent object.
        self.rule_agents = [shared_agent] * num_players

    @property
    def agents(self):
        ''' Agents to seat at each position of the game

        Returns:
            agents (list): The list built in the constructor

        Note: Each agent is expected to provide step and eval_step, like an
              RL agent does.
        '''
        return self.rule_agents

    @property
    def use_raw(self):
        ''' Whether raw states and raw actions are used

        Returns:
            use_raw (boolean): Always True for this model
        '''
        return True

In [9]:
import rlcard
from rlcard import models
from rlcard.agents.human_agents.uno_human_agent import HumanAgent, _print_action

# Make environment: seat 0 is the human player, seat 1 the rule-based agent
env = rlcard.make('uno')
human_agent = HumanAgent(env.num_actions)
rule_agent = UNORuleModelV2().agents[0]
env.set_agents([
    human_agent,
    rule_agent,
])

while True:
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print other players action
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    # Walk the record backwards and collect the trailing actions until one
    # taken by state['current_player'] is reached.
    for i in range(1, len(action_record)+1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses ', end='')
        _print_action(pair[1])
        print('')

    print('===============     Result     ===============')
    if payoffs[0] > 0:
        print('You win!')
    else:
        print('You lose!')
    print('')
    # An empty answer leaves the loop, so the prompt has to say so; the old
    # "Press any key to continue..." text ended the session on a bare Enter.
    user_input = input("Press Enter to quit, or type anything and press Enter to play again...")
    if user_input == '':
        break

>> Start a new game
{'hand': ['y-9', 'y-7', 'r-0', 'y-8', 'r-6', 'b-1', 'b-wild'], 'target': 'r-6', 'played_cards': ['r-6'], 'legal_actions': ['r-0', 'r-6', 'r-wild', 'g-wild', 'b-wild', 'y-wild'], 'num_cards': [7, 7], 'num_players': 2, 'current_player': 0}

[33m9[0m, [33m7[0m, [31m0[0m, [33m8[0m, [31m6[0m, [34m1[0m, Wild
[31m6[0m
Player 1 has 7 cards.
0: [31m0[0m, 1: [31m6[0m, 2: [31mWild[0m, 3: [32mWild[0m, 4: [34mWild[0m, 5: [33mWild[0m



ValueError: invalid literal for int() with base 10: 'm'

In [2]:
# TODO
import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.uno_rule_models import UNORuleModelV1

def evaluate_agents(num_games=10000):
    ''' Play UNO games between the rule agent and a random agent and print the win statistics

    The rule-based agent sits in seat 0 and the random agent in seat 1.
    A game is credited to a seat only when that seat's payoff is positive;
    games in which neither payoff is positive are reported separately
    instead of being counted as wins of the random agent.

    Args:
        num_games (int): Number of games to play, at least 1

    Raises:
        ValueError: If num_games is smaller than 1
    '''
    # Fail early: the win rates below divide by num_games.
    if num_games < 1:
        raise ValueError("num_games must be at least 1")

    # Create the environment for the Uno game
    env = rlcard.make('uno')

    # Rule-based agent (seat 0) against a random agent (seat 1)
    rule_agent = UNORuleModelV2().agents[0]
    random_agent = RandomAgent(num_actions=env.num_actions)
    env.set_agents([rule_agent, random_agent])

    # Result counters
    rule_agent_wins = 0
    random_agent_wins = 0

    for _ in range(num_games):
        # Only the payoffs are needed; the trajectories are discarded.
        _, payoffs = env.run(is_training=False)

        # payoffs[0] belongs to the rule-based agent, payoffs[1] to the random one
        if payoffs[0] > 0:
            rule_agent_wins += 1
        elif payoffs[1] > 0:
            random_agent_wins += 1

    games_without_winner = num_games - rule_agent_wins - random_agent_wins

    # Print the final results
    print(f"Après {num_games} parties :")
    print(f"Agent basé sur des règles a gagné {rule_agent_wins} fois")
    print(f"Agent aléatoire a gagné {random_agent_wins} fois")
    print(f"Taux de victoire de l'agent basé sur des règles : {rule_agent_wins / num_games:.2%}")
    print(f"Taux de victoire de l'agent aléatoire : {random_agent_wins / num_games:.2%}")
    # Only shown when it happens, so the usual output is unchanged.
    if games_without_winner:
        print(f"Parties sans vainqueur : {games_without_winner}")

# Evaluate the agents over 1000 games
evaluate_agents(num_games=1000)


Après 1000 parties :
Agent basé sur des règles a gagné 548 fois
Agent aléatoire a gagné 452 fois
Taux de victoire de l'agent basé sur des règles : 54.80%
Taux de victoire de l'agent aléatoire : 45.20%


In [24]:
# TODO
import rlcard
from rlcard import models
from rlcard.agents.random_agent import RandomAgent
from rlcard.models.uno_rule_models import UNORuleModelV1
import torch

def evaluate_agents(num_games=10000, agents: list = None):
    ''' Play UNO games between two agents and print the win statistics

    A game is credited to a seat only when that seat's payoff is positive;
    games in which neither payoff is positive are reported separately
    instead of being counted as wins of the second agent.

    Args:
        num_games (int): Number of games to play, at least 1
        agents (list): The two agents to seat, in seat order. When omitted,
            two random agents are created for the environment built here.

    Raises:
        ValueError: If num_games is smaller than 1
    '''
    # Fail early: the win rates below divide by num_games.
    if num_games < 1:
        raise ValueError("num_games must be at least 1")

    # Create the environment for the Uno game
    env = rlcard.make('uno')

    # The default line-up is built per call from the local environment. The
    # previous default was evaluated once at definition time from a global
    # `env` left behind by another cell, and was a shared mutable list.
    if agents is None:
        agents = [RandomAgent(num_actions=env.num_actions) for _ in range(2)]

    # Attach the agents to the environment
    env.set_agents(agents)

    # Result counters
    first_agent_wins = 0
    second_agent_wins = 0

    for _ in range(num_games):
        # Only the payoffs are needed; the trajectories are discarded.
        _, payoffs = env.run(is_training=False)

        # payoffs[0] belongs to the first agent, payoffs[1] to the second
        if payoffs[0] > 0:
            first_agent_wins += 1
        elif payoffs[1] > 0:
            second_agent_wins += 1

    games_without_winner = num_games - first_agent_wins - second_agent_wins

    # Print the final results
    print(f"Après {num_games} parties :")
    print(f"Agent 1 a gagné {first_agent_wins} fois")
    print(f"Agent 2 a gagné {second_agent_wins} fois")
    print(f"Taux de victoire de l'agent 1 : {first_agent_wins / num_games:.2%}")
    print(f"Taux de victoire de l'agent 2 : {second_agent_wins / num_games:.2%}")
    # Only shown when it happens, so the usual output is unchanged.
    if games_without_winner:
        print(f"Parties sans vainqueur : {games_without_winner}")



# Initialize the rule-based agents
rule_agent_1 = UNORuleModelV1().agents[0]
rule_agent_2 = UNORuleModelV2().agents[0]

# Load a DQN agent from a checkpoint.
# The loaded object is used directly as an agent below, so the file has to
# hold a whole pickled object rather than a bare state dict; that requires
# weights_only=False, stated explicitly so the call neither warns nor depends
# on the PyTorch default.
# SECURITY: unpickling can execute arbitrary code - only load checkpoints
# that come from a trusted source.
load_checkpoint_path = "experiments/model.pth"
dqn_agent = torch.load(load_checkpoint_path, weights_only=False)

# Load a pre-trained CFR model
# NOTE(review): this is the 'leduc-holdem-cfr' model, not an UNO one, and it
# is not used in this cell - confirm it is still needed.
cfr_agent = models.load('leduc-holdem-cfr').agents[0]

# Initialize a random agent
# NOTE(review): 61 is presumably the UNO action count; reading it from the
# environment's num_actions would avoid the hard-coded value - confirm.
random_agent = RandomAgent(num_actions=61)

# Evaluate the agents over 1000 games
evaluate_agents(num_games=1000, agents=[dqn_agent, rule_agent_2])


  dqn_agent=torch.load(load_checkpoint_path)


Après 1000 parties :
Agent 1 a gagné 457 fois
Agent 2 a gagné 543 fois
Taux de victoire de l'agent 1 : 45.70%
Taux de victoire de l'agent 2 : 54.30%


In [22]:
import torch
# Report the name of CUDA device 0, or 'nothing' when torch sees no CUDA device.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print('nothing')


nothing
