# 6 qui Prend - The Card Game

## Game Rules

In [1]:
# Imports
import os
import numpy as np
from collections import deque
import tkinter as tk
import random

In [2]:
# Constants
NB_TURNS = 10
NB_CARDS = 104
NB_ROWS = 4
CARDS_PER_ROWS = 6

NB_PLAYERS = 2

In [3]:
class Card:
    def __init__(self, value):
        assert 1 <= value <= NB_CARDS
        # Value of the card
        self.value = value
        # Number of bullheads on the card
        if (value % 55 == 0):
            self.bullheads = 7
        elif(value % 11 == 0):
            self.bullheads = 5
        elif (value % 10 == 0):
            self.bullheads = 3
        elif (value % 10 == 5):
                self.bullheads = 2
        else:
            self.bullheads = 1
        

    def __str__(self):
        return f" |{self.value:3d}(*{self.bullheads}*)| "

class Deck:
    def __init__(self):
        self.cards = [Card(i) for i in range(1, NB_CARDS + 1)]
        np.random.shuffle(self.cards)

    def draw(self):
        assert len(self.cards) > 0
        return self.cards.pop()

In [4]:
class Player:
    def __init__(self, name):
        self.name = name
        self.hand = []
        self.bullheads = 0
    
    def choose_card(self, gameboard, all_played_cards):
        print(gameboard)
        print(self)
        try:
            card = int(input(f"{self.name}, choose a card: "))
            assert card in [c.value for c in self.hand], "Card not in hand"
        except:
            print("Please choose a valid card")
            return self.choose_card(gameboard, all_played_cards)
        return Card(card)
    
    def choose_row(self, gameboard, played, all_played_cards):
        print(gameboard)
        try:
            row = int(input(f"{self.name}, choose a row: "))
            assert 1 <= row <= NB_ROWS, "Row not in range"
        except:
            print("Please choose a valid row")
            return self.choose_row(gameboard, played, all_played_cards)
        return row

    def __str__(self):
        return f"{self.name}: {','.join(str(card) for card in self.hand)}"

In [5]:
class Gameboard:
    def __init__(self, deck):
        self.deck = deck
        self.board = [[deck.draw()] for _ in range(NB_ROWS)]
    
    def clear_row(self, row):
        bullheads = sum([card.bullheads for card in self.board[row][:-1]])
        self.board[row] = [self.board[row][-1]]
        return bullheads
    
    def can_play_card(self, card):
        return any([card.value > row[-1].value for row in self.board])
    
    def play_card(self, card):
        assert self.can_play_card(card)
        row = max((i for i in range(NB_ROWS) if self.board[i][-1].value < card.value), key=lambda i: self.board[i][-1].value)
        self.board[row].append(card)
        bullheads = 0
        if len(self.board[row]) >= CARDS_PER_ROWS:
            bullheads = self.clear_row(row)
        return bullheads

    def replace_row(self, card, row):
        assert not self.can_play_card(card)
        self.board[row].append(card)
        bullheads = self.clear_row(row)
        return bullheads

    def __str__(self):
        return "\n".join([
            "=-----------=" * CARDS_PER_ROWS + "\n" + \
            " ".join([str(card) for card in row]) for row in self.board]) \
            + "\n" + "=-----------=" * CARDS_PER_ROWS

In [6]:
class Game:
    def __init__(self, players, display=True):
        self.display = display
        # Initialize the deck and gameboard
        self.deck = Deck()
        self.gameboard = Gameboard(self.deck)
        self.players = players
        self.all_played_cards = []
        # Initialize the players
        self.init_players()

    def init_players(self):
        for player in self.players:
            player.hand = []
            player.bullheads = 0
            for _ in range(NB_TURNS):
                player.hand.append(self.deck.draw())
            player.hand.sort(key=lambda card: card.value)
    
    def get_cards(self):
        cards = []
        for player in self.players:
            card = player.choose_card(self.gameboard, self.all_played_cards)
            cards.append(card)
            player.hand.remove(card)
        return cards
    
    def place_card(self, cards, index_player):
        for i in index_player:
            player = self.players[i]
            card = cards[i]
            if self.display:
                print(f"{player.name} played {card}")

            bullheads = self.gameboard.play_card(card)
            player.bullheads += bullheads
            if self.display:
                print(f"{player.name} got {bullheads} bullheads")
        
    
    def play(self):
        # Play the game for NB_TURNS turns
        for _ in range(NB_TURNS):
            index_player = [i for i in range(len(self.players))]
            
            # Get the cards played by each player
            played = self.get_cards()
            
            # Sort the players by the card they played
            index_player = sorted(index_player, key=lambda i: played[i].value)
            
            # Replace a row if the lowest card cannot be placed next in the row
            if not self.gameboard.can_play_card(played[index_player[0]]):
                player = self.players[index_player[0]]
                row = player.choose_row(self.gameboard, played, self.all_played_cards)
                bullheads = self.gameboard.sum_bullheads(row - 1)
                self.gameboard.replace_row(played[index_player[0]], row - 1)
                player.bullheads += bullheads
                if self.display:
                    print(f"{player.name} got {bullheads} bullheads")
                index_player.pop(0)
            
            # Process each player's card
            self.place_card(played, index_player)
            
            # Add the played cards to the list of all played cards
            self.all_played_cards.extend(played)
                        
            if self.display:
                print(self.gameboard)
                
        # End the game and determine the winner
        if self.display:
            self.end_game()                

    def end_game(self):
        # Determine the winner (player with the fewest bullheads)
        winner = min(self.players, key=lambda player: player.bullheads)
        print(f"The winner is {winner.name} with {winner.bullheads} bullheads")
        
        # Print the bullheads for each player
        for player in self.players:
            print(f"{player.name} got {player.bullheads} bullheads")

In [7]:
class SafePlayer(Player):
    def choose_card(self, gameboard, all_played_cards):
        # Check if no card can be played
        if all([not gameboard.can_play_card(card) for card in self.hand]):
            return np.random.choice(self.hand)
        for card in self.hand:
            # Check where the card can be played
            if gameboard.can_play_card(card):
                return card

    def choose_row(self, gameboard, played, all_played_cards):
        return min(range(NB_ROWS), key=lambda i: gameboard.sum_bullheads(i)) + 1

In [8]:
class RandomPlayer(Player):
    def choose_card(self, gameboard, all_played_cards):
        return np.random.choice(self.hand) 

    def choose_row(self, gameboard, played, all_played_cards):
        return np.random.randint(1, NB_ROWS + 1)

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

torch.cuda.empty_cache()
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(f"Device: {device}")

Device: cpu


In [10]:
class QNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(QNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, output_size)
        ).to(device)
        
    def forward(self, x):
        return self.fc(x)

In [11]:
# Hyperparameters
GAMMA = 0.99
LR = 0.001
BATCH_SIZE = 32
EPSILON = 1.0
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.995
MEMORY_SIZE = 5000

In [12]:
class Bot(Player):
    def __init__(self, name):
        super(Bot, self).__init__(name)
        self.state_size = NB_TURNS + NB_ROWS * CARDS_PER_ROWS + NB_PLAYERS  + NB_PLAYERS * NB_TURNS  # Hand + Gameboard + Played cards this turn + All played cards
        self.action_size = NB_TURNS + NB_ROWS # Choose a card + Choose a row
        self.memory = deque(maxlen=2000)

        self.model = QNetwork(self.state_size, self.action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=LR)
        self.criterion = nn.MSELoss()

        self.epsilon = EPSILON
    
    def get_state(self, gameboard, played, all_played_cards):
        state = np.zeros(self.state_size)
        index = 0
        # Hand
        for i, card in enumerate(self.hand):
            state[index + i] = card.value
        index += NB_TURNS
        # Gameboard
        for i, row in enumerate(gameboard.board):
            for j, card in enumerate(row):
                state[index + i * CARDS_PER_ROWS + j] = card.value
        index += NB_ROWS * CARDS_PER_ROWS
        # Played cards this turn
        for i, card in enumerate(played):
            state[index + i] = card.value
        index += NB_PLAYERS
        # All played cards
        for i, card in enumerate(all_played_cards):
            state[index + i] = card.value
        return state
        
    
    def choose_card(self, gameboard, all_played_cards):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.hand)
        else:
            state = self.get_state(gameboard, [], all_played_cards)
            state = torch.tensor(state, dtype=torch.float).to(device)
            q_values = self.model(state)[:NB_TURNS]
            return self.hand[torch.argmax(q_values).item()]
        
    def choose_row(self, gameboard, played, all_played_cards):
        if np.random.rand() < self.epsilon:
            return np.random.randint(NB_ROWS) + 1
        else:
            state = self.get_state(gameboard, played, all_played_cards)
            state = torch.tensor(state, dtype=torch.float).to(device)
            q_values = self.model(state)[NB_TURNS:]
            return torch.argmax(q_values).item() + 1
    
    def store_transition(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        
    def train(self):
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        states, actions, rewards, next_states, dones = zip(*batch)
        states = torch.tensor(np.array(states), dtype=torch.float).to(device)
        actions = torch.tensor(actions, dtype=torch.long).to(device)
        rewards = torch.tensor(rewards, dtype=torch.float).to(device)
        next_states = torch.tensor(np.array(next_states), dtype=torch.float).to(device)
        dones = torch.tensor(dones, dtype=torch.float).to(device)
        
        # Train the model
        self.optimizer.zero_grad()
        q_values = self.model(states)

        next_q_values = self.model(next_states)
        q_value = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)
        next_q_value = next_q_values.max(1)[0]
        target = rewards + GAMMA * next_q_value * (1 - dones)
        
        loss = self.criterion(q_value, target)
        loss.backward()
        
        if self.epsilon > EPSILON_MIN:
            self.epsilon *= EPSILON_DECAY
        
    def save(self):
        torch.save(self.card_model.state_dict(), "card_model.pth")
        torch.save(self.row_model.state_dict(), "row_model.pth")
        
    def load(self):
        self.card_model.load_state_dict(torch.load("card_model.pth"))
        self.row_model.load_state_dict(torch.load("row_model.pth"))

In [13]:
class GameEnv(Game):
    def __init__(self, players):
        super(GameEnv, self).__init__(players, display=False)
        
    def reset(self):
        # Reset the game state
        self.deck = Deck()
        self.gameboard = Gameboard(self.deck)
        self.all_played_cards = []
        self.init_players()
    
    def get_cards(self):
        # Get the cards played by each player
        cards = []
        index_in_hand = []
        for player in self.players:
            card = player.choose_card(self.gameboard, self.all_played_cards)
            cards.append(card)
            index_in_hand.append(player.hand.index(card))
            player.hand.remove(card)
        # Get the choice of row for each player
        rows = [player.choose_row(self.gameboard, cards, self.all_played_cards) for player in self.players]
        # Return the cards and rows
        return cards, index_in_hand, rows
    
    def place_card(self, cards, index_player, bullheads_player):
        # Process each player's card
        for i in index_player:
            player = self.players[i]
            card = cards[i]
            bullheads = self.gameboard.play_card(card)
            player.bullheads += bullheads
            bullheads_player[i] += bullheads
        return bullheads_player
        
        
    def step(self):
        # Simulate one step of the game
        index_player = [i for i in range(len(self.players))]
        bullheads_player = [0 for _ in range(len(self.players))]
        
        # Get the cards played by each player
        played, index_in_hand, choosen_rows = self.get_cards()
        
        # Sort the players by the card they played
        index_player = sorted(index_player, key=lambda i: played[i].value)
        
        # Replace a row if the lowest card cannot be placed next in the row
        if not self.gameboard.can_play_card(played[index_player[0]]):
            row = choosen_rows[index_player[0]]
            bullheads = self.gameboard.replace_row(played[index_player[0]], row - 1)
            self.players[index_player[0]].bullheads += bullheads
            bullheads_player[index_player[0]] += bullheads
            index_player.pop(0)
        
        # Process each player's card
        bullheads_player = self.place_card(played, index_player, bullheads_player)
        
        # Add the played cards to the list of all played cards
        self.all_played_cards.extend(played)
            
        return played, index_in_hand, bullheads_player
        
             
    def train_bots(self, episodes):
        for episode in range(episodes):
            self.reset()
            state = [player.get_state(self.gameboard, [], self.all_played_cards) for player in self.players]
            done = False
            for _ in range(NB_TURNS):
                played, index_in_hand, bullheads_player = self.step()
                next_state = [player.get_state(self.gameboard, played, self.all_played_cards) for player in self.players]
                done = all([len(player.hand) == 0 for player in self.players])
                for i, player in enumerate(self.players):
                    reward = -bullheads_player[i]
                    player.store_transition(state[i], index_in_hand[i], reward, next_state[i], done)
                    player.train()
                state = next_state   

In [14]:
# Create the players
players = [Bot("Bot" + str(i)) for i in range(NB_PLAYERS)]
game = GameEnv(players)

# Train the bots
game.train_bots(1000)

IndexError: list index out of range