# Tester sa stratégie au 421

L'objectif est d'apprendre à jouer au jeu du [421](https://fr.wikipedia.org/wiki/421_(jeu)) contre un adversaire. 

Il y a 21 pions au départ, répartis aléatoirement : c'est la *charge*. <br>Pour la *décharge*, vous devez établir 2 stratégies *déterministes* : 
* l'une en tant que meneur (vous avez la main et décidez quand vous arrêter), 
* l'autre en tant qu'opposant (vous jouez en fonction de la combinaison et du nombre de coups du meneur).

Votre stratégie devra être sauvegardée sous la forme de 2 fichiers csv au format précisé ci-dessous. <br>
Une fonction donne le résultat (aléatoire) d'une manche entre 2 joueurs, après charge et décharge.


## Init

In [34]:
import numpy as np

In [35]:
# Number of tokens in the pot at the start of a game; `game` hands them all
# out during the charge phase.
total_budget = 21

In [36]:
# Faces of a die, in increasing order (1..6).
dice = np.arange(1,7)
# NOTE(review): `throws` (1..3) is not referenced by any code visible in this
# file; presumably the throw indices of a turn -- confirm against its users.
throws = np.arange(1,4)

In [5]:
def get_scores():
    """Build the table giving the value of every dice combination.

    Keys are 3-tuples of faces sorted in decreasing order. The special
    combinations are inserted first: 421 (11), 111 (7), 221 (0), then
    (d,1,1) and (d,d,d) worth d for d > 1, then the sequences
    (d,d-1,d-2) worth 2. Every remaining combination is worth 1.

    Returns:
        dict mapping each combination to its value.
    """
    table = {(4,2,1): 11, (1,1,1): 7, (2,2,1): 0}
    above_one = [d for d in dice if d > 1]
    # Two aces plus a face d, then three of a kind: both are worth d.
    for d in above_one:
        table[(d,1,1)] = d
    for d in above_one:
        table[(d,d,d)] = d
    # Sequences of three consecutive faces.
    for d in dice:
        if d > 2:
            table[(d,d - 1,d - 2)] = 2
    # Everything that is not special gets the default value; setdefault
    # leaves the special entries inserted above untouched.
    for d in dice:
        for e in dice:
            if e > d:
                continue
            for f in dice:
                if f <= e:
                    table.setdefault((d,e,f), 1)
    return table

In [6]:
# Lookup table: combination (faces in decreasing order) -> value, read by `get_score`.
score = get_scores()

In [7]:
# Faces of a die in decreasing order (6..1); `get_rank` iterates over it to
# number combinations from the highest face down.
reverse_dice = np.arange(6, 0, -1)

In [8]:
def get_rank():
    """Build the table giving the rank of every dice combination.

    A lower rank is a stronger combination (`get_score` treats the state
    with the smaller rank as the winner). Ranks 0 and 1 go to 421 and 111,
    then come the (d,1,1) combinations, the three of a kind and the
    sequences, each group enumerated from the highest face down. All
    other combinations are numbered from 16 in decreasing order.

    Returns:
        dict mapping each combination (faces in decreasing order) to its rank.
    """
    ordering = {(4,2,1): 0, (1,1,1): 1}
    for offset, top in enumerate(reverse_dice):
        if top > 1:
            ordering[(top,1,1)] = 2 + offset
    for offset, top in enumerate(reverse_dice):
        if top > 1:
            ordering[(top,top,top)] = 7 + offset
    for offset, top in enumerate(reverse_dice):
        if top > 2:
            ordering[(top,top - 1,top - 2)] = 12 + offset
    # Snapshot of the special combinations, taken before adding the others.
    special_combos = list(ordering)
    ordinary = [(d,e,f)
                for d in reverse_dice
                for e in range(d, 0, -1)
                for f in range(e, 0, -1)
                if (d,e,f) not in special_combos]
    for position, combo in enumerate(ordinary, start=16):
        ordering[combo] = position
    return ordering

In [9]:
# Lookup table: combination (faces in decreasing order) -> rank; `get_score`
# treats the state with the smaller rank as the winner.
rank = get_rank()

## Load policy

Les 2 politiques (en tant que meneur et en tant qu'opposant) doivent être enregistrées au format csv comme suit :
* lead.csv: **state, throw, action**<br>
Exemple: 2,1,1,2,1,0,0 <br>
-> dans l'état (2,1,1) avec 2 lancers restants, action (1,0,0) (on relance le premier dé)
* opponent.csv: **target state, state, throw, action**<br>
Exemple: 4,2,1,4,2,2,2,0,0,1 <br>
-> pour l'état cible (4,2,1), dans l'état (4,2,2) avec 2 lancers restants, action (0,0,1) (on relance le dernier dé)

Notes :
* l'état est donné par ordre décroissant de valeur des dés (par exemple, 4,2,1 et non 4,1,2)
* l'action par défaut est (0,0,0) (donc un fichier vide = je m'arrête tout le temps)
* le nombre de lancers restants est 1 ou 2

In [10]:
import csv

In [11]:
# Example leader policy, keyed by (state, remaining throws): in state (2,1,1)
# with 2 throws left, re-roll the first and the last die.
lead = {((2,1,1),2): (1,0,1)}

In [12]:
def save_lead(lead, filename = 'ex_lead.csv'):
    """Write a leader policy to a csv file.

    Each entry becomes one row ``s1,s2,s3,throw,a1,a2,a3`` where
    (s1,s2,s3) is the state, ``throw`` the number of remaining throws and
    (a1,a2,a3) the action (1 = re-roll that die).

    Args:
        lead: dict mapping (state, throw) to an action tuple.
        filename: path of the csv file to (over)write.
    """
    # newline='' is required by the csv module: without it, platforms that
    # translate '\n' (Windows) write '\r\r\n' and the file reads back with a
    # blank row after every record.
    with open(filename, mode='w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for (state, throw), action in lead.items():
            writer.writerow([*state[:3], throw, *action[:3]])

In [13]:
save_lead(lead)

In [14]:
def load_lead(filename = 'ex_lead.csv'):
    """Read a leader policy from a csv file.

    Each non-empty row must have the form ``s1,s2,s3,throw,a1,a2,a3``:
    the state, the number of remaining throws, then the action.

    Args:
        filename: path of the csv file to read.

    Returns:
        dict mapping (state, throw) to the action tuple, all made of
        plain Python ints.

    Raises:
        ValueError: if a field is not an integer.
        IndexError: if a non-empty row has fewer than 4 fields.
    """
    lead = {}
    # newline='' lets the csv module handle line endings itself.
    with open(filename, newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # Blank line, e.g. from a file written with doubled line
                # endings; it carries no policy entry.
                continue
            values = [int(field) for field in row]
            state = tuple(values[0:3])
            throw = values[3]
            action = tuple(values[4:])
            lead[(state, throw)] = action
    return lead

In [15]:
lead = load_lead()

In [38]:
lead

{((2, 1, 1), 2): (1, 0, 1)}

In [16]:
# Example opponent policy, keyed by (leader's final state, own state,
# remaining throws): against (4,2,1), in state (4,2,2) with 2 throws left,
# re-roll the last die.
opponent = {((4,2,1),(4,2,2),2): (0,0,1)}

In [17]:
def save_opponent(opponent, filename = 'ex_opponent.csv'):
    """Write an opponent policy to a csv file.

    Each entry becomes one row ``t1,t2,t3,s1,s2,s3,throw,a1,a2,a3`` where
    (t1,t2,t3) is the leader's final state, (s1,s2,s3) the opponent's
    state, ``throw`` the number of remaining throws and (a1,a2,a3) the
    action (1 = re-roll that die).

    Args:
        opponent: dict mapping (lead_state, state, throw) to an action tuple.
        filename: path of the csv file to (over)write.
    """
    # newline='' is required by the csv module: without it, platforms that
    # translate '\n' (Windows) write '\r\r\n' and the file reads back with a
    # blank row after every record.
    with open(filename, mode='w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for (lead_state, state, throw), action in opponent.items():
            writer.writerow([*lead_state[:3], *state[:3], throw, *action[:3]])

In [18]:
save_opponent(opponent)

In [19]:
def load_opponent(filename = 'ex_opponent.csv'):
    """Read an opponent policy from a csv file.

    Each non-empty row must have the form
    ``t1,t2,t3,s1,s2,s3,throw,a1,a2,a3``: the leader's final state, the
    opponent's state, the number of remaining throws, then the action.

    Args:
        filename: path of the csv file to read.

    Returns:
        dict mapping (lead_state, state, throw) to the action tuple, all
        made of plain Python ints.

    Raises:
        ValueError: if a field is not an integer.
        IndexError: if a non-empty row has fewer than 7 fields.
    """
    opponent = {}
    # newline='' lets the csv module handle line endings itself.
    with open(filename, newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # Blank line, e.g. from a file written with doubled line
                # endings; it carries no policy entry.
                continue
            values = [int(field) for field in row]
            lead_state = tuple(values[0:3])
            state = tuple(values[3:6])
            throw = values[6]
            action = tuple(values[7:])
            opponent[(lead_state, state, throw)] = action
    return opponent

In [20]:
opponent = load_opponent()

## Play the game

In [21]:
def random_state():
    """Roll three dice and return the faces as a tuple in decreasing order."""
    # One independent draw per die; choice(6) is in 0..5, hence the + 1.
    faces = [np.random.choice(6) + 1 for _ in range(3)]
    faces.sort(reverse = True)
    return tuple(faces)

In [22]:
def transition_prob(state, action):
    """Return the distribution of the next state after re-rolling dice.

    Args:
        state: current combination, 3 faces.
        action: 3 flags, a truthy flag meaning "re-roll this die".

    Returns:
        dict mapping each reachable state (faces sorted in decreasing
        order) to its probability, in order of first appearance.
    """
    nb_rerolled = np.sum(action)
    nb_outcomes = 6**nb_rerolled
    weight = 1 / nb_outcomes
    distribution = {}
    # Each code in [0, 6**nb) is read as a base-6 number whose digits give
    # the new faces of the re-rolled dice, first re-rolled die first.
    for code in range(nb_outcomes):
        faces = np.array(state)
        remaining = code
        for position in range(3):
            if not action[position]:
                continue
            remaining, digit = divmod(remaining, 6)
            faces[position] = digit + 1
        outcome = tuple(sorted(faces, reverse = True))
        distribution[outcome] = distribution.get(outcome, 0) + weight
    return distribution

In [23]:
def move(state, action):
    """Sample the next state obtained by applying `action` in `state`.

    The next state is drawn from the distribution given by
    `transition_prob(state, action)`.
    """
    distribution = transition_prob(state, action)
    outcomes = list(distribution)
    weights = list(distribution.values())
    # Draw an index rather than a state: the states are tuples, which
    # np.random.choice would not treat as a 1-d list of atoms.
    drawn = np.random.choice(len(outcomes), p = weights)
    return outcomes[drawn]

In [24]:
def get_score(state1, state2):
    """Return the signed result of `state1` against `state2`.

    The state with the smaller rank wins. The result is the value of the
    winning combination, positive if player 1 wins and negative if
    player 2 wins. Equal ranks are settled by a fair coin flip.
    """
    rank1, rank2 = rank[state1], rank[state2]
    if rank1 == rank2:
        # Tie: pick the winner at random (+1 or -1 with equal probability).
        coin = np.random.choice(2)
        return (1 - 2 * coin) * score[state1]
    if rank1 < rank2:
        return score[state1]
    return -score[state2]

In [25]:
# Score of the leader (single round)

def lead_score(lead, opponent, lead_budget):
    """Play one round and return the result from the leader's point of view.

    The leader rolls, then follows the `lead` policy for at most 2
    re-rolls; a missing entry or the action (0,0,0) means "stop". The
    opponent then rolls and gets exactly as many re-rolls as the leader
    used, following the `opponent` policy with the same stopping rule.

    Args:
        lead: dict mapping (state, throw) to an action.
        opponent: dict mapping (lead_state, state, throw) to an action.
        lead_budget: not used by this function.

    Returns:
        `get_score(lead_state, opponent_state)`: positive if the leader
        wins the round, negative otherwise.
    """
    stop = (0,0,0)

    # Leader's turn.
    lead_state = random_state()
    remaining = 2
    while remaining > 0:
        key = (lead_state, remaining)
        if key not in lead:
            break
        chosen = lead[key]
        if not chosen != stop:
            break
        lead_state = move(lead_state, chosen)
        remaining -= 1

    # Opponent's turn: as many re-rolls as the leader actually used.
    own_state = random_state()
    remaining = 2 - remaining
    while remaining > 0:
        key = (lead_state, own_state, remaining)
        if key not in opponent:
            break
        chosen = opponent[key]
        if not chosen != stop:
            break
        own_state = move(own_state, chosen)
        remaining -= 1

    return get_score(lead_state, own_state)

In [26]:
# Winner of a game (player 1 or player 2)

def game(lead1, opponent1, lead2, opponent2):
    """Play a full game (charge then discharge) between two players.

    Charge: both players roll once per round and the loser of the round
    takes tokens from the pot (`total_budget` tokens) until it is empty.
    Discharge: rounds are played with `lead_score`; the result is moved
    from the leader to the other player (a negative result moves tokens
    the other way), and the loser of a round leads the next one. The
    discharge stops as soon as one player has no token left.

    Args:
        lead1, opponent1: policies of player 1 as leader / as opponent.
        lead2, opponent2: policies of player 2 as leader / as opponent.

    Returns:
        1 if player 1 still holds tokens at the end, 2 otherwise.

    NOTE(review): the comment above this function calls the returned
    player the winner, yet it is the player left holding the tokens; in
    the usual 421 rules that player loses -- confirm the intended
    convention.
    """
    pot = total_budget
    tokens = {1: 0, 2: 0}

    # Charge: the loser of each round takes tokens from the pot.
    while pot > 0:
        roll1 = random_state()
        roll2 = random_state()
        outcome = get_score(roll1, roll2)
        loser = 2 if outcome > 0 else 1
        taken = min(abs(outcome), pot)
        tokens[loser] += taken
        pot -= taken
        # The loser of the last charge round leads the discharge.
        leader = loser

    # Discharge: tokens move between the two players.
    while tokens[1] > 0 and tokens[2] > 0:
        follower = 3 - leader
        if leader == 1:
            outcome = lead_score(lead1, opponent2, tokens[1])
        else:
            outcome = lead_score(lead2, opponent1, tokens[2])
        tokens[leader] -= outcome
        tokens[follower] += outcome
        if outcome > 0:
            # The leader won: the other player leads the next round.
            leader = follower

    return 1 if tokens[1] > 0 else 2

In [39]:
lead

{((2, 1, 1), 2): (1, 0, 1)}

In [42]:
opponent2

{((4, 2, 1), (4, 2, 2), 2): (0, 0, 1)}

In [27]:
# Both players use the same example policies (the names alias the same dicts).
lead1 = lead
opponent1 = opponent
lead2 = lead
opponent2 = opponent

In [33]:
game(lead, opponent, lead, opponent)

1