# Matches game:

## 12 matches

- ## Each player can take 1, 2 or 3 matches

- ## <font color = red> Don’t be the last to take a match</font>
  
  
## Example from: <font color=green>Faustine Gusto </font>



In [0]:
from random import randint
import random
import numpy as np

---
# First we need an "environment" (game) simulator: 

- ### Input: <font color=blue>action</font>
- ### Outputs:  <font color=orange>next state (number of matches)</font> & <font color=orange>REWARD</font> (-1 when no matches are left, 0 otherwise)

In [0]:
class StickGame(object):
    """
        Environment of the stick game: a pile of sticks that players
        take from until none is left.
    """

    def __init__(self, nb):
        # @nb Number of stick the pile holds at the start of every game
        super(StickGame, self).__init__()
        self.original_nb = self.nb = nb

    def is_finished(self):
        # @return Boolean True once the pile is empty (or overdrawn)
        return bool(self.nb <= 0)

    def reset(self):
        # Refill the pile to its initial size. @return the new state
        self.nb = self.original_nb
        return self.nb

    def display(self):
        # Print one "| " per remaining stick
        print("| " * self.nb)

    def step(self, action):
        # @action Number of stick to remove (either 1, 2 or 3)
        # @return (next state, reward): (None, -1) when this move empties
        #         the pile, otherwise (remaining sticks, 0)
        self.nb = self.nb - action
        game_over = self.nb <= 0
        return (None, -1) if game_over else (self.nb, 0)


---
# We also need an "Agent" (StickPlayer class): 

- ### Input: <font color=blue>state</font>
- ### Outputs:  <font color=orange>next action</font>

---
---

## Also in StickPlayer class:

- ### game *history* is stored

- ### and there is a <font color=red>TRAINING</font> function for the <font color=magenta> Value Function V[ ]</font>, which is a peculiar function

### <font color=FF103>TO DO: define this training function </font> 
    First you will need to understand how V is used in the *greedy_step* function



In [0]:

class StickPlayer(object):
    """
        Stick Player

        Either a human (actions read from the keyboard) or an agent driven
        by a tabular value function V, where V[s] scores the situation of
        having to move when s sticks are left.
    """

    def __init__(self, is_human, size, trainable=True, learning_rate=0.001):
        # @is_human True to read the actions from the keyboard
        # @size Number of stick to play with (V covers the states 1..size)
        # @trainable False to leave V untouched when train() is called
        # @learning_rate Step size of the value update applied by train()
        super(StickPlayer, self).__init__()
        self.is_human = is_human
        self.history = []
        self.V = {}
        for s in range(1, size+1):
            self.V[s] = 0.
        self.win_nb = 0.
        self.lose_nb = 0.
        self.rewards = []
        # Probability of playing a random action instead of the greedy one
        self.eps = 0.99
        self.trainable = trainable
        self.learning_rate = learning_rate

    def reset_stat(self):
        # Reset stat
        self.win_nb = 0
        self.lose_nb = 0
        self.rewards = []

    def greedy_step(self, state):
        # Greedy step: choose the action leaving the opponent in the state
        # with the lowest value. Only actions leaving at least one stick are
        # considered; ties go to the smallest action.
        actions = [a for a in (1, 2, 3) if state - a > 0]
        if not actions:
            # Every action empties the pile: take a single stick rather than
            # more sticks than remain
            return 1
        return min(actions, key=lambda a: self.V[state - a])

    def play(self, state):
        # PLay given the @state (int). @return the chosen action
        if self.is_human is False:
            # Take random action
            if random.uniform(0, 1) < self.eps:
                action = randint(1, 3)
            else: # Or greedy action
                action = self.greedy_step(state)
        else:
            action = int(input("$>"))
        return action

    def add_transition(self, n_tuple):
        # Add one transition to the history: tuple (s, a , r, s')
        self.history.append(n_tuple)
        _s, _a, r, _sp = n_tuple
        self.rewards.append(r)

    def train(self):
        # Update the value function from the stored history, then clear the
        # history. Does nothing (and keeps the history) for a human or a
        # non trainable player.
        if not self.trainable or self.is_human is True:
            return

        # Walk the game backwards: the final reward is folded into the last
        # state first and then reaches the earlier states through V[sp]
        for s, _a, r, sp in reversed(self.history):
            if r != 0 or sp is None:
                # The game ended after this move: move V[s] toward the reward
                target = r
            else:
                # The game went on: move V[s] toward the value of next state
                target = self.V[sp]
            self.V[s] = self.V[s] + self.learning_rate * (target - self.V[s])

        self.history = []



---
# This is the function to play the game!

## Note that:

- ### There are 2 Players!
- ### Who starts playing is randomized

### Players'  *history* is stored: <font color=blue>player.history</font>
### Players' results are also stored: <font color=blue>player.win_nb</font> & <font color=blue>player.lose_nb</font>

## If the `train` flag is `True`, the value function of each trainable non-human player is updated from its history

---
---



In [0]:
def play(game, p1, p2, train=True):
    """
        Play one full game between p1 and p2 on game.

        The starting player is drawn at random. Every move is stored in the
        history of the player who made it, the win / lose counters of both
        players are updated when the game ends, and both players are trained
        afterwards when train is true.
    """
    state = game.reset()

    # Random playing order
    players = [p1, p2]
    random.shuffle(players)

    turn = 0
    while game.is_finished() is False:
        current = players[turn % 2]
        other = players[(turn + 1) % 2]

        # Only a human needs to see the pile
        if current.is_human:
            game.display()

        action = current.play(state)
        n_state, reward = game.step(action)

        # Game is over. Add stat
        if reward != 0:
            lost = 1. if reward == -1 else 0
            won = 1. if reward == 1 else 0
            # Update stat of the current player
            current.lose_nb += lost
            current.win_nb += won
            # Update stat of the other player
            other.lose_nb += won
            other.win_nb += lost

        # Complete the previous move of the other player with the reversed
        # reward and the state it leads to (nothing to complete on the
        # first turn)
        if turn != 0:
            s, a, _, _ = other.history[-1]
            other.history[-1] = (s, a, -reward, n_state)

        current.add_transition((state, action, reward, None))

        state = n_state
        turn += 1

    if train:
        p1.train()
        p2.train()


---
# Now you can try playing against a random Agent (player)

---
---



In [0]:
# Environment: every game starts with 12 sticks
game = StickGame(nb=12)

# A keyboard-driven player and an automated opponent; neither one is trainable
human = StickPlayer(True, 12, trainable=False)
random_player = StickPlayer(False, 12, trainable=False)



In [0]:
# Play one game between the human and the automated player; the playing
# order is shuffled inside play()
play(game, human, random_player)

# Win counters of both players
print('Human wins: ', human.win_nb, ' Random player wins: ', random_player.win_nb)

# Transitions (s, a, r, s') recorded for each player. Both players are
# non trainable, so train() leaves their history in place
print('Player p1 history \n ',human.history)
print('Player p2 history \n ',random_player.history)

---
# ... you can try two random Agents....

---
---



In [0]:
game = StickGame(nb=12)

# Two automated players whose value function stays frozen (trainable=False)
p1_rand = StickPlayer(False, 12, trainable=False)
p2_rand = StickPlayer(False, 12, trainable=False)

# Exploration rate of each player, one per line
print(p1_rand.eps, p2_rand.eps, sep="\n")

# Start both players with cleared counters and reward log
p1_rand.reset_stat()
p2_rand.reset_stat()

In [0]:
# Play one game between the two automated players; the playing order is
# shuffled inside play()
play(game, p1_rand, p2_rand)

# Win counters of both players
print('p1 rand wins : ', p1_rand.win_nb, 'p2 rand wins: ', p2_rand.win_nb)

# Transitions (s, a, r, s') recorded for each player. Both players are
# non trainable, so train() leaves their history in place
print('Player p1 rand history \n ',p1_rand.history)
print('Player p2 rand history \n ',p2_rand.history)

---
# Now we <font color=red>TRAIN</font> two Agents....

---
---



In [0]:

game = StickGame(12)

# Two trainable agents that learn by playing against each other
p1 = StickPlayer(is_human=False, size=12, trainable=True)
p2 = StickPlayer(is_human=False, size=12, trainable=True)


# Self-play over 10000 games; every tenth game the exploration rate of both
# agents is multiplied by 0.996, never dropping below 0.05
for i in range(10000):
    if not i % 10:
        p1.eps = max(0.05, p1.eps * 0.996)
        p2.eps = max(0.05, p2.eps * 0.996)
    play(game, p1, p2)

# Clear the counters p1 accumulated during training
p1.reset_stat()




In [0]:
# Display the value function: one "state value" line per state, each one
# followed by a separator line
for key, value in p1.V.items():
    print(key, value)
    print("------------------------------")

## ...try against a random agent

In [0]:
# Play 1000 games against a random player, without training, then report
# the share of games p1 won
for _ in range(1000):
    play(game, p1, random_player, train=False)
print("p1 win rate", p1.win_nb / (p1.lose_nb + p1.win_nb))



---
# Now you can try playing against a <font color=red>trained</font> Agent !!!

---
---



In [0]:
# Start from a clean slate: games played with train=False never clear the
# history, and the counters still hold the results of earlier games, so
# without this the output below would not describe this game only
p1.reset_stat()
human.reset_stat()
p1.history = []
human.history = []

# One game between the trained agent and the human, without training
play(game, p1, human, train=False)

print('p1 wins : ', p1.win_nb, 'human wins: ', human.win_nb)

# p1 is the trained agent here, not a random one
print('Player p1 history \n ', p1.history)
print('Player human history \n ', human.history)
