In [18]:
from Game import *

* Two Dice Pig
* Same rules as Pig except:
    * Two standard dice are rolled.
    * If neither shows a 1, their sum is added to the turn total.
    * If a single 1 is rolled, the player scores nothing and the turn ends.
    * If two 1s are rolled, the player’s entire score is lost, and the turn ends.
    * If a double other than two 1s is rolled, its sum is added to the turn total as with any other roll, but the player must roll again (holding is not allowed).

In [19]:
def initial_state():
    """Return the starting position for a new game.

    The state is a list laid out as
    [player 1 score, player 2 score, turn score, last die #1, last die #2],
    with every entry starting at 0.
    """
    n_fields=5  # two banked scores, the turn score and the two most recent dice
    return [0]*n_fields

In [20]:
def valid_moves(state,player):
    """List the moves available in `state`.

    When the two recorded dice are equal the only option is 'roll';
    otherwise the player may 'hold' or 'roll'.  The dice are also equal
    (0 and 0) in the initial state and after a hold, so a fresh turn
    starts with a forced roll.  `player` is not used.
    """
    _score1,_score2,_turn,die_a,die_b=state
    rolled_double=(die_a==die_b)
    return ['roll'] if rolled_double else ['hold','roll']

In [21]:
def show_state(state):
    """Print both scores, the turn score and the last two dice, then a separator line."""
    labels=(
        "Player 1 score:",
        "Player 2 score:",
        "Turn score:",
        "Last die (#1):",
        "Last die (#2):",
    )
    # the labels are listed in the same order as the entries of the state
    for index,label in enumerate(labels):
        print(label,state[index])
    print('___________________')

In [22]:
def win_status(state,player):
    """Return "win" if `player` has reached the goal, otherwise None.

    The not-yet-banked turn score counts toward the goal, so a player
    wins as soon as banked score + turn score reaches 50.

    Raises ValueError if `player` is neither 1 nor 2.
    """
    goal=50

    score1,score2,pending,_die1,_die2=state

    if player==1:
        banked=score1
    elif player==2:
        banked=score2
    else:
        raise ValueError("You can't get there from here.")

    if banked+pending>=goal:
        return "win"
    return None

In [23]:
def update_state(state,player,move):
    """Apply `move` for `player` and return the resulting state.

    state  -- [player 1 score, player 2 score, turn score, last die #1, last die #2]
    player -- 1 or 2 (any value other than 1 is treated as player 2)
    move   -- "hold" or "roll"

    "hold": the turn score is banked into the player's score, then the
            turn score and both dice are reset to 0.
    "roll": two dice are rolled and recorded in the state.
            * two 1s   -> the player's banked score AND the turn score are lost
            * a single 1 -> only the turn score is lost
            * otherwise  -> the sum of the dice is added to the turn score

    The input `state` is left untouched; a new list is returned.

    Raises ValueError for any other move.
    """
    # Work on a copy: assigning `new_state=state` would alias the caller's
    # list and silently modify it in place.
    new_state=list(state)
    player1_total,player2_total,turn_total,last_die1,last_die2=state

    if move=="hold":
        if player==1:
            new_state[0]=player1_total+turn_total
        else:
            new_state[1]=player2_total+turn_total

        new_state[2]=0  # reset the turn total
        new_state[3]=0  # this resets the first die
        new_state[4]=0  # this resets the second die

    elif move=="roll":
        dice1=random.randint(1,6)  # this generates a value from 1-6
        dice2=random.randint(1,6)  # this generates a value from 1-6

        new_state[3]=dice1
        new_state[4]=dice2

        if dice1==1 and dice2==1:
            # two 1s: the entire score is lost
            if player==1:
                new_state[0]=0
            else:
                new_state[1]=0
            # The turn is over as well, so the turn total must be cleared;
            # otherwise it would carry over to the opponent and count
            # toward the opponent's score.
            new_state[2]=0
        elif dice1==1 or dice2==1:
            new_state[2]=0  # a single 1: only the turn total is lost
        else:
            new_state[2]=turn_total+dice1+dice2  # update the turn total

    else:
        raise ValueError("You can't get there from here.")

    return new_state

In [24]:
def repeat_move(state,player,move):
    """Return True when the same player moves again, False when the turn is over.

    The turn is over when either recorded die is a 1 (a losing roll) or a 0
    (the dice are reset to 0 by a hold).  `player` and `move` are not used.
    """
    _score1,_score2,_turn,die_a,die_b=state

    turn_ending_values=(1,0)
    return all(die not in turn_ending_values for die in (die_a,die_b))

In [25]:
def human_move(state,player):
    """Ask the user for a move and return "roll" or "hold".

    Any answer starting with 'r' (case-insensitive, surrounding whitespace
    ignored) means "roll"; everything else, including an empty answer,
    means "hold".  `state` and `player` are not used.

    NOTE(review): the answer is not checked against valid_moves here --
    presumably the Game framework rejects an invalid move and asks again; confirm.
    """
    move=input("Roll or Hold?").strip().lower()

    # startswith() is safe on an empty answer, where move[0] would raise IndexError
    if move.startswith('r'):
        return "roll"
    else:
        return "hold"

# Agent driven by keyboard input through human_move.
# (Agent is not defined in this notebook -- presumably it comes from `from Game import *`.)
human_agent=Agent(human_move)

In [26]:
def random_move(state,player):
    """Return one of the moves that valid_moves allows in `state`, chosen at random."""
    return random.choice(valid_moves(state,player))
 
# Agent that plays a randomly chosen valid move (see random_move).
random_agent=Agent(random_move)

In [27]:
from Game.minimax import *
def minimax_move(state,player):
    """Choose a move from the values computed by minimax_values.

    NOTE(review): the original inline comment says "set max depth", but no
    depth limit is passed to minimax_values -- confirm whether one is needed.
    """
    search_result=minimax_values(state,player,display=True)  # set max depth
    values,moves=search_result

    best=top_choice(moves,values)
    return best
    
# Agent that picks its move with minimax_move.
minimax_agent=Agent(minimax_move)

In [28]:
def skittles_move(state,player,info):
    """Pick a move using the agent's skittles table `info.T`.

    A state seen for the first time gets 3 skittles for each valid move.
    The move is then drawn with weighted_choice from the state's entry.
    If that yields None, one skittle is removed from the previous
    (state, action) pair -- never going below 0 -- and a random valid
    move is played instead.
    """
    table=info.T
    prev_state=info.last_state
    prev_action=info.last_action

    if state not in table:
        options=valid_moves(state,player)
        table[state]=Table()
        for option in options:
            table[state][option]=3  # number of skittles

    choice=weighted_choice(table[state])
    if choice is not None:
        return choice

    # can't win from this state: penalize the move that led here
    # (there is nothing to penalize when no previous move is recorded)
    if prev_state is not None:
        table[prev_state][prev_action]-=1   # take away a skittle
        if table[prev_state][prev_action]<0:
            table[prev_state][prev_action]=0

    return random_move(state,player)

def skittles_after(status,player,info):  # this is called after the game is over
    """Post-game update of the skittles table `info.T`.

    On a loss, one skittle is taken away from the last (state, action)
    pair the agent played, never going below 0.  Any other status leaves
    the table unchanged.  `player` is not used.
    """
    T=info.T
    last_state=info.last_state
    last_action=info.last_action

    # last_state is None when no previous move is recorded (skittles_move
    # guards the same case).  That happens if the game is lost before this
    # agent ever moved -- the opponent can reach the goal in its first turn --
    # and then there is no table entry to penalize.
    if status=='lose' and last_state is not None:
        # take away a skittle, but never drop below zero
        T[last_state][last_action]=max(T[last_state][last_action]-1,0)

In [29]:
# Two independent skittles learners.  Each one gets its own empty table `T`
# (filled in by skittles_move) and runs skittles_after once a game is over.
skittles_agent1 = Agent(skittles_move)
skittles_agent1.post = skittles_after
skittles_agent1.T = Table()

skittles_agent2 = Agent(skittles_move)
skittles_agent2.post = skittles_after
skittles_agent2.T = Table()

In [30]:
def Q_move(state,player,info):
    """Pick a move from the Q-table `info.Q` and, when learning, update the table.

    * A state seen for the first time gets an entry with value 0 for each valid move.
    * While learning, a random valid move is played with probability ϵ;
      otherwise (and always when not learning) top_choice of the state's
      entry is played.
    * While learning, and unless this is the agent's first move, the value of
      the previous (state, action) pair is moved toward
      reward + γ * (largest value in the current state's entry)
      with step size α.  The in-game reward is always 0 here.
    """
    Q=info.Q
    prev_state=info.last_state
    prev_action=info.last_action
    learning=info.learning

    # hyperparameters (type \alpha, \epsilon, \gamma and hit tab to get the symbols)
    α=info.α  # learning rate
    ϵ=info.ϵ  # how often to take a random move
    γ=info.γ  # memory constant -- how quickly does the table update back in time (earlier in the game)

    # first visit to this state: create its entry
    if state not in Q:
        options=valid_moves(state,player)
        Q[state]=Table()
        for option in options:
            Q[state][option]=0  # initial value of table

    # explore occasionally while learning, otherwise take the best known move
    if learning and random.random()<ϵ:
        move=random_move(state,player)
    else:
        move=top_choice(Q[state])

    is_first_move=last_move_missing=(prev_action is None)
    if not is_first_move and learning:
        reward=0
        best_next=max(Q[state][a] for a in Q[state])
        Q[prev_state][prev_action]+=α*(reward +
                    γ*best_next - Q[prev_state][prev_action])

    return move

In [31]:
def Q_after(status,player,info):
    """Post-game update of the Q-table `info.Q` with the final reward.

    Rewards: 'lose' -> -1, 'win' -> 1, 'stalemate' -> 0.5, anything else -> 0.
    When `info.learning` is true, the value of the last (state, action)
    pair the agent played is moved toward the reward with step size α.
    `player` is not used.
    """
    Q=info.Q
    last_state=info.last_state
    last_action=info.last_action
    learning=info.learning

    α=info.α  # learning rate  (type \alpha and hit tab to get the symbol)

    if status=='lose':
        reward=-1
    elif status=='win':
        reward=1
    elif status=='stalemate':
        reward=.5 # value stalemate a little closer to a win
    else:
        reward=0

    # last_state is None when no previous move is recorded.  That happens if
    # the game ends before this agent ever moved -- the opponent can reach the
    # goal in its first turn -- and then there is no table entry to update.
    if learning and last_state is not None:
        Q[last_state][last_action]+=α*(reward - Q[last_state][last_action])

In [32]:
# First Q-learning agent: Q_move picks the moves, Q_after applies the final reward.
Q1_agent = Agent(Q_move)

# hyperparameters read by Q_move / Q_after
Q1_agent.α = 0.3  # learning rate
Q1_agent.ϵ = 0.1  # how often to take a random move
Q1_agent.γ = 0.9  # memory constant -- how quickly does the table update back in time (earlier in the game)

Q1_agent.learning = True  # update the table while playing
Q1_agent.Q = Table()      # makes an empty table
Q1_agent.post = Q_after

In [33]:
# Second Q-learning agent, configured like the first but with its own table.
Q2_agent = Agent(Q_move)

# hyperparameters read by Q_move / Q_after
Q2_agent.α = 0.3  # learning rate
Q2_agent.ϵ = 0.1  # how often to take a random move
Q2_agent.γ = 0.9  # memory constant -- how quickly does the table update back in time (earlier in the game)

Q2_agent.learning = True  # update the table while playing
Q2_agent.Q = Table()      # makes an empty table
Q2_agent.post = Q_after