In [3]:
import random
import time
import math

In [4]:
# Seed the module-level RNG from the wall clock (whole seconds since the
# epoch), so each run of the notebook draws different die rolls.
# NOTE(review): this makes runs non-reproducible; swap in a fixed integer
# seed when a repeatable game is needed for debugging.
current_time = int(time.time())
random.seed(current_time)

In [5]:
# probability_LUT[k] == (7 - k) / 6: the chance that a fair six-sided die
# shows at least k, i.e. that the roll is large enough to remove k objects.
# The entry for 1 is deliberately the int 1 so that certain outcomes keep
# integer scores.
probability_LUT = {
    1: 1,
    2: 5 / 6,
    3: 4 / 6,
    4: 3 / 6,
    5: 2 / 6,
    6: 1 / 6,
}

In [19]:
def print2(l):
    """Print the items of ``l`` on a single line after a ``Print2 | `` tag.

    Every item, including the last one, is followed by ``", "``; the line
    ends with a newline.
    """
    rendered = "".join(str(item) + ", " for item in l)
    print("Print2 | " + rendered)

class State:
    """A position in a dice-limited game of Nim.

    ``nim`` holds the pile sizes and ``turn`` is the player about to move:
    ``1`` is the maximizing player and ``-1`` the minimizing player.
    """

    def __init__(self, array=None) -> None:
        """Create a state with player 1 to move.

        Args:
            array: iterable of pile sizes. Defaults to a fresh ``[3, 2, 1]``.
        """
        # Build a new list per instance. A list used as a default argument is
        # evaluated once, so every default-constructed State would otherwise
        # share (and mutate) the very same piles.
        self.nim = [3, 2, 1] if array is None else list(array)
        self.turn = 1

    def __str__(self):
        return f"{self.turn}: {self.nim}"

    def flip_turn(self):
        """Return the opposing player's marker.

        Despite the name this does not modify ``self.turn``; callers assign
        the result to the child state they are building.
        """
        return -self.turn

    def _child(self, pile_index, removed):
        """Return the successor reached by taking ``removed`` from one pile."""
        child = State(self.nim)  # the constructor copies the piles
        child.nim[pile_index] -= removed
        child.turn = self.flip_turn()
        return child

    def gen_child(self):
        """Return every successor as a ``(child_state, removed)`` tuple.

        At most 6 objects can be taken from a pile, the largest die face.
        """
        return [
            (self._child(i, removed), removed)
            for i, pile in enumerate(self.nim)
            for removed in range(1, min(pile, 6) + 1)
        ]

    def find_winner(self):
        """Return the winner's marker, or 0 while the piles do not sum to zero.

        Once nothing is left, the player who just moved (the opposite of
        ``turn``) took the last object and is reported as the winner.
        """
        if sum(self.nim) == 0:
            return -self.turn
        return 0

    def stochastic_minimax(self):
        """Return the probability-weighted minimax value of this state.

        A finished game scores ``find_winner()``. Otherwise every child's
        value is multiplied by ``probability_LUT[removed]``; player 1 takes
        the maximum of those products and player -1 the minimum.

        Raises:
            ValueError: if ``turn`` is neither 1 nor -1.
        """
        winner = self.find_winner()
        if winner != 0:
            return winner

        if self.turn not in (1, -1):
            raise ValueError(f"turn must be 1 or -1, got {self.turn!r}")

        weighted = [
            child.stochastic_minimax() * probability_LUT[removed]
            for child, removed in self.gen_child()
        ]
        if self.turn == 1:  # maximizing
            return max(weighted, default=float("-inf"))
        return min(weighted, default=float("inf"))  # minimizing

    def make_move(self, die_roll=0, verbose=True):
        """Return the best successor for the player to move, given a die roll.

        Args:
            die_roll: the rolled face (1-6). Any value outside that range
                triggers a fresh random roll.
            verbose: when true, print every candidate child and its score.

        Returns:
            The chosen child ``State``, or ``None`` if no move is possible.
        """
        if die_roll <= 0 or die_roll > 6:
            die_roll = random.randint(1, 6)
        best_state = None
        best_score = float("-inf")
        for i, pile in enumerate(self.nim):
            for removed in range(1, min(die_roll, pile) + 1):
                child_state = self._child(i, removed)
                child_state_score = child_state.stochastic_minimax()
                if verbose:
                    print(f"Child|| {str(child_state)} : Score {child_state_score}\n")
                # Scores are from player 1's perspective. Multiplying by the
                # mover's marker makes player -1 prefer the lowest score
                # instead of (wrongly) maximizing for its opponent.
                mover_score = child_state_score * self.turn
                if mover_score > best_score:
                    best_state = child_state
                    best_score = mover_score
        return best_state


In [20]:
# Demo: player 1 to move on a single pile of 4 with a known die roll of 3.
s = State([4,0,0])
c = s.make_move(3)

Child|| -1: [3, 0, 0] : Score -0.6666666666666666

Child|| -1: [2, 0, 0] : Score -0.8333333333333334

Child|| -1: [1, 0, 0] : Score -1



In [8]:
# Show the successor state that make_move selected ("turn: piles").
print(c)

-1: [3, 0, 0]


In [9]:
# Probability-weighted minimax value of the chosen successor state.
c.stochastic_minimax()

-0.6666666666666666

In [24]:
# state = [pile1,pile2,pile3]

# probability_LUT[k] == (7 - k) / 6: the chance that a fair six-sided die
# shows at least k, i.e. that the roll is large enough to remove k objects.
# The entry for 1 is deliberately the int 1 so that certain outcomes keep
# integer scores.
probability_LUT = {
    1: 1,
    2: 5 / 6,
    3: 4 / 6,
    4: 3 / 6,
    5: 2 / 6,
    6: 1 / 6,
}

def utility(state):
    """Return 1 when the pile sizes in ``state`` sum to zero, otherwise 0."""
    return 1 if sum(state) == 0 else 0

def minimax(state, maximizing):
    """Return the probability-weighted minimax value of ``state``.

    Args:
        state: list of pile sizes (any number of piles).
        maximizing: True when the maximizing player is about to move.

    Returns:
        -1 or 1 for a finished game: the player who is to move on an empty
        board lost, because the opponent took the last object. Otherwise the
        best value over all moves, where removing ``j`` objects weights the
        child's value by ``probability_LUT[j]``. The maximizer takes the
        largest weighted value and the minimizer the smallest.
    """
    if utility(state) == 1:
        return -1 if maximizing else 1

    pick = max if maximizing else min
    best_val = -math.inf if maximizing else math.inf
    # Iterate over however many piles the state has instead of assuming 3.
    for i in range(len(state)):
        # A single move removes 1..6 objects, capped by the pile size.
        for j in range(1, min(state[i] + 1, 7)):
            new_state = state.copy()
            new_state[i] -= j
            new_val = probability_LUT[j] * minimax(new_state, not maximizing)
            # best_val goes first so ties keep the earliest move's value.
            best_val = pick(best_val, new_val)
    return best_val
    
def get_best_move(state, die_roll):
    """Return the maximizing player's best move for a known die roll.

    Args:
        state: list of pile sizes (any number of piles).
        die_roll: the rolled face (1-6). Any value outside that range
            triggers a fresh random roll.

    Returns:
        ``(pile_index, objects_removed)`` for the move whose resulting state
        has the highest ``minimax`` value, or ``None`` when no move exists.
    """
    if die_roll <= 0 or die_roll > 6:
        die_roll = random.randint(1, 6)
    max_val = -math.inf
    best_move = None
    # Iterate over however many piles the state has instead of assuming 3.
    for i in range(len(state)):
        for j in range(1, min(state[i], die_roll) + 1):
            if best_move is None:
                # Fall back to the first legal move so one is always returned.
                best_move = (i, j)
            new_state = state.copy()
            new_state[i] -= j
            # The roll is already known, so the move itself is not weighted
            # by a probability.
            new_val = minimax(new_state, False)
            if new_val > max_val:
                max_val = new_val
                best_move = (i, j)
    return best_move
    
    

In [22]:
# Same scenario through the function-based solver: one pile of 4, roll of 3.
s = [4,0,0]
c = get_best_move(s,3)

In [23]:
# Display the chosen move as (pile_index, objects_removed).
c

(0, 1)

In [27]:
# state = [pile1,pile2,pile3]

# probability_LUT[k] == (7 - k) / 6: the chance that a fair six-sided die
# shows at least k, i.e. that the roll is large enough to remove k objects.
# The entry for 1 is deliberately the int 1 so that certain outcomes keep
# integer scores.
probability_LUT = {
    1: 1,
    2: 5 / 6,
    3: 4 / 6,
    4: 3 / 6,
    5: 2 / 6,
    6: 1 / 6,
}

def utility(state):
    """Return 1 when the pile sizes in ``state`` sum to zero, otherwise 0."""
    return 1 if sum(state) == 0 else 0

def minimax(state, maximizing):
    """Return the probability-weighted SUM of all move values from ``state``.

    Unlike a true minimax, no maximum or minimum is taken: every move's
    child value is multiplied by ``probability_LUT[j]`` (``j`` = objects
    removed) and the products are added up. ``maximizing`` only decides the
    sign of a finished game and is flipped on each recursive call.

    NOTE(review): the weights in ``probability_LUT`` do not sum to one and
    the sum also runs across piles, so the result is not a normalized
    expectation - confirm this is the intended scoring.

    Args:
        state: list of pile sizes (any number of piles).
        maximizing: True when the maximizing player is about to move.

    Returns:
        -1 or 1 for a finished game (the player to move on an empty board
        lost), otherwise the accumulated weighted sum.
    """
    if utility(state) == 1:
        return -1 if maximizing else 1

    sum_val = 0
    # Iterate over however many piles the state has instead of assuming 3.
    for i in range(len(state)):
        # A single move removes 1..6 objects, capped by the pile size.
        for j in range(1, min(state[i] + 1, 7)):
            new_state = state.copy()
            new_state[i] -= j
            # Accumulate with += in move order; this keeps the floating-point
            # result identical to a plain left-to-right addition.
            sum_val += probability_LUT[j] * minimax(new_state, not maximizing)
    return sum_val
    
def get_best_move(state, die_roll):
    """Return the highest-scoring move for a known die roll, printing each.

    For every legal move the line ``pile_index objects_removed value`` is
    printed, where ``value`` is ``minimax`` of the resulting state.

    Args:
        state: list of pile sizes (any number of piles).
        die_roll: the rolled face (1-6). Any value outside that range
            triggers a fresh random roll.

    Returns:
        ``(pile_index, objects_removed)`` for the move with the highest
        value, or ``None`` when no move exists.
    """
    if die_roll <= 0 or die_roll > 6:
        die_roll = random.randint(1, 6)
    max_val = -math.inf
    best_move = None
    # Iterate over however many piles the state has instead of assuming 3.
    for i in range(len(state)):
        for j in range(1, min(state[i], die_roll) + 1):
            if best_move is None:
                # Fall back to the first legal move so one is always returned.
                best_move = (i, j)
            new_state = state.copy()
            new_state[i] -= j
            # The roll is already known, so the move itself is not weighted
            # by a probability.
            new_val = minimax(new_state, False)
            if new_val > max_val:
                max_val = new_val
                best_move = (i, j)
            print(i, j, new_val)
    return best_move
    
    

In [28]:
# Re-run the pile-of-4 / roll-of-3 scenario with the sum-based scoring and
# display the chosen (pile_index, objects_removed) move.
s = [4,0,0]
c = get_best_move(s,3)
c

0 1 1.1102230246251565e-16
0 2 0.16666666666666663
0 3 -1


(0, 2)